1/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
31#ifndef __AVX512VL__
32#pragma GCC push_options
33#pragma GCC target("avx512vl")
34#define __DISABLE_AVX512VL__
35#endif /* __AVX512VL__ */
36
37/* Internal data types for implementing the intrinsics.  */
38typedef unsigned int __mmask32;
39
40extern __inline __m256d
41__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43{
44  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45						  (__v4df) __W,
46						  (__mmask8) __U);
47}
48
49extern __inline __m256d
50__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52{
53  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54						  (__v4df)
55						  _mm256_setzero_pd (),
56						  (__mmask8) __U);
57}
58
59extern __inline __m128d
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62{
63  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64						  (__v2df) __W,
65						  (__mmask8) __U);
66}
67
68extern __inline __m128d
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71{
72  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73						  (__v2df)
74						  _mm_setzero_pd (),
75						  (__mmask8) __U);
76}
77
78extern __inline __m256d
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81{
82  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83						   (__v4df) __W,
84						   (__mmask8) __U);
85}
86
87extern __inline __m256d
88__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90{
91  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92						   (__v4df)
93						   _mm256_setzero_pd (),
94						   (__mmask8) __U);
95}
96
97extern __inline __m128d
98__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100{
101  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102						   (__v2df) __W,
103						   (__mmask8) __U);
104}
105
106extern __inline __m128d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109{
110  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111						   (__v2df)
112						   _mm_setzero_pd (),
113						   (__mmask8) __U);
114}
115
116extern __inline void
117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119{
120  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121				   (__v4df) __A,
122				   (__mmask8) __U);
123}
124
125extern __inline void
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128{
129  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130				   (__v2df) __A,
131				   (__mmask8) __U);
132}
133
134extern __inline __m256
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137{
138  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139						 (__v8sf) __W,
140						 (__mmask8) __U);
141}
142
143extern __inline __m256
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146{
147  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148						 (__v8sf)
149						 _mm256_setzero_ps (),
150						 (__mmask8) __U);
151}
152
153extern __inline __m128
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156{
157  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158						 (__v4sf) __W,
159						 (__mmask8) __U);
160}
161
162extern __inline __m128
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165{
166  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167						 (__v4sf)
168						 _mm_setzero_ps (),
169						 (__mmask8) __U);
170}
171
172extern __inline __m256
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175{
176  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177						  (__v8sf) __W,
178						  (__mmask8) __U);
179}
180
181extern __inline __m256
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184{
185  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186						  (__v8sf)
187						  _mm256_setzero_ps (),
188						  (__mmask8) __U);
189}
190
191extern __inline __m128
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194{
195  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196						  (__v4sf) __W,
197						  (__mmask8) __U);
198}
199
200extern __inline __m128
201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203{
204  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205						  (__v4sf)
206						  _mm_setzero_ps (),
207						  (__mmask8) __U);
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213{
214  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215				   (__v8sf) __A,
216				   (__mmask8) __U);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222{
223  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224				   (__v4sf) __A,
225				   (__mmask8) __U);
226}
227
228extern __inline __m256i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231{
232  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233						     (__v4di) __W,
234						     (__mmask8) __U);
235}
236
237extern __inline __m256i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240{
241  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242						     (__v4di)
243						     _mm256_setzero_si256 (),
244						     (__mmask8) __U);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250{
251  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252						     (__v2di) __W,
253						     (__mmask8) __U);
254}
255
256extern __inline __m128i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259{
260  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261						     (__v2di)
262						     _mm_setzero_si128 (),
263						     (__mmask8) __U);
264}
265
266extern __inline __m256i
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269{
270  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271							(__v4di) __W,
272							(__mmask8)
273							__U);
274}
275
276extern __inline __m256i
277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279{
280  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281							(__v4di)
282							_mm256_setzero_si256 (),
283							(__mmask8)
284							__U);
285}
286
287extern __inline __m128i
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290{
291  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292							(__v2di) __W,
293							(__mmask8)
294							__U);
295}
296
297extern __inline __m128i
298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300{
301  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302							(__v2di)
303							_mm_setzero_si128 (),
304							(__mmask8)
305							__U);
306}
307
308extern __inline void
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311{
312  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313					(__v4di) __A,
314					(__mmask8) __U);
315}
316
317extern __inline void
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320{
321  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322					(__v2di) __A,
323					(__mmask8) __U);
324}
325
326extern __inline __m256i
327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329{
330  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331						     (__v8si) __W,
332						     (__mmask8) __U);
333}
334
335extern __inline __m256i
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338{
339  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340						     (__v8si)
341						     _mm256_setzero_si256 (),
342						     (__mmask8) __U);
343}
344
345extern __inline __m128i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348{
349  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350						     (__v4si) __W,
351						     (__mmask8) __U);
352}
353
354extern __inline __m128i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357{
358  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359						     (__v4si)
360						     _mm_setzero_si128 (),
361						     (__mmask8) __U);
362}
363
364extern __inline __m256i
365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367{
368  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369							(__v8si) __W,
370							(__mmask8)
371							__U);
372}
373
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377{
378  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379							(__v8si)
380							_mm256_setzero_si256 (),
381							(__mmask8)
382							__U);
383}
384
385extern __inline __m128i
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388{
389  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390							(__v4si) __W,
391							(__mmask8)
392							__U);
393}
394
395extern __inline __m128i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398{
399  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400							(__v4si)
401							_mm_setzero_si128 (),
402							(__mmask8)
403							__U);
404}
405
406extern __inline void
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409{
410  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411					(__v8si) __A,
412					(__mmask8) __U);
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418{
419  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420					(__v4si) __A,
421					(__mmask8) __U);
422}
423
424extern __inline __m128d
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427{
428  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429						 (__v2df) __B,
430						 (__v2df) __W,
431						 (__mmask8) __U);
432}
433
434extern __inline __m128d
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437{
438  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439						 (__v2df) __B,
440						 (__v2df)
441						 _mm_setzero_pd (),
442						 (__mmask8) __U);
443}
444
445extern __inline __m256d
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448		    __m256d __B)
449{
450  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451						 (__v4df) __B,
452						 (__v4df) __W,
453						 (__mmask8) __U);
454}
455
456extern __inline __m256d
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459{
460  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461						 (__v4df) __B,
462						 (__v4df)
463						 _mm256_setzero_pd (),
464						 (__mmask8) __U);
465}
466
467extern __inline __m128
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
470{
471  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472						(__v4sf) __B,
473						(__v4sf) __W,
474						(__mmask8) __U);
475}
476
477extern __inline __m128
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
480{
481  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482						(__v4sf) __B,
483						(__v4sf)
484						_mm_setzero_ps (),
485						(__mmask8) __U);
486}
487
488extern __inline __m256
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
491{
492  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493						(__v8sf) __B,
494						(__v8sf) __W,
495						(__mmask8) __U);
496}
497
498extern __inline __m256
499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
500_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
501{
502  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503						(__v8sf) __B,
504						(__v8sf)
505						_mm256_setzero_ps (),
506						(__mmask8) __U);
507}
508
509extern __inline __m128d
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512{
513  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514						 (__v2df) __B,
515						 (__v2df) __W,
516						 (__mmask8) __U);
517}
518
519extern __inline __m128d
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522{
523  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524						 (__v2df) __B,
525						 (__v2df)
526						 _mm_setzero_pd (),
527						 (__mmask8) __U);
528}
529
530extern __inline __m256d
531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533		    __m256d __B)
534{
535  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536						 (__v4df) __B,
537						 (__v4df) __W,
538						 (__mmask8) __U);
539}
540
541extern __inline __m256d
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544{
545  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546						 (__v4df) __B,
547						 (__v4df)
548						 _mm256_setzero_pd (),
549						 (__mmask8) __U);
550}
551
552extern __inline __m128
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
555{
556  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557						(__v4sf) __B,
558						(__v4sf) __W,
559						(__mmask8) __U);
560}
561
562extern __inline __m128
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
565{
566  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567						(__v4sf) __B,
568						(__v4sf)
569						_mm_setzero_ps (),
570						(__mmask8) __U);
571}
572
573extern __inline __m256
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
576{
577  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578						(__v8sf) __B,
579						(__v8sf) __W,
580						(__mmask8) __U);
581}
582
583extern __inline __m256
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
586{
587  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588						(__v8sf) __B,
589						(__v8sf)
590						_mm256_setzero_ps (),
591						(__mmask8) __U);
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm256_store_epi64 (void *__P, __m256i __A)
597{
598  *(__m256i *) __P = __A;
599}
600
601extern __inline void
602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603_mm_store_epi64 (void *__P, __m128i __A)
604{
605  *(__m128i *) __P = __A;
606}
607
608extern __inline __m256d
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611{
612  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
613						   (__v4df) __W,
614						   (__mmask8) __U);
615}
616
617extern __inline __m256d
618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620{
621  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
622						   (__v4df)
623						   _mm256_setzero_pd (),
624						   (__mmask8) __U);
625}
626
627extern __inline __m128d
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630{
631  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
632						   (__v2df) __W,
633						   (__mmask8) __U);
634}
635
636extern __inline __m128d
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639{
640  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
641						   (__v2df)
642						   _mm_setzero_pd (),
643						   (__mmask8) __U);
644}
645
646extern __inline void
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649{
650  __builtin_ia32_storeupd256_mask ((double *) __P,
651				   (__v4df) __A,
652				   (__mmask8) __U);
653}
654
655extern __inline void
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658{
659  __builtin_ia32_storeupd128_mask ((double *) __P,
660				   (__v2df) __A,
661				   (__mmask8) __U);
662}
663
664extern __inline __m256
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667{
668  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
669						  (__v8sf) __W,
670						  (__mmask8) __U);
671}
672
673extern __inline __m256
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676{
677  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
678						  (__v8sf)
679						  _mm256_setzero_ps (),
680						  (__mmask8) __U);
681}
682
683extern __inline __m128
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686{
687  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
688						  (__v4sf) __W,
689						  (__mmask8) __U);
690}
691
692extern __inline __m128
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695{
696  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
697						  (__v4sf)
698						  _mm_setzero_ps (),
699						  (__mmask8) __U);
700}
701
702extern __inline void
703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705{
706  __builtin_ia32_storeups256_mask ((float *) __P,
707				   (__v8sf) __A,
708				   (__mmask8) __U);
709}
710
711extern __inline void
712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714{
715  __builtin_ia32_storeups128_mask ((float *) __P,
716				   (__v4sf) __A,
717				   (__mmask8) __U);
718}
719
720extern __inline __m256i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723{
724  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
725						     (__v4di) __W,
726						     (__mmask8) __U);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732{
733  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
734						     (__v4di)
735						     _mm256_setzero_si256 (),
736						     (__mmask8) __U);
737}
738
739extern __inline __m128i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742{
743  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
744						     (__v2di) __W,
745						     (__mmask8) __U);
746}
747
748extern __inline __m128i
749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751{
752  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
753						     (__v2di)
754						     _mm_setzero_si128 (),
755						     (__mmask8) __U);
756}
757
758extern __inline void
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm256_storeu_epi64 (void *__P, __m256i __A)
761{
762  *(__m256i_u *) __P = (__m256i_u) __A;
763}
764
765extern __inline void
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
768{
769  __builtin_ia32_storedqudi256_mask ((long long *) __P,
770				     (__v4di) __A,
771				     (__mmask8) __U);
772}
773
774extern __inline void
775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776_mm_storeu_epi64 (void *__P, __m128i __A)
777{
778  *(__m128i_u *) __P = (__m128i_u) __A;
779}
780
781extern __inline void
782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
783_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
784{
785  __builtin_ia32_storedqudi128_mask ((long long *) __P,
786				     (__v2di) __A,
787				     (__mmask8) __U);
788}
789
790extern __inline __m256i
791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
792_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
793{
794  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
795						     (__v8si) __W,
796						     (__mmask8) __U);
797}
798
799extern __inline __m256i
800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
802{
803  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
804						     (__v8si)
805						     _mm256_setzero_si256 (),
806						     (__mmask8) __U);
807}
808
809extern __inline __m128i
810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
811_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
812{
813  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
814						     (__v4si) __W,
815						     (__mmask8) __U);
816}
817
818extern __inline __m128i
819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
821{
822  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
823						     (__v4si)
824						     _mm_setzero_si128 (),
825						     (__mmask8) __U);
826}
827
828extern __inline void
829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830_mm256_storeu_epi32 (void *__P, __m256i __A)
831{
832  *(__m256i_u *) __P = (__m256i_u) __A;
833}
834
835extern __inline void
836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
837_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
838{
839  __builtin_ia32_storedqusi256_mask ((int *) __P,
840				     (__v8si) __A,
841				     (__mmask8) __U);
842}
843
844extern __inline void
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm_storeu_epi32 (void *__P, __m128i __A)
847{
848  *(__m128i_u *) __P = (__m128i_u) __A;
849}
850
851extern __inline void
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
854{
855  __builtin_ia32_storedqusi128_mask ((int *) __P,
856				     (__v4si) __A,
857				     (__mmask8) __U);
858}
859
860extern __inline __m256i
861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
863{
864  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
865						 (__v8si) __W,
866						 (__mmask8) __U);
867}
868
869extern __inline __m256i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
872{
873  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
874						 (__v8si)
875						 _mm256_setzero_si256 (),
876						 (__mmask8) __U);
877}
878
879extern __inline __m128i
880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
881_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
882{
883  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
884						 (__v4si) __W,
885						 (__mmask8) __U);
886}
887
888extern __inline __m128i
889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
890_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
891{
892  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
893						 (__v4si)
894						 _mm_setzero_si128 (),
895						 (__mmask8) __U);
896}
897
898extern __inline __m256i
899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900_mm256_abs_epi64 (__m256i __A)
901{
902  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
903						 (__v4di)
904						 _mm256_setzero_si256 (),
905						 (__mmask8) -1);
906}
907
908extern __inline __m256i
909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
910_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
911{
912  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
913						 (__v4di) __W,
914						 (__mmask8) __U);
915}
916
917extern __inline __m256i
918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
919_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
920{
921  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
922						 (__v4di)
923						 _mm256_setzero_si256 (),
924						 (__mmask8) __U);
925}
926
927extern __inline __m128i
928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
929_mm_abs_epi64 (__m128i __A)
930{
931  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
932						 (__v2di)
933						 _mm_setzero_si128 (),
934						 (__mmask8) -1);
935}
936
937extern __inline __m128i
938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
939_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
940{
941  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
942						 (__v2di) __W,
943						 (__mmask8) __U);
944}
945
946extern __inline __m128i
947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
949{
950  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
951						 (__v2di)
952						 _mm_setzero_si128 (),
953						 (__mmask8) __U);
954}
955
956extern __inline __m128i
957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
958_mm256_cvtpd_epu32 (__m256d __A)
959{
960  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
961						     (__v4si)
962						     _mm_setzero_si128 (),
963						     (__mmask8) -1);
964}
965
966extern __inline __m128i
967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
968_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
969{
970  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
971						     (__v4si) __W,
972						     (__mmask8) __U);
973}
974
975extern __inline __m128i
976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
977_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
978{
979  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
980						     (__v4si)
981						     _mm_setzero_si128 (),
982						     (__mmask8) __U);
983}
984
985extern __inline __m128i
986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987_mm_cvtpd_epu32 (__m128d __A)
988{
989  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
990						     (__v4si)
991						     _mm_setzero_si128 (),
992						     (__mmask8) -1);
993}
994
995extern __inline __m128i
996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
998{
999  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1000						     (__v4si) __W,
1001						     (__mmask8) __U);
1002}
1003
1004extern __inline __m128i
1005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1006_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
1007{
1008  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1009						     (__v4si)
1010						     _mm_setzero_si128 (),
1011						     (__mmask8) __U);
1012}
1013
1014extern __inline __m256i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1017{
1018  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1019						     (__v8si) __W,
1020						     (__mmask8) __U);
1021}
1022
1023extern __inline __m256i
1024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1025_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1026{
1027  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1028						     (__v8si)
1029						     _mm256_setzero_si256 (),
1030						     (__mmask8) __U);
1031}
1032
1033extern __inline __m128i
1034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1035_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1036{
1037  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1038						     (__v4si) __W,
1039						     (__mmask8) __U);
1040}
1041
1042extern __inline __m128i
1043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1044_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1045{
1046  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1047						     (__v4si)
1048						     _mm_setzero_si128 (),
1049						     (__mmask8) __U);
1050}
1051
1052extern __inline __m256i
1053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054_mm256_cvttps_epu32 (__m256 __A)
1055{
1056  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1057						      (__v8si)
1058						      _mm256_setzero_si256 (),
1059						      (__mmask8) -1);
1060}
1061
1062extern __inline __m256i
1063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1064_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1065{
1066  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1067						      (__v8si) __W,
1068						      (__mmask8) __U);
1069}
1070
1071extern __inline __m256i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1074{
1075  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1076						      (__v8si)
1077						      _mm256_setzero_si256 (),
1078						      (__mmask8) __U);
1079}
1080
1081extern __inline __m128i
1082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083_mm_cvttps_epu32 (__m128 __A)
1084{
1085  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1086						      (__v4si)
1087						      _mm_setzero_si128 (),
1088						      (__mmask8) -1);
1089}
1090
1091extern __inline __m128i
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1094{
1095  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1096						      (__v4si) __W,
1097						      (__mmask8) __U);
1098}
1099
1100extern __inline __m128i
1101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1102_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1103{
1104  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1105						      (__v4si)
1106						      _mm_setzero_si128 (),
1107						      (__mmask8) __U);
1108}
1109
1110extern __inline __m128i
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1113{
1114  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1115						     (__v4si) __W,
1116						     (__mmask8) __U);
1117}
1118
1119extern __inline __m128i
1120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1121_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1122{
1123  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1124						     (__v4si)
1125						     _mm_setzero_si128 (),
1126						     (__mmask8) __U);
1127}
1128
1129extern __inline __m128i
1130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1131_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1132{
1133  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1134						     (__v4si) __W,
1135						     (__mmask8) __U);
1136}
1137
1138extern __inline __m128i
1139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1140_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1141{
1142  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1143						     (__v4si)
1144						     _mm_setzero_si128 (),
1145						     (__mmask8) __U);
1146}
1147
1148extern __inline __m128i
1149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150_mm256_cvttpd_epu32 (__m256d __A)
1151{
1152  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1153						      (__v4si)
1154						      _mm_setzero_si128 (),
1155						      (__mmask8) -1);
1156}
1157
1158extern __inline __m128i
1159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1160_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1161{
1162  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1163						      (__v4si) __W,
1164						      (__mmask8) __U);
1165}
1166
1167extern __inline __m128i
1168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1169_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1170{
1171  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1172						      (__v4si)
1173						      _mm_setzero_si128 (),
1174						      (__mmask8) __U);
1175}
1176
1177extern __inline __m128i
1178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1179_mm_cvttpd_epu32 (__m128d __A)
1180{
1181  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1182						      (__v4si)
1183						      _mm_setzero_si128 (),
1184						      (__mmask8) -1);
1185}
1186
1187extern __inline __m128i
1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1190{
1191  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1192						      (__v4si) __W,
1193						      (__mmask8) __U);
1194}
1195
1196extern __inline __m128i
1197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1198_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1199{
1200  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1201						      (__v4si)
1202						      _mm_setzero_si128 (),
1203						      (__mmask8) __U);
1204}
1205
1206extern __inline __m128i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1209{
1210  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1211						    (__v4si) __W,
1212						    (__mmask8) __U);
1213}
1214
1215extern __inline __m128i
1216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1218{
1219  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1220						    (__v4si)
1221						    _mm_setzero_si128 (),
1222						    (__mmask8) __U);
1223}
1224
1225extern __inline __m128i
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1228{
1229  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1230						    (__v4si) __W,
1231						    (__mmask8) __U);
1232}
1233
1234extern __inline __m128i
1235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1237{
1238  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1239						    (__v4si)
1240						    _mm_setzero_si128 (),
1241						    (__mmask8) __U);
1242}
1243
1244extern __inline __m256d
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1247{
1248  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1249						    (__v4df) __W,
1250						    (__mmask8) __U);
1251}
1252
1253extern __inline __m256d
1254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1255_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1256{
1257  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1258						    (__v4df)
1259						    _mm256_setzero_pd (),
1260						    (__mmask8) __U);
1261}
1262
1263extern __inline __m128d
1264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1265_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1266{
1267  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1268						    (__v2df) __W,
1269						    (__mmask8) __U);
1270}
1271
1272extern __inline __m128d
1273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1274_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1275{
1276  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1277						    (__v2df)
1278						    _mm_setzero_pd (),
1279						    (__mmask8) __U);
1280}
1281
1282extern __inline __m256d
1283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284_mm256_cvtepu32_pd (__m128i __A)
1285{
1286  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1287						     (__v4df)
1288						     _mm256_setzero_pd (),
1289						     (__mmask8) -1);
1290}
1291
1292extern __inline __m256d
1293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1295{
1296  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1297						     (__v4df) __W,
1298						     (__mmask8) __U);
1299}
1300
1301extern __inline __m256d
1302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1304{
1305  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1306						     (__v4df)
1307						     _mm256_setzero_pd (),
1308						     (__mmask8) __U);
1309}
1310
1311extern __inline __m128d
1312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1313_mm_cvtepu32_pd (__m128i __A)
1314{
1315  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1316						     (__v2df)
1317						     _mm_setzero_pd (),
1318						     (__mmask8) -1);
1319}
1320
1321extern __inline __m128d
1322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1323_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1324{
1325  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1326						     (__v2df) __W,
1327						     (__mmask8) __U);
1328}
1329
1330extern __inline __m128d
1331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1333{
1334  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1335						     (__v2df)
1336						     _mm_setzero_pd (),
1337						     (__mmask8) __U);
1338}
1339
1340extern __inline __m256
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1343{
1344  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1345						   (__v8sf) __W,
1346						   (__mmask8) __U);
1347}
1348
1349extern __inline __m256
1350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1351_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
1352{
1353  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1354						   (__v8sf)
1355						   _mm256_setzero_ps (),
1356						   (__mmask8) __U);
1357}
1358
1359extern __inline __m128
1360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1361_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1362{
1363  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1364						   (__v4sf) __W,
1365						   (__mmask8) __U);
1366}
1367
1368extern __inline __m128
1369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
1371{
1372  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1373						   (__v4sf)
1374						   _mm_setzero_ps (),
1375						   (__mmask8) __U);
1376}
1377
1378extern __inline __m256
1379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1380_mm256_cvtepu32_ps (__m256i __A)
1381{
1382  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1383						    (__v8sf)
1384						    _mm256_setzero_ps (),
1385						    (__mmask8) -1);
1386}
1387
1388extern __inline __m256
1389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1391{
1392  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1393						    (__v8sf) __W,
1394						    (__mmask8) __U);
1395}
1396
1397extern __inline __m256
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1400{
1401  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1402						    (__v8sf)
1403						    _mm256_setzero_ps (),
1404						    (__mmask8) __U);
1405}
1406
1407extern __inline __m128
1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409_mm_cvtepu32_ps (__m128i __A)
1410{
1411  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1412						    (__v4sf)
1413						    _mm_setzero_ps (),
1414						    (__mmask8) -1);
1415}
1416
1417extern __inline __m128
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1420{
1421  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1422						    (__v4sf) __W,
1423						    (__mmask8) __U);
1424}
1425
1426extern __inline __m128
1427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1429{
1430  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1431						    (__v4sf)
1432						    _mm_setzero_ps (),
1433						    (__mmask8) __U);
1434}
1435
1436extern __inline __m256d
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1439{
1440  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1441						    (__v4df) __W,
1442						    (__mmask8) __U);
1443}
1444
1445extern __inline __m256d
1446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1447_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1448{
1449  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1450						    (__v4df)
1451						    _mm256_setzero_pd (),
1452						    (__mmask8) __U);
1453}
1454
1455extern __inline __m128d
1456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1458{
1459  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1460						    (__v2df) __W,
1461						    (__mmask8) __U);
1462}
1463
1464extern __inline __m128d
1465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1467{
1468  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1469						    (__v2df)
1470						    _mm_setzero_pd (),
1471						    (__mmask8) __U);
1472}
1473
1474extern __inline __m128i
1475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1476_mm_cvtepi32_epi8 (__m128i __A)
1477{
1478  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1479						  (__v16qi)
1480						  _mm_undefined_si128 (),
1481						  (__mmask8) -1);
1482}
1483
1484extern __inline void
1485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1486_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1487{
1488  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1489}
1490
1491extern __inline __m128i
1492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1494{
1495  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1496						  (__v16qi) __O, __M);
1497}
1498
1499extern __inline __m128i
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1502{
1503  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1504						  (__v16qi)
1505						  _mm_setzero_si128 (),
1506						  __M);
1507}
1508
1509extern __inline __m128i
1510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1511_mm256_cvtepi32_epi8 (__m256i __A)
1512{
1513  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1514						  (__v16qi)
1515						  _mm_undefined_si128 (),
1516						  (__mmask8) -1);
1517}
1518
1519extern __inline __m128i
1520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1521_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1522{
1523  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1524						  (__v16qi) __O, __M);
1525}
1526
1527extern __inline void
1528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1529_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1530{
1531  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1532}
1533
1534extern __inline __m128i
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1537{
1538  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1539						  (__v16qi)
1540						  _mm_setzero_si128 (),
1541						  __M);
1542}
1543
1544extern __inline __m128i
1545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1546_mm_cvtsepi32_epi8 (__m128i __A)
1547{
1548  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1549						   (__v16qi)
1550						   _mm_undefined_si128 (),
1551						   (__mmask8) -1);
1552}
1553
1554extern __inline void
1555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1557{
1558  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1559}
1560
1561extern __inline __m128i
1562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1564{
1565  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1566						   (__v16qi) __O, __M);
1567}
1568
1569extern __inline __m128i
1570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1572{
1573  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1574						   (__v16qi)
1575						   _mm_setzero_si128 (),
1576						   __M);
1577}
1578
1579extern __inline __m128i
1580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581_mm256_cvtsepi32_epi8 (__m256i __A)
1582{
1583  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1584						   (__v16qi)
1585						   _mm_undefined_si128 (),
1586						   (__mmask8) -1);
1587}
1588
1589extern __inline void
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1592{
1593  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1594}
1595
1596extern __inline __m128i
1597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1599{
1600  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1601						   (__v16qi) __O, __M);
1602}
1603
1604extern __inline __m128i
1605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1606_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1607{
1608  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1609						   (__v16qi)
1610						   _mm_setzero_si128 (),
1611						   __M);
1612}
1613
1614extern __inline __m128i
1615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1616_mm_cvtusepi32_epi8 (__m128i __A)
1617{
1618  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1619						    (__v16qi)
1620						    _mm_undefined_si128 (),
1621						    (__mmask8) -1);
1622}
1623
1624extern __inline void
1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1627{
1628  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1629}
1630
1631extern __inline __m128i
1632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1633_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1634{
1635  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1636						    (__v16qi) __O,
1637						    __M);
1638}
1639
1640extern __inline __m128i
1641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1642_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1643{
1644  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1645						    (__v16qi)
1646						    _mm_setzero_si128 (),
1647						    __M);
1648}
1649
1650extern __inline __m128i
1651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652_mm256_cvtusepi32_epi8 (__m256i __A)
1653{
1654  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655						    (__v16qi)
1656						    _mm_undefined_si128 (),
1657						    (__mmask8) -1);
1658}
1659
1660extern __inline void
1661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1662_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1663{
1664  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1665}
1666
1667extern __inline __m128i
1668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1669_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1670{
1671  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1672						    (__v16qi) __O,
1673						    __M);
1674}
1675
1676extern __inline __m128i
1677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1679{
1680  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1681						    (__v16qi)
1682						    _mm_setzero_si128 (),
1683						    __M);
1684}
1685
1686extern __inline __m128i
1687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688_mm_cvtepi32_epi16 (__m128i __A)
1689{
1690  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1691						  (__v8hi)
1692						  _mm_setzero_si128 (),
1693						  (__mmask8) -1);
1694}
1695
1696extern __inline void
1697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1699{
1700  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1701}
1702
1703extern __inline __m128i
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1706{
1707  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1708						  (__v8hi) __O, __M);
1709}
1710
1711extern __inline __m128i
1712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1714{
1715  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1716						  (__v8hi)
1717						  _mm_setzero_si128 (),
1718						  __M);
1719}
1720
1721extern __inline __m128i
1722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723_mm256_cvtepi32_epi16 (__m256i __A)
1724{
1725  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1726						  (__v8hi)
1727						  _mm_setzero_si128 (),
1728						  (__mmask8) -1);
1729}
1730
1731extern __inline void
1732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
1734{
1735  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1736}
1737
1738extern __inline __m128i
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1741{
1742  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1743						  (__v8hi) __O, __M);
1744}
1745
1746extern __inline __m128i
1747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1749{
1750  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1751						  (__v8hi)
1752						  _mm_setzero_si128 (),
1753						  __M);
1754}
1755
1756extern __inline __m128i
1757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1758_mm_cvtsepi32_epi16 (__m128i __A)
1759{
1760  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1761						   (__v8hi)
1762						   _mm_setzero_si128 (),
1763						   (__mmask8) -1);
1764}
1765
1766extern __inline void
1767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1769{
1770  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1771}
1772
1773extern __inline __m128i
1774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1776{
1777  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1778						   (__v8hi)__O,
1779						   __M);
1780}
1781
1782extern __inline __m128i
1783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1784_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1785{
1786  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1787						   (__v8hi)
1788						   _mm_setzero_si128 (),
1789						   __M);
1790}
1791
1792extern __inline __m128i
1793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1794_mm256_cvtsepi32_epi16 (__m256i __A)
1795{
1796  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1797						   (__v8hi)
1798						   _mm_undefined_si128 (),
1799						   (__mmask8) -1);
1800}
1801
1802extern __inline void
1803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1804_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1805{
1806  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1807}
1808
1809extern __inline __m128i
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1812{
1813  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1814						   (__v8hi) __O, __M);
1815}
1816
1817extern __inline __m128i
1818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1820{
1821  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1822						   (__v8hi)
1823						   _mm_setzero_si128 (),
1824						   __M);
1825}
1826
1827extern __inline __m128i
1828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1829_mm_cvtusepi32_epi16 (__m128i __A)
1830{
1831  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1832						    (__v8hi)
1833						    _mm_undefined_si128 (),
1834						    (__mmask8) -1);
1835}
1836
1837extern __inline void
1838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1839_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1840{
1841  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1842}
1843
1844extern __inline __m128i
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1847{
1848  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1849						    (__v8hi) __O, __M);
1850}
1851
1852extern __inline __m128i
1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1855{
1856  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1857						    (__v8hi)
1858						    _mm_setzero_si128 (),
1859						    __M);
1860}
1861
1862extern __inline __m128i
1863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864_mm256_cvtusepi32_epi16 (__m256i __A)
1865{
1866  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1867						    (__v8hi)
1868						    _mm_undefined_si128 (),
1869						    (__mmask8) -1);
1870}
1871
1872extern __inline void
1873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1875{
1876  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1877}
1878
1879extern __inline __m128i
1880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1882{
1883  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1884						    (__v8hi) __O, __M);
1885}
1886
1887extern __inline __m128i
1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1890{
1891  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1892						    (__v8hi)
1893						    _mm_setzero_si128 (),
1894						    __M);
1895}
1896
1897extern __inline __m128i
1898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899_mm_cvtepi64_epi8 (__m128i __A)
1900{
1901  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1902						  (__v16qi)
1903						  _mm_undefined_si128 (),
1904						  (__mmask8) -1);
1905}
1906
1907extern __inline void
1908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1910{
1911  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1912}
1913
1914extern __inline __m128i
1915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1917{
1918  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1919						  (__v16qi) __O, __M);
1920}
1921
1922extern __inline __m128i
1923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1925{
1926  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1927						  (__v16qi)
1928						  _mm_setzero_si128 (),
1929						  __M);
1930}
1931
1932extern __inline __m128i
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934_mm256_cvtepi64_epi8 (__m256i __A)
1935{
1936  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937						  (__v16qi)
1938						  _mm_undefined_si128 (),
1939						  (__mmask8) -1);
1940}
1941
1942extern __inline void
1943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1945{
1946  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1947}
1948
1949extern __inline __m128i
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1952{
1953  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1954						  (__v16qi) __O, __M);
1955}
1956
1957extern __inline __m128i
1958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1959_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1960{
1961  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1962						  (__v16qi)
1963						  _mm_setzero_si128 (),
1964						  __M);
1965}
1966
1967extern __inline __m128i
1968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1969_mm_cvtsepi64_epi8 (__m128i __A)
1970{
1971  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1972						   (__v16qi)
1973						   _mm_undefined_si128 (),
1974						   (__mmask8) -1);
1975}
1976
1977extern __inline void
1978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1980{
1981  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1982}
1983
1984extern __inline __m128i
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1987{
1988  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1989						   (__v16qi) __O, __M);
1990}
1991
1992extern __inline __m128i
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1995{
1996  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1997						   (__v16qi)
1998						   _mm_setzero_si128 (),
1999						   __M);
2000}
2001
2002extern __inline __m128i
2003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2004_mm256_cvtsepi64_epi8 (__m256i __A)
2005{
2006  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2007						   (__v16qi)
2008						   _mm_undefined_si128 (),
2009						   (__mmask8) -1);
2010}
2011
2012extern __inline void
2013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2014_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2015{
2016  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2017}
2018
2019extern __inline __m128i
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2022{
2023  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2024						   (__v16qi) __O, __M);
2025}
2026
2027extern __inline __m128i
2028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2029_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2030{
2031  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2032						   (__v16qi)
2033						   _mm_setzero_si128 (),
2034						   __M);
2035}
2036
2037extern __inline __m128i
2038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2039_mm_cvtusepi64_epi8 (__m128i __A)
2040{
2041  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2042						    (__v16qi)
2043						    _mm_undefined_si128 (),
2044						    (__mmask8) -1);
2045}
2046
2047extern __inline void
2048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2050{
2051  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2052}
2053
2054extern __inline __m128i
2055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2057{
2058  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2059						    (__v16qi) __O,
2060						    __M);
2061}
2062
2063extern __inline __m128i
2064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2065_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2066{
2067  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2068						    (__v16qi)
2069						    _mm_setzero_si128 (),
2070						    __M);
2071}
2072
2073extern __inline __m128i
2074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075_mm256_cvtusepi64_epi8 (__m256i __A)
2076{
2077  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2078						    (__v16qi)
2079						    _mm_undefined_si128 (),
2080						    (__mmask8) -1);
2081}
2082
2083extern __inline void
2084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2085_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2086{
2087  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2088}
2089
2090extern __inline __m128i
2091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2093{
2094  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2095						    (__v16qi) __O,
2096						    __M);
2097}
2098
2099extern __inline __m128i
2100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2101_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2102{
2103  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2104						    (__v16qi)
2105						    _mm_setzero_si128 (),
2106						    __M);
2107}
2108
2109extern __inline __m128i
2110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111_mm_cvtepi64_epi16 (__m128i __A)
2112{
2113  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2114						  (__v8hi)
2115						  _mm_undefined_si128 (),
2116						  (__mmask8) -1);
2117}
2118
2119extern __inline void
2120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2122{
2123  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2124}
2125
2126extern __inline __m128i
2127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2128_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2129{
2130  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2131						  (__v8hi)__O,
2132						  __M);
2133}
2134
2135extern __inline __m128i
2136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2137_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2138{
2139  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2140						  (__v8hi)
2141						  _mm_setzero_si128 (),
2142						  __M);
2143}
2144
2145extern __inline __m128i
2146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2147_mm256_cvtepi64_epi16 (__m256i __A)
2148{
2149  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2150						  (__v8hi)
2151						  _mm_undefined_si128 (),
2152						  (__mmask8) -1);
2153}
2154
2155extern __inline void
2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2158{
2159  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2160}
2161
2162extern __inline __m128i
2163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2165{
2166  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2167						  (__v8hi) __O, __M);
2168}
2169
2170extern __inline __m128i
2171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2172_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2173{
2174  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2175						  (__v8hi)
2176						  _mm_setzero_si128 (),
2177						  __M);
2178}
2179
2180extern __inline __m128i
2181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182_mm_cvtsepi64_epi16 (__m128i __A)
2183{
2184  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2185						   (__v8hi)
2186						   _mm_undefined_si128 (),
2187						   (__mmask8) -1);
2188}
2189
2190extern __inline void
2191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2193{
2194  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2195}
2196
2197extern __inline __m128i
2198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2200{
2201  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2202						   (__v8hi) __O, __M);
2203}
2204
2205extern __inline __m128i
2206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2207_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2208{
2209  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2210						   (__v8hi)
2211						   _mm_setzero_si128 (),
2212						   __M);
2213}
2214
2215extern __inline __m128i
2216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2217_mm256_cvtsepi64_epi16 (__m256i __A)
2218{
2219  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2220						   (__v8hi)
2221						   _mm_undefined_si128 (),
2222						   (__mmask8) -1);
2223}
2224
2225extern __inline void
2226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2227_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2228{
2229  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2230}
2231
2232extern __inline __m128i
2233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2235{
2236  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2237						   (__v8hi) __O, __M);
2238}
2239
2240extern __inline __m128i
2241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2243{
2244  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2245						   (__v8hi)
2246						   _mm_setzero_si128 (),
2247						   __M);
2248}
2249
2250extern __inline __m128i
2251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2252_mm_cvtusepi64_epi16 (__m128i __A)
2253{
2254  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2255						    (__v8hi)
2256						    _mm_undefined_si128 (),
2257						    (__mmask8) -1);
2258}
2259
2260extern __inline void
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2263{
2264  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2265}
2266
2267extern __inline __m128i
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2270{
2271  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2272						    (__v8hi) __O, __M);
2273}
2274
2275extern __inline __m128i
2276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2278{
2279  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2280						    (__v8hi)
2281						    _mm_setzero_si128 (),
2282						    __M);
2283}
2284
2285extern __inline __m128i
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm256_cvtusepi64_epi16 (__m256i __A)
2288{
2289  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2290						    (__v8hi)
2291						    _mm_undefined_si128 (),
2292						    (__mmask8) -1);
2293}
2294
2295extern __inline void
2296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2298{
2299  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2300}
2301
2302extern __inline __m128i
2303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2305{
2306  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2307						    (__v8hi) __O, __M);
2308}
2309
2310extern __inline __m128i
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2313{
2314  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2315						    (__v8hi)
2316						    _mm_setzero_si128 (),
2317						    __M);
2318}
2319
2320extern __inline __m128i
2321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2322_mm_cvtepi64_epi32 (__m128i __A)
2323{
2324  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2325						  (__v4si)
2326						  _mm_undefined_si128 (),
2327						  (__mmask8) -1);
2328}
2329
2330extern __inline void
2331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2332_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2333{
2334  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2335}
2336
2337extern __inline __m128i
2338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2340{
2341  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2342						  (__v4si) __O, __M);
2343}
2344
2345extern __inline __m128i
2346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2348{
2349  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2350						  (__v4si)
2351						  _mm_setzero_si128 (),
2352						  __M);
2353}
2354
2355extern __inline __m128i
2356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357_mm256_cvtepi64_epi32 (__m256i __A)
2358{
2359  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2360						  (__v4si)
2361						  _mm_undefined_si128 (),
2362						  (__mmask8) -1);
2363}
2364
2365extern __inline void
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2368{
2369  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2370}
2371
2372extern __inline __m128i
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2375{
2376  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2377						  (__v4si) __O, __M);
2378}
2379
2380extern __inline __m128i
2381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2382_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2383{
2384  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2385						  (__v4si)
2386						  _mm_setzero_si128 (),
2387						  __M);
2388}
2389
2390extern __inline __m128i
2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392_mm_cvtsepi64_epi32 (__m128i __A)
2393{
2394  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2395						   (__v4si)
2396						   _mm_undefined_si128 (),
2397						   (__mmask8) -1);
2398}
2399
2400extern __inline void
2401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2403{
2404  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2405}
2406
2407extern __inline __m128i
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2410{
2411  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2412						   (__v4si) __O, __M);
2413}
2414
2415extern __inline __m128i
2416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2417_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2418{
2419  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2420						   (__v4si)
2421						   _mm_setzero_si128 (),
2422						   __M);
2423}
2424
2425extern __inline __m128i
2426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2427_mm256_cvtsepi64_epi32 (__m256i __A)
2428{
2429  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2430						   (__v4si)
2431						   _mm_undefined_si128 (),
2432						   (__mmask8) -1);
2433}
2434
2435extern __inline void
2436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2437_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2438{
2439  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2440}
2441
2442extern __inline __m128i
2443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2444_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2445{
2446  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2447						   (__v4si)__O,
2448						   __M);
2449}
2450
2451extern __inline __m128i
2452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2453_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2454{
2455  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2456						   (__v4si)
2457						   _mm_setzero_si128 (),
2458						   __M);
2459}
2460
2461extern __inline __m128i
2462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463_mm_cvtusepi64_epi32 (__m128i __A)
2464{
2465  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2466						    (__v4si)
2467						    _mm_undefined_si128 (),
2468						    (__mmask8) -1);
2469}
2470
2471extern __inline void
2472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2473_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2474{
2475  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2476}
2477
2478extern __inline __m128i
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2481{
2482  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2483						    (__v4si) __O, __M);
2484}
2485
2486extern __inline __m128i
2487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2489{
2490  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2491						    (__v4si)
2492						    _mm_setzero_si128 (),
2493						    __M);
2494}
2495
2496extern __inline __m128i
2497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2498_mm256_cvtusepi64_epi32 (__m256i __A)
2499{
2500  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2501						    (__v4si)
2502						    _mm_undefined_si128 (),
2503						    (__mmask8) -1);
2504}
2505
2506extern __inline void
2507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2509{
2510  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2511}
2512
2513extern __inline __m128i
2514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2516{
2517  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2518						    (__v4si) __O, __M);
2519}
2520
2521extern __inline __m128i
2522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2524{
2525  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2526						    (__v4si)
2527						    _mm_setzero_si128 (),
2528						    __M);
2529}
2530
2531extern __inline __m256
2532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2534{
2535  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2536						      (__v8sf) __O,
2537						      __M);
2538}
2539
2540extern __inline __m256
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2543{
2544  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2545						      (__v8sf)
2546						      _mm256_setzero_ps (),
2547						      __M);
2548}
2549
2550extern __inline __m128
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2553{
2554  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2555						      (__v4sf) __O,
2556						      __M);
2557}
2558
2559extern __inline __m128
2560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2561_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2562{
2563  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2564						      (__v4sf)
2565						      _mm_setzero_ps (),
2566						      __M);
2567}
2568
2569extern __inline __m256d
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2572{
2573  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2574						       (__v4df) __O,
2575						       __M);
2576}
2577
2578extern __inline __m256d
2579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2580_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2581{
2582  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2583						       (__v4df)
2584						       _mm256_setzero_pd (),
2585						       __M);
2586}
2587
2588extern __inline __m256i
2589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2590_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2591{
2592  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2593						       (__v8si) __O,
2594						       __M);
2595}
2596
2597extern __inline __m256i
2598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2600{
2601  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2602						       (__v8si)
2603						       _mm256_setzero_si256 (),
2604						       __M);
2605}
2606
2607extern __inline __m256i
2608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2609_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2610{
2611  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2612							   __M);
2613}
2614
2615extern __inline __m256i
2616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2618{
2619  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2620							   (__v8si)
2621							   _mm256_setzero_si256 (),
2622							   __M);
2623}
2624
2625extern __inline __m128i
2626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2627_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2628{
2629  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2630						       (__v4si) __O,
2631						       __M);
2632}
2633
2634extern __inline __m128i
2635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2637{
2638  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2639						       (__v4si)
2640						       _mm_setzero_si128 (),
2641						       __M);
2642}
2643
2644extern __inline __m128i
2645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2646_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2647{
2648  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2649							   __M);
2650}
2651
2652extern __inline __m128i
2653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2654_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2655{
2656  return (__m128i)
2657	 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2658						 (__v4si) _mm_setzero_si128 (),
2659						 __M);
2660}
2661
2662extern __inline __m256i
2663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2665{
2666  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2667						       (__v4di) __O,
2668						       __M);
2669}
2670
2671extern __inline __m256i
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2674{
2675  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2676						       (__v4di)
2677						       _mm256_setzero_si256 (),
2678						       __M);
2679}
2680
2681extern __inline __m256i
2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2684{
2685  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2686							   __M);
2687}
2688
2689extern __inline __m256i
2690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2692{
2693  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2694							   (__v4di)
2695							   _mm256_setzero_si256 (),
2696							   __M);
2697}
2698
2699extern __inline __m128i
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2702{
2703  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2704						       (__v2di) __O,
2705						       __M);
2706}
2707
2708extern __inline __m128i
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2711{
2712  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2713						       (__v2di)
2714						       _mm_setzero_si128 (),
2715						       __M);
2716}
2717
2718extern __inline __m128i
2719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2720_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2721{
2722  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2723							   __M);
2724}
2725
2726extern __inline __m128i
2727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2729{
2730  return (__m128i)
2731	 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2732						 (__v2di) _mm_setzero_si128 (),
2733						 __M);
2734}
2735
2736extern __inline __m256
2737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738_mm256_broadcast_f32x4 (__m128 __A)
2739{
2740  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2741						          (__v8sf)_mm256_undefined_pd (),
2742							  (__mmask8) -1);
2743}
2744
2745extern __inline __m256
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2748{
2749  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2750							  (__v8sf) __O,
2751							  __M);
2752}
2753
2754extern __inline __m256
2755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2757{
2758  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2759							  (__v8sf)
2760							  _mm256_setzero_ps (),
2761							  __M);
2762}
2763
2764extern __inline __m256i
2765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766_mm256_broadcast_i32x4 (__m128i __A)
2767{
2768  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2769							   __A,
2770						           (__v8si)_mm256_undefined_si256 (),
2771							   (__mmask8) -1);
2772}
2773
2774extern __inline __m256i
2775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2776_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2777{
2778  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2779							   __A,
2780							   (__v8si)
2781							   __O, __M);
2782}
2783
2784extern __inline __m256i
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2787{
2788  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2789							   __A,
2790							   (__v8si)
2791							   _mm256_setzero_si256 (),
2792							   __M);
2793}
2794
2795extern __inline __m256i
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2798{
2799  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2800						    (__v8si) __W,
2801						    (__mmask8) __U);
2802}
2803
2804extern __inline __m256i
2805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2806_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2807{
2808  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2809						    (__v8si)
2810						    _mm256_setzero_si256 (),
2811						    (__mmask8) __U);
2812}
2813
2814extern __inline __m128i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2817{
2818  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2819						    (__v4si) __W,
2820						    (__mmask8) __U);
2821}
2822
2823extern __inline __m128i
2824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2825_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2826{
2827  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2828						    (__v4si)
2829						    _mm_setzero_si128 (),
2830						    (__mmask8) __U);
2831}
2832
2833extern __inline __m256i
2834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2836{
2837  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2838						    (__v4di) __W,
2839						    (__mmask8) __U);
2840}
2841
2842extern __inline __m256i
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2845{
2846  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2847						    (__v4di)
2848						    _mm256_setzero_si256 (),
2849						    (__mmask8) __U);
2850}
2851
2852extern __inline __m128i
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2855{
2856  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2857						    (__v2di) __W,
2858						    (__mmask8) __U);
2859}
2860
2861extern __inline __m128i
2862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2864{
2865  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2866						    (__v2di)
2867						    _mm_setzero_si128 (),
2868						    (__mmask8) __U);
2869}
2870
2871extern __inline __m256i
2872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2874{
2875  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2876						    (__v8si) __W,
2877						    (__mmask8) __U);
2878}
2879
2880extern __inline __m256i
2881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2882_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2883{
2884  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2885						    (__v8si)
2886						    _mm256_setzero_si256 (),
2887						    (__mmask8) __U);
2888}
2889
2890extern __inline __m128i
2891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2893{
2894  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2895						    (__v4si) __W,
2896						    (__mmask8) __U);
2897}
2898
2899extern __inline __m128i
2900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2901_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2902{
2903  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2904						    (__v4si)
2905						    _mm_setzero_si128 (),
2906						    (__mmask8) __U);
2907}
2908
2909extern __inline __m256i
2910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2912{
2913  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2914						    (__v4di) __W,
2915						    (__mmask8) __U);
2916}
2917
2918extern __inline __m256i
2919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2921{
2922  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2923						    (__v4di)
2924						    _mm256_setzero_si256 (),
2925						    (__mmask8) __U);
2926}
2927
2928extern __inline __m128i
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2931{
2932  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2933						    (__v2di) __W,
2934						    (__mmask8) __U);
2935}
2936
2937extern __inline __m128i
2938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2939_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2940{
2941  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2942						    (__v2di)
2943						    _mm_setzero_si128 (),
2944						    (__mmask8) __U);
2945}
2946
2947extern __inline __m256i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2950{
2951  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2952						    (__v4di) __W,
2953						    (__mmask8) __U);
2954}
2955
2956extern __inline __m256i
2957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2958_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2959{
2960  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2961						    (__v4di)
2962						    _mm256_setzero_si256 (),
2963						    (__mmask8) __U);
2964}
2965
2966extern __inline __m128i
2967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2969{
2970  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2971						    (__v2di) __W,
2972						    (__mmask8) __U);
2973}
2974
2975extern __inline __m128i
2976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2977_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2978{
2979  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2980						    (__v2di)
2981						    _mm_setzero_si128 (),
2982						    (__mmask8) __U);
2983}
2984
2985extern __inline __m256i
2986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2988{
2989  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2990						    (__v8si) __W,
2991						    (__mmask8) __U);
2992}
2993
2994extern __inline __m256i
2995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2996_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2997{
2998  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2999						    (__v8si)
3000						    _mm256_setzero_si256 (),
3001						    (__mmask8) __U);
3002}
3003
3004extern __inline __m128i
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3007{
3008  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3009						    (__v4si) __W,
3010						    (__mmask8) __U);
3011}
3012
3013extern __inline __m128i
3014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3016{
3017  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3018						    (__v4si)
3019						    _mm_setzero_si128 (),
3020						    (__mmask8) __U);
3021}
3022
3023extern __inline __m256i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3026{
3027  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3028						    (__v4di) __W,
3029						    (__mmask8) __U);
3030}
3031
3032extern __inline __m256i
3033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3034_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3035{
3036  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3037						    (__v4di)
3038						    _mm256_setzero_si256 (),
3039						    (__mmask8) __U);
3040}
3041
3042extern __inline __m128i
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3045{
3046  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3047						    (__v2di) __W,
3048						    (__mmask8) __U);
3049}
3050
3051extern __inline __m128i
3052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3053_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3054{
3055  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3056						    (__v2di)
3057						    _mm_setzero_si128 (),
3058						    (__mmask8) __U);
3059}
3060
3061extern __inline __m256i
3062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3064{
3065  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3066						    (__v8si) __W,
3067						    (__mmask8) __U);
3068}
3069
3070extern __inline __m256i
3071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3072_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3073{
3074  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3075						    (__v8si)
3076						    _mm256_setzero_si256 (),
3077						    (__mmask8) __U);
3078}
3079
3080extern __inline __m128i
3081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3083{
3084  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3085						    (__v4si) __W,
3086						    (__mmask8) __U);
3087}
3088
3089extern __inline __m128i
3090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3092{
3093  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3094						    (__v4si)
3095						    _mm_setzero_si128 (),
3096						    (__mmask8) __U);
3097}
3098
3099extern __inline __m256i
3100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3102{
3103  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3104						    (__v4di) __W,
3105						    (__mmask8) __U);
3106}
3107
3108extern __inline __m256i
3109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3111{
3112  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3113						    (__v4di)
3114						    _mm256_setzero_si256 (),
3115						    (__mmask8) __U);
3116}
3117
3118extern __inline __m128i
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3121{
3122  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3123						    (__v2di) __W,
3124						    (__mmask8) __U);
3125}
3126
3127extern __inline __m128i
3128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3130{
3131  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3132						    (__v2di)
3133						    _mm_setzero_si128 (),
3134						    (__mmask8) __U);
3135}
3136
3137extern __inline __m256i
3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3140{
3141  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3142						    (__v4di) __W,
3143						    (__mmask8) __U);
3144}
3145
3146extern __inline __m256i
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3149{
3150  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3151						    (__v4di)
3152						    _mm256_setzero_si256 (),
3153						    (__mmask8) __U);
3154}
3155
3156extern __inline __m128i
3157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3158_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3159{
3160  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3161						    (__v2di) __W,
3162						    (__mmask8) __U);
3163}
3164
3165extern __inline __m128i
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3168{
3169  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3170						    (__v2di)
3171						    _mm_setzero_si128 (),
3172						    (__mmask8) __U);
3173}
3174
3175extern __inline __m256d
3176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177_mm256_rcp14_pd (__m256d __A)
3178{
3179  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3180					      (__v4df)
3181					      _mm256_setzero_pd (),
3182					      (__mmask8) -1);
3183}
3184
3185extern __inline __m256d
3186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3188{
3189  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3190					      (__v4df) __W,
3191					      (__mmask8) __U);
3192}
3193
3194extern __inline __m256d
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3197{
3198  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3199					      (__v4df)
3200					      _mm256_setzero_pd (),
3201					      (__mmask8) __U);
3202}
3203
3204extern __inline __m128d
3205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206_mm_rcp14_pd (__m128d __A)
3207{
3208  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3209					      (__v2df)
3210					      _mm_setzero_pd (),
3211					      (__mmask8) -1);
3212}
3213
3214extern __inline __m128d
3215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3216_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3217{
3218  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3219					      (__v2df) __W,
3220					      (__mmask8) __U);
3221}
3222
3223extern __inline __m128d
3224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3226{
3227  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3228					      (__v2df)
3229					      _mm_setzero_pd (),
3230					      (__mmask8) __U);
3231}
3232
3233extern __inline __m256
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm256_rcp14_ps (__m256 __A)
3236{
3237  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3238					     (__v8sf)
3239					     _mm256_setzero_ps (),
3240					     (__mmask8) -1);
3241}
3242
3243extern __inline __m256
3244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3246{
3247  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3248					     (__v8sf) __W,
3249					     (__mmask8) __U);
3250}
3251
3252extern __inline __m256
3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3255{
3256  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3257					     (__v8sf)
3258					     _mm256_setzero_ps (),
3259					     (__mmask8) __U);
3260}
3261
3262extern __inline __m128
3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264_mm_rcp14_ps (__m128 __A)
3265{
3266  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3267					     (__v4sf)
3268					     _mm_setzero_ps (),
3269					     (__mmask8) -1);
3270}
3271
3272extern __inline __m128
3273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3274_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3275{
3276  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3277					     (__v4sf) __W,
3278					     (__mmask8) __U);
3279}
3280
3281extern __inline __m128
3282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3284{
3285  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3286					     (__v4sf)
3287					     _mm_setzero_ps (),
3288					     (__mmask8) __U);
3289}
3290
3291extern __inline __m256d
3292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293_mm256_rsqrt14_pd (__m256d __A)
3294{
3295  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3296						     (__v4df)
3297						     _mm256_setzero_pd (),
3298						     (__mmask8) -1);
3299}
3300
3301extern __inline __m256d
3302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3303_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3304{
3305  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3306						     (__v4df) __W,
3307						     (__mmask8) __U);
3308}
3309
3310extern __inline __m256d
3311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3313{
3314  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3315						     (__v4df)
3316						     _mm256_setzero_pd (),
3317						     (__mmask8) __U);
3318}
3319
3320extern __inline __m128d
3321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322_mm_rsqrt14_pd (__m128d __A)
3323{
3324  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3325						     (__v2df)
3326						     _mm_setzero_pd (),
3327						     (__mmask8) -1);
3328}
3329
3330extern __inline __m128d
3331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3332_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3333{
3334  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3335						     (__v2df) __W,
3336						     (__mmask8) __U);
3337}
3338
3339extern __inline __m128d
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3342{
3343  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3344						     (__v2df)
3345						     _mm_setzero_pd (),
3346						     (__mmask8) __U);
3347}
3348
3349extern __inline __m256
3350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351_mm256_rsqrt14_ps (__m256 __A)
3352{
3353  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3354						    (__v8sf)
3355						    _mm256_setzero_ps (),
3356						    (__mmask8) -1);
3357}
3358
3359extern __inline __m256
3360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3361_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3362{
3363  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3364						    (__v8sf) __W,
3365						    (__mmask8) __U);
3366}
3367
3368extern __inline __m256
3369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3371{
3372  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3373						    (__v8sf)
3374						    _mm256_setzero_ps (),
3375						    (__mmask8) __U);
3376}
3377
3378extern __inline __m128
3379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3380_mm_rsqrt14_ps (__m128 __A)
3381{
3382  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3383						    (__v4sf)
3384						    _mm_setzero_ps (),
3385						    (__mmask8) -1);
3386}
3387
3388extern __inline __m128
3389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3391{
3392  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3393						    (__v4sf) __W,
3394						    (__mmask8) __U);
3395}
3396
3397extern __inline __m128
3398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3400{
3401  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3402						    (__v4sf)
3403						    _mm_setzero_ps (),
3404						    (__mmask8) __U);
3405}
3406
3407extern __inline __m256d
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3410{
3411  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3412						  (__v4df) __W,
3413						  (__mmask8) __U);
3414}
3415
3416extern __inline __m256d
3417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3418_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3419{
3420  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3421						  (__v4df)
3422						  _mm256_setzero_pd (),
3423						  (__mmask8) __U);
3424}
3425
3426extern __inline __m128d
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3429{
3430  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3431						  (__v2df) __W,
3432						  (__mmask8) __U);
3433}
3434
3435extern __inline __m128d
3436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3438{
3439  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3440						  (__v2df)
3441						  _mm_setzero_pd (),
3442						  (__mmask8) __U);
3443}
3444
3445extern __inline __m256
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3448{
3449  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3450						 (__v8sf) __W,
3451						 (__mmask8) __U);
3452}
3453
3454extern __inline __m256
3455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3457{
3458  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3459						 (__v8sf)
3460						 _mm256_setzero_ps (),
3461						 (__mmask8) __U);
3462}
3463
3464extern __inline __m128
3465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3467{
3468  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3469						 (__v4sf) __W,
3470						 (__mmask8) __U);
3471}
3472
3473extern __inline __m128
3474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3475_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3476{
3477  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3478						 (__v4sf)
3479						 _mm_setzero_ps (),
3480						 (__mmask8) __U);
3481}
3482
3483extern __inline __m256i
3484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3485_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3486		       __m256i __B)
3487{
3488  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3489						 (__v8si) __B,
3490						 (__v8si) __W,
3491						 (__mmask8) __U);
3492}
3493
3494extern __inline __m256i
3495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3496_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3497{
3498  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3499						 (__v8si) __B,
3500						 (__v8si)
3501						 _mm256_setzero_si256 (),
3502						 (__mmask8) __U);
3503}
3504
3505extern __inline __m256i
3506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3507_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3508		       __m256i __B)
3509{
3510  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3511						 (__v4di) __B,
3512						 (__v4di) __W,
3513						 (__mmask8) __U);
3514}
3515
3516extern __inline __m256i
3517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3518_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3519{
3520  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3521						 (__v4di) __B,
3522						 (__v4di)
3523						 _mm256_setzero_si256 (),
3524						 (__mmask8) __U);
3525}
3526
3527extern __inline __m256i
3528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3530		       __m256i __B)
3531{
3532  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3533						 (__v8si) __B,
3534						 (__v8si) __W,
3535						 (__mmask8) __U);
3536}
3537
3538extern __inline __m256i
3539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3541{
3542  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3543						 (__v8si) __B,
3544						 (__v8si)
3545						 _mm256_setzero_si256 (),
3546						 (__mmask8) __U);
3547}
3548
3549extern __inline __m256i
3550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3551_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3552		       __m256i __B)
3553{
3554  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3555						 (__v4di) __B,
3556						 (__v4di) __W,
3557						 (__mmask8) __U);
3558}
3559
3560extern __inline __m256i
3561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3562_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3563{
3564  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3565						 (__v4di) __B,
3566						 (__v4di)
3567						 _mm256_setzero_si256 (),
3568						 (__mmask8) __U);
3569}
3570
3571extern __inline __m128i
3572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3573_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3574		    __m128i __B)
3575{
3576  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3577						 (__v4si) __B,
3578						 (__v4si) __W,
3579						 (__mmask8) __U);
3580}
3581
3582extern __inline __m128i
3583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3584_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3585{
3586  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3587						 (__v4si) __B,
3588						 (__v4si)
3589						 _mm_setzero_si128 (),
3590						 (__mmask8) __U);
3591}
3592
3593extern __inline __m128i
3594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3596		    __m128i __B)
3597{
3598  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3599						 (__v2di) __B,
3600						 (__v2di) __W,
3601						 (__mmask8) __U);
3602}
3603
3604extern __inline __m128i
3605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3607{
3608  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3609						 (__v2di) __B,
3610						 (__v2di)
3611						 _mm_setzero_si128 (),
3612						 (__mmask8) __U);
3613}
3614
3615extern __inline __m128i
3616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3617_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3618		    __m128i __B)
3619{
3620  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3621						 (__v4si) __B,
3622						 (__v4si) __W,
3623						 (__mmask8) __U);
3624}
3625
3626extern __inline __m128i
3627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3628_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3629{
3630  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3631						 (__v4si) __B,
3632						 (__v4si)
3633						 _mm_setzero_si128 (),
3634						 (__mmask8) __U);
3635}
3636
3637extern __inline __m128i
3638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3640		    __m128i __B)
3641{
3642  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3643						 (__v2di) __B,
3644						 (__v2di) __W,
3645						 (__mmask8) __U);
3646}
3647
3648extern __inline __m128i
3649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3651{
3652  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3653						 (__v2di) __B,
3654						 (__v2di)
3655						 _mm_setzero_si128 (),
3656						 (__mmask8) __U);
3657}
3658
3659extern __inline __m256
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm256_getexp_ps (__m256 __A)
3662{
3663  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3664						   (__v8sf)
3665						   _mm256_setzero_ps (),
3666						   (__mmask8) -1);
3667}
3668
3669extern __inline __m256
3670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3672{
3673  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3674						   (__v8sf) __W,
3675						   (__mmask8) __U);
3676}
3677
3678extern __inline __m256
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3681{
3682  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3683						   (__v8sf)
3684						   _mm256_setzero_ps (),
3685						   (__mmask8) __U);
3686}
3687
3688extern __inline __m256d
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm256_getexp_pd (__m256d __A)
3691{
3692  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3693						    (__v4df)
3694						    _mm256_setzero_pd (),
3695						    (__mmask8) -1);
3696}
3697
3698extern __inline __m256d
3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3701{
3702  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3703						    (__v4df) __W,
3704						    (__mmask8) __U);
3705}
3706
3707extern __inline __m256d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3710{
3711  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3712						    (__v4df)
3713						    _mm256_setzero_pd (),
3714						    (__mmask8) __U);
3715}
3716
3717extern __inline __m128
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm_getexp_ps (__m128 __A)
3720{
3721  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3722						   (__v4sf)
3723						   _mm_setzero_ps (),
3724						   (__mmask8) -1);
3725}
3726
3727extern __inline __m128
3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3730{
3731  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3732						   (__v4sf) __W,
3733						   (__mmask8) __U);
3734}
3735
3736extern __inline __m128
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3739{
3740  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3741						   (__v4sf)
3742						   _mm_setzero_ps (),
3743						   (__mmask8) __U);
3744}
3745
3746extern __inline __m128d
3747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3748_mm_getexp_pd (__m128d __A)
3749{
3750  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3751						    (__v2df)
3752						    _mm_setzero_pd (),
3753						    (__mmask8) -1);
3754}
3755
3756extern __inline __m128d
3757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3758_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3759{
3760  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3761						    (__v2df) __W,
3762						    (__mmask8) __U);
3763}
3764
3765extern __inline __m128d
3766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3767_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3768{
3769  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3770						    (__v2df)
3771						    _mm_setzero_pd (),
3772						    (__mmask8) __U);
3773}
3774
3775extern __inline __m256i
3776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3777_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3778		       __m128i __B)
3779{
3780  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3781						 (__v4si) __B,
3782						 (__v8si) __W,
3783						 (__mmask8) __U);
3784}
3785
3786extern __inline __m256i
3787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3789{
3790  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3791						 (__v4si) __B,
3792						 (__v8si)
3793						 _mm256_setzero_si256 (),
3794						 (__mmask8) __U);
3795}
3796
3797extern __inline __m128i
3798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3799_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3800		    __m128i __B)
3801{
3802  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3803						 (__v4si) __B,
3804						 (__v4si) __W,
3805						 (__mmask8) __U);
3806}
3807
3808extern __inline __m128i
3809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3810_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3811{
3812  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3813						 (__v4si) __B,
3814						 (__v4si)
3815						 _mm_setzero_si128 (),
3816						 (__mmask8) __U);
3817}
3818
3819extern __inline __m256i
3820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3822		       __m128i __B)
3823{
3824  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3825						 (__v2di) __B,
3826						 (__v4di) __W,
3827						 (__mmask8) __U);
3828}
3829
3830extern __inline __m256i
3831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3832_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3833{
3834  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3835						 (__v2di) __B,
3836						 (__v4di)
3837						 _mm256_setzero_si256 (),
3838						 (__mmask8) __U);
3839}
3840
3841extern __inline __m128i
3842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3843_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3844		    __m128i __B)
3845{
3846  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3847						 (__v2di) __B,
3848						 (__v2di) __W,
3849						 (__mmask8) __U);
3850}
3851
3852extern __inline __m128i
3853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3854_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3855{
3856  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3857						 (__v2di) __B,
3858						 (__v2di)
3859						 _mm_setzero_si128 (),
3860						 (__mmask8) __U);
3861}
3862
3863extern __inline __m256i
3864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3865_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3866		       __m256i __B)
3867{
3868  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3869						 (__v8si) __B,
3870						 (__v8si) __W,
3871						 (__mmask8) __U);
3872}
3873
3874extern __inline __m256i
3875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3876_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3877{
3878  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3879						 (__v8si) __B,
3880						 (__v8si)
3881						 _mm256_setzero_si256 (),
3882						 (__mmask8) __U);
3883}
3884
3885extern __inline __m256d
3886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887_mm256_scalef_pd (__m256d __A, __m256d __B)
3888{
3889  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3890						    (__v4df) __B,
3891						    (__v4df)
3892						    _mm256_setzero_pd (),
3893						    (__mmask8) -1);
3894}
3895
3896extern __inline __m256d
3897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3898_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3899		       __m256d __B)
3900{
3901  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3902						    (__v4df) __B,
3903						    (__v4df) __W,
3904						    (__mmask8) __U);
3905}
3906
3907extern __inline __m256d
3908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3909_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3910{
3911  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3912						    (__v4df) __B,
3913						    (__v4df)
3914						    _mm256_setzero_pd (),
3915						    (__mmask8) __U);
3916}
3917
3918extern __inline __m256
3919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3920_mm256_scalef_ps (__m256 __A, __m256 __B)
3921{
3922  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3923						   (__v8sf) __B,
3924						   (__v8sf)
3925						   _mm256_setzero_ps (),
3926						   (__mmask8) -1);
3927}
3928
3929extern __inline __m256
3930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3932		       __m256 __B)
3933{
3934  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3935						   (__v8sf) __B,
3936						   (__v8sf) __W,
3937						   (__mmask8) __U);
3938}
3939
3940extern __inline __m256
3941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3942_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3943{
3944  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3945						   (__v8sf) __B,
3946						   (__v8sf)
3947						   _mm256_setzero_ps (),
3948						   (__mmask8) __U);
3949}
3950
3951extern __inline __m128d
3952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953_mm_scalef_pd (__m128d __A, __m128d __B)
3954{
3955  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3956						    (__v2df) __B,
3957						    (__v2df)
3958						    _mm_setzero_pd (),
3959						    (__mmask8) -1);
3960}
3961
3962extern __inline __m128d
3963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3964_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3965		    __m128d __B)
3966{
3967  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3968						    (__v2df) __B,
3969						    (__v2df) __W,
3970						    (__mmask8) __U);
3971}
3972
3973extern __inline __m128d
3974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3975_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3976{
3977  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3978						    (__v2df) __B,
3979						    (__v2df)
3980						    _mm_setzero_pd (),
3981						    (__mmask8) __U);
3982}
3983
3984extern __inline __m128
3985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986_mm_scalef_ps (__m128 __A, __m128 __B)
3987{
3988  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3989						   (__v4sf) __B,
3990						   (__v4sf)
3991						   _mm_setzero_ps (),
3992						   (__mmask8) -1);
3993}
3994
3995extern __inline __m128
3996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3998{
3999  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4000						   (__v4sf) __B,
4001						   (__v4sf) __W,
4002						   (__mmask8) __U);
4003}
4004
4005extern __inline __m128
4006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
4008{
4009  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4010						   (__v4sf) __B,
4011						   (__v4sf)
4012						   _mm_setzero_ps (),
4013						   (__mmask8) __U);
4014}
4015
4016extern __inline __m256d
4017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4018_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4019		      __m256d __C)
4020{
4021  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4022						    (__v4df) __B,
4023						    (__v4df) __C,
4024						    (__mmask8) __U);
4025}
4026
4027extern __inline __m256d
4028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4030		       __mmask8 __U)
4031{
4032  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4033						     (__v4df) __B,
4034						     (__v4df) __C,
4035						     (__mmask8) __U);
4036}
4037
4038extern __inline __m256d
4039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4041		       __m256d __C)
4042{
4043  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4044						     (__v4df) __B,
4045						     (__v4df) __C,
4046						     (__mmask8) __U);
4047}
4048
4049extern __inline __m128d
4050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4051_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4052{
4053  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4054						    (__v2df) __B,
4055						    (__v2df) __C,
4056						    (__mmask8) __U);
4057}
4058
4059extern __inline __m128d
4060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4061_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4062		    __mmask8 __U)
4063{
4064  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4065						     (__v2df) __B,
4066						     (__v2df) __C,
4067						     (__mmask8) __U);
4068}
4069
4070extern __inline __m128d
4071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4072_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4073		    __m128d __C)
4074{
4075  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4076						     (__v2df) __B,
4077						     (__v2df) __C,
4078						     (__mmask8) __U);
4079}
4080
4081extern __inline __m256
4082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4083_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4084{
4085  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4086						   (__v8sf) __B,
4087						   (__v8sf) __C,
4088						   (__mmask8) __U);
4089}
4090
4091extern __inline __m256
4092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4093_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4094		       __mmask8 __U)
4095{
4096  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4097						    (__v8sf) __B,
4098						    (__v8sf) __C,
4099						    (__mmask8) __U);
4100}
4101
4102extern __inline __m256
4103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4105		       __m256 __C)
4106{
4107  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4108						    (__v8sf) __B,
4109						    (__v8sf) __C,
4110						    (__mmask8) __U);
4111}
4112
4113extern __inline __m128
4114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4115_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4116{
4117  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4118						   (__v4sf) __B,
4119						   (__v4sf) __C,
4120						   (__mmask8) __U);
4121}
4122
4123extern __inline __m128
4124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4126{
4127  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4128						    (__v4sf) __B,
4129						    (__v4sf) __C,
4130						    (__mmask8) __U);
4131}
4132
4133extern __inline __m128
4134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4135_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4136{
4137  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4138						    (__v4sf) __B,
4139						    (__v4sf) __C,
4140						    (__mmask8) __U);
4141}
4142
4143extern __inline __m256d
4144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4145_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4146		      __m256d __C)
4147{
4148  return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
4149						    (__v4df) __B,
4150						    (__v4df) __C,
4151						    (__mmask8) __U);
4152}
4153
4154extern __inline __m256d
4155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4156_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4157		       __mmask8 __U)
4158{
4159  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4160						     (__v4df) __B,
4161						     (__v4df) __C,
4162						     (__mmask8) __U);
4163}
4164
4165extern __inline __m256d
4166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4168		       __m256d __C)
4169{
4170  return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
4171						     (__v4df) __B,
4172						     (__v4df) __C,
4173						     (__mmask8) __U);
4174}
4175
4176extern __inline __m128d
4177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4179{
4180  return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
4181						    (__v2df) __B,
4182						    (__v2df) __C,
4183						    (__mmask8) __U);
4184}
4185
4186extern __inline __m128d
4187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4188_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4189		    __mmask8 __U)
4190{
4191  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4192						     (__v2df) __B,
4193						     (__v2df) __C,
4194						     (__mmask8) __U);
4195}
4196
4197extern __inline __m128d
4198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4199_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4200		    __m128d __C)
4201{
4202  return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
4203						     (__v2df) __B,
4204						     (__v2df) __C,
4205						     (__mmask8) __U);
4206}
4207
4208extern __inline __m256
4209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4211{
4212  return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
4213						   (__v8sf) __B,
4214						   (__v8sf) __C,
4215						   (__mmask8) __U);
4216}
4217
4218extern __inline __m256
4219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4220_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4221		       __mmask8 __U)
4222{
4223  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4224						    (__v8sf) __B,
4225						    (__v8sf) __C,
4226						    (__mmask8) __U);
4227}
4228
4229extern __inline __m256
4230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4231_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4232		       __m256 __C)
4233{
4234  return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
4235						    (__v8sf) __B,
4236						    (__v8sf) __C,
4237						    (__mmask8) __U);
4238}
4239
4240extern __inline __m128
4241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4243{
4244  return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
4245						   (__v4sf) __B,
4246						   (__v4sf) __C,
4247						   (__mmask8) __U);
4248}
4249
4250extern __inline __m128
4251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4252_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4253{
4254  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4255						    (__v4sf) __B,
4256						    (__v4sf) __C,
4257						    (__mmask8) __U);
4258}
4259
4260extern __inline __m128
4261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4262_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4263{
4264  return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
4265						    (__v4sf) __B,
4266						    (__v4sf) __C,
4267						    (__mmask8) __U);
4268}
4269
4270extern __inline __m256d
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4273			 __m256d __C)
4274{
4275  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4276						       (__v4df) __B,
4277						       (__v4df) __C,
4278						       (__mmask8) __U);
4279}
4280
4281extern __inline __m256d
4282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4283_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4284			  __mmask8 __U)
4285{
4286  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4287							(__v4df) __B,
4288							(__v4df) __C,
4289							(__mmask8)
4290							__U);
4291}
4292
4293extern __inline __m256d
4294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4295_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4296			  __m256d __C)
4297{
4298  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4299							(__v4df) __B,
4300							(__v4df) __C,
4301							(__mmask8)
4302							__U);
4303}
4304
4305extern __inline __m128d
4306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4307_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4308		      __m128d __C)
4309{
4310  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4311						       (__v2df) __B,
4312						       (__v2df) __C,
4313						       (__mmask8) __U);
4314}
4315
4316extern __inline __m128d
4317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4318_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4319		       __mmask8 __U)
4320{
4321  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4322							(__v2df) __B,
4323							(__v2df) __C,
4324							(__mmask8)
4325							__U);
4326}
4327
4328extern __inline __m128d
4329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4330_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4331		       __m128d __C)
4332{
4333  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4334							(__v2df) __B,
4335							(__v2df) __C,
4336							(__mmask8)
4337							__U);
4338}
4339
4340extern __inline __m256
4341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4342_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4343			 __m256 __C)
4344{
4345  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4346						      (__v8sf) __B,
4347						      (__v8sf) __C,
4348						      (__mmask8) __U);
4349}
4350
4351extern __inline __m256
4352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4353_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4354			  __mmask8 __U)
4355{
4356  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4357						       (__v8sf) __B,
4358						       (__v8sf) __C,
4359						       (__mmask8) __U);
4360}
4361
4362extern __inline __m256
4363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4365			  __m256 __C)
4366{
4367  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4368						       (__v8sf) __B,
4369						       (__v8sf) __C,
4370						       (__mmask8) __U);
4371}
4372
4373extern __inline __m128
4374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4375_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4376{
4377  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4378						      (__v4sf) __B,
4379						      (__v4sf) __C,
4380						      (__mmask8) __U);
4381}
4382
4383extern __inline __m128
4384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4385_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4386		       __mmask8 __U)
4387{
4388  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4389						       (__v4sf) __B,
4390						       (__v4sf) __C,
4391						       (__mmask8) __U);
4392}
4393
4394extern __inline __m128
4395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4396_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4397		       __m128 __C)
4398{
4399  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4400						       (__v4sf) __B,
4401						       (__v4sf) __C,
4402						       (__mmask8) __U);
4403}
4404
4405extern __inline __m256d
4406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4407_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4408			 __m256d __C)
4409{
4410  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4411						       (__v4df) __B,
4412						       -(__v4df) __C,
4413						       (__mmask8) __U);
4414}
4415
4416extern __inline __m256d
4417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4419			  __mmask8 __U)
4420{
4421  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4422							(__v4df) __B,
4423							(__v4df) __C,
4424							(__mmask8)
4425							__U);
4426}
4427
4428extern __inline __m256d
4429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4430_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4431			  __m256d __C)
4432{
4433  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4434							(__v4df) __B,
4435							-(__v4df) __C,
4436							(__mmask8)
4437							__U);
4438}
4439
4440extern __inline __m128d
4441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4442_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4443		      __m128d __C)
4444{
4445  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4446						       (__v2df) __B,
4447						       -(__v2df) __C,
4448						       (__mmask8) __U);
4449}
4450
4451extern __inline __m128d
4452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4454		       __mmask8 __U)
4455{
4456  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4457							(__v2df) __B,
4458							(__v2df) __C,
4459							(__mmask8)
4460							__U);
4461}
4462
4463extern __inline __m128d
4464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4465_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4466		       __m128d __C)
4467{
4468  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4469							(__v2df) __B,
4470							-(__v2df) __C,
4471							(__mmask8)
4472							__U);
4473}
4474
4475extern __inline __m256
4476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4477_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4478			 __m256 __C)
4479{
4480  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4481						      (__v8sf) __B,
4482						      -(__v8sf) __C,
4483						      (__mmask8) __U);
4484}
4485
4486extern __inline __m256
4487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4489			  __mmask8 __U)
4490{
4491  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4492						       (__v8sf) __B,
4493						       (__v8sf) __C,
4494						       (__mmask8) __U);
4495}
4496
4497extern __inline __m256
4498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4500			  __m256 __C)
4501{
4502  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4503						       (__v8sf) __B,
4504						       -(__v8sf) __C,
4505						       (__mmask8) __U);
4506}
4507
4508extern __inline __m128
4509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4510_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4511{
4512  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4513						      (__v4sf) __B,
4514						      -(__v4sf) __C,
4515						      (__mmask8) __U);
4516}
4517
4518extern __inline __m128
4519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4520_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4521		       __mmask8 __U)
4522{
4523  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4524						       (__v4sf) __B,
4525						       (__v4sf) __C,
4526						       (__mmask8) __U);
4527}
4528
4529extern __inline __m128
4530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4531_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4532		       __m128 __C)
4533{
4534  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4535						       (__v4sf) __B,
4536						       -(__v4sf) __C,
4537						       (__mmask8) __U);
4538}
4539
4540extern __inline __m256d
4541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4542_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4543		       __m256d __C)
4544{
4545  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4546						     (__v4df) __B,
4547						     (__v4df) __C,
4548						     (__mmask8) __U);
4549}
4550
4551extern __inline __m256d
4552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4553_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4554			__mmask8 __U)
4555{
4556  return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4557						      (__v4df) __B,
4558						      (__v4df) __C,
4559						      (__mmask8) __U);
4560}
4561
4562extern __inline __m256d
4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4565			__m256d __C)
4566{
4567  return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4568						      (__v4df) __B,
4569						      (__v4df) __C,
4570						      (__mmask8) __U);
4571}
4572
4573extern __inline __m128d
4574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4576		    __m128d __C)
4577{
4578  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4579						     (__v2df) __B,
4580						     (__v2df) __C,
4581						     (__mmask8) __U);
4582}
4583
4584extern __inline __m128d
4585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4586_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4587		     __mmask8 __U)
4588{
4589  return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4590						      (__v2df) __B,
4591						      (__v2df) __C,
4592						      (__mmask8) __U);
4593}
4594
4595extern __inline __m128d
4596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4597_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4598		     __m128d __C)
4599{
4600  return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4601						      (__v2df) __B,
4602						      (__v2df) __C,
4603						      (__mmask8) __U);
4604}
4605
4606extern __inline __m256
4607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4609		       __m256 __C)
4610{
4611  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4612						    (__v8sf) __B,
4613						    (__v8sf) __C,
4614						    (__mmask8) __U);
4615}
4616
4617extern __inline __m256
4618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4619_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4620			__mmask8 __U)
4621{
4622  return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4623						     (__v8sf) __B,
4624						     (__v8sf) __C,
4625						     (__mmask8) __U);
4626}
4627
4628extern __inline __m256
4629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4631			__m256 __C)
4632{
4633  return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4634						     (__v8sf) __B,
4635						     (__v8sf) __C,
4636						     (__mmask8) __U);
4637}
4638
4639extern __inline __m128
4640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4641_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4642{
4643  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4644						    (__v4sf) __B,
4645						    (__v4sf) __C,
4646						    (__mmask8) __U);
4647}
4648
4649extern __inline __m128
4650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4651_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4652{
4653  return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4654						     (__v4sf) __B,
4655						     (__v4sf) __C,
4656						     (__mmask8) __U);
4657}
4658
4659extern __inline __m128
4660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4661_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4662{
4663  return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4664						     (__v4sf) __B,
4665						     (__v4sf) __C,
4666						     (__mmask8) __U);
4667}
4668
4669extern __inline __m256d
4670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4671_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4672		       __m256d __C)
4673{
4674  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4675						     (__v4df) __B,
4676						     (__v4df) __C,
4677						     (__mmask8) __U);
4678}
4679
4680extern __inline __m256d
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4683			__mmask8 __U)
4684{
4685  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4686						      (__v4df) __B,
4687						      (__v4df) __C,
4688						      (__mmask8) __U);
4689}
4690
4691extern __inline __m256d
4692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4694			__m256d __C)
4695{
4696  return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4697						      (__v4df) __B,
4698						      (__v4df) __C,
4699						      (__mmask8) __U);
4700}
4701
4702extern __inline __m128d
4703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4704_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4705		    __m128d __C)
4706{
4707  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4708						     (__v2df) __B,
4709						     (__v2df) __C,
4710						     (__mmask8) __U);
4711}
4712
4713extern __inline __m128d
4714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4715_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4716		     __mmask8 __U)
4717{
4718  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4719						      (__v2df) __B,
4720						      (__v2df) __C,
4721						      (__mmask8) __U);
4722}
4723
4724extern __inline __m128d
4725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4727		     __m128d __C)
4728{
4729  return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4730						      (__v2df) __B,
4731						      (__v2df) __C,
4732						      (__mmask8) __U);
4733}
4734
4735extern __inline __m256
4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4738		       __m256 __C)
4739{
4740  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4741						    (__v8sf) __B,
4742						    (__v8sf) __C,
4743						    (__mmask8) __U);
4744}
4745
4746extern __inline __m256
4747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4749			__mmask8 __U)
4750{
4751  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4752						     (__v8sf) __B,
4753						     (__v8sf) __C,
4754						     (__mmask8) __U);
4755}
4756
4757extern __inline __m256
4758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4759_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4760			__m256 __C)
4761{
4762  return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4763						     (__v8sf) __B,
4764						     (__v8sf) __C,
4765						     (__mmask8) __U);
4766}
4767
4768extern __inline __m128
4769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4770_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4771{
4772  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4773						    (__v4sf) __B,
4774						    (__v4sf) __C,
4775						    (__mmask8) __U);
4776}
4777
4778extern __inline __m128
4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4781{
4782  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4783						     (__v4sf) __B,
4784						     (__v4sf) __C,
4785						     (__mmask8) __U);
4786}
4787
4788extern __inline __m128
4789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4790_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4791{
4792  return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4793						     (__v4sf) __B,
4794						     (__v4sf) __C,
4795						     (__mmask8) __U);
4796}
4797
4798extern __inline __m128i
4799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4801		    __m128i __B)
4802{
4803  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4804						 (__v4si) __B,
4805						 (__v4si) __W,
4806						 (__mmask8) __U);
4807}
4808
4809extern __inline __m128i
4810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4811_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4812{
4813  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4814						 (__v4si) __B,
4815						 (__v4si)
4816						 _mm_setzero_si128 (),
4817						 (__mmask8) __U);
4818}
4819
4820extern __inline __m256i
4821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4822_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4823			  __m256i __B)
4824{
4825  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4826						  (__v8si) __B,
4827						  (__v8si) __W,
4828						  (__mmask8) __U);
4829}
4830
4831extern __inline __m256i
4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4834{
4835  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4836						  (__v8si) __B,
4837						  (__v8si)
4838						  _mm256_setzero_si256 (),
4839						  (__mmask8) __U);
4840}
4841
4842extern __inline __m128i
4843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4844_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4845		       __m128i __B)
4846{
4847  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4848						  (__v4si) __B,
4849						  (__v4si) __W,
4850						  (__mmask8) __U);
4851}
4852
4853extern __inline __m128i
4854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4855_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4856{
4857  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4858						  (__v4si) __B,
4859						  (__v4si)
4860						  _mm_setzero_si128 (),
4861						  (__mmask8) __U);
4862}
4863
4864extern __inline __m256i
4865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4867		      __m256i __B)
4868{
4869  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4870						(__v8si) __B,
4871						(__v8si) __W,
4872						(__mmask8) __U);
4873}
4874
4875extern __inline __m256i
4876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4877_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4878{
4879  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4880						(__v8si) __B,
4881						(__v8si)
4882						_mm256_setzero_si256 (),
4883						(__mmask8) __U);
4884}
4885
4886extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4887_mm256_or_epi32 (__m256i __A, __m256i __B)
4888{
4889  return (__m256i) ((__v8su)__A | (__v8su)__B);
4890}
4891
4892extern __inline __m128i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4895{
4896  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4897						(__v4si) __B,
4898						(__v4si) __W,
4899						(__mmask8) __U);
4900}
4901
4902extern __inline __m128i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4905{
4906  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4907						(__v4si) __B,
4908						(__v4si)
4909						_mm_setzero_si128 (),
4910						(__mmask8) __U);
4911}
4912
4913extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914_mm_or_epi32 (__m128i __A, __m128i __B)
4915{
4916  return (__m128i) ((__v4su)__A | (__v4su)__B);
4917}
4918
4919extern __inline __m256i
4920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4922		       __m256i __B)
4923{
4924  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4925						 (__v8si) __B,
4926						 (__v8si) __W,
4927						 (__mmask8) __U);
4928}
4929
4930extern __inline __m256i
4931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4933{
4934  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4935						 (__v8si) __B,
4936						 (__v8si)
4937						 _mm256_setzero_si256 (),
4938						 (__mmask8) __U);
4939}
4940
4941extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942_mm256_xor_epi32 (__m256i __A, __m256i __B)
4943{
4944  return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4945}
4946
4947extern __inline __m128i
4948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4950		    __m128i __B)
4951{
4952  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4953						 (__v4si) __B,
4954						 (__v4si) __W,
4955						 (__mmask8) __U);
4956}
4957
4958extern __inline __m128i
4959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4960_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4961{
4962  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4963						 (__v4si) __B,
4964						 (__v4si)
4965						 _mm_setzero_si128 (),
4966						 (__mmask8) __U);
4967}
4968
4969extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4970_mm_xor_epi32 (__m128i __A, __m128i __B)
4971{
4972  return (__m128i) ((__v4su)__A ^ (__v4su)__B);
4973}
4974
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4978{
4979  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4980						(__v4sf) __W,
4981						(__mmask8) __U);
4982}
4983
4984extern __inline __m128
4985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4986_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4987{
4988  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4989						(__v4sf)
4990						_mm_setzero_ps (),
4991						(__mmask8) __U);
4992}
4993
4994extern __inline __m128
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4997{
4998  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4999						   (__v4sf) __W,
5000						   (__mmask8) __U);
5001}
5002
5003extern __inline __m128
5004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5005_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
5006{
5007  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
5008						   (__v4sf)
5009						   _mm_setzero_ps (),
5010						   (__mmask8) __U);
5011}
5012
5013extern __inline __m256i
5014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
5016{
5017  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5018						    (__v8si) __W,
5019						    (__mmask8) __U);
5020}
5021
5022extern __inline __m256i
5023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5024_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
5025{
5026  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5027						    (__v8si)
5028						    _mm256_setzero_si256 (),
5029						    (__mmask8) __U);
5030}
5031
5032extern __inline __m128i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5035{
5036  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5037						    (__v4si) __W,
5038						    (__mmask8) __U);
5039}
5040
5041extern __inline __m128i
5042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5043_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5044{
5045  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5046						    (__v4si)
5047						    _mm_setzero_si128 (),
5048						    (__mmask8) __U);
5049}
5050
5051extern __inline __m256i
5052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053_mm256_cvtps_epu32 (__m256 __A)
5054{
5055  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5056						     (__v8si)
5057						     _mm256_setzero_si256 (),
5058						     (__mmask8) -1);
5059}
5060
5061extern __inline __m256i
5062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5064{
5065  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5066						     (__v8si) __W,
5067						     (__mmask8) __U);
5068}
5069
5070extern __inline __m256i
5071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5072_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5073{
5074  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5075						     (__v8si)
5076						     _mm256_setzero_si256 (),
5077						     (__mmask8) __U);
5078}
5079
5080extern __inline __m128i
5081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5082_mm_cvtps_epu32 (__m128 __A)
5083{
5084  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5085						     (__v4si)
5086						     _mm_setzero_si128 (),
5087						     (__mmask8) -1);
5088}
5089
5090extern __inline __m128i
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5093{
5094  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5095						     (__v4si) __W,
5096						     (__mmask8) __U);
5097}
5098
5099extern __inline __m128i
5100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5102{
5103  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5104						     (__v4si)
5105						     _mm_setzero_si128 (),
5106						     (__mmask8) __U);
5107}
5108
5109extern __inline __m256d
5110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5112{
5113  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5114						   (__v4df) __W,
5115						   (__mmask8) __U);
5116}
5117
5118extern __inline __m256d
5119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5120_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5121{
5122  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5123						   (__v4df)
5124						   _mm256_setzero_pd (),
5125						   (__mmask8) __U);
5126}
5127
5128extern __inline __m128d
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5131{
5132  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5133						   (__v2df) __W,
5134						   (__mmask8) __U);
5135}
5136
5137extern __inline __m128d
5138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5140{
5141  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5142						   (__v2df)
5143						   _mm_setzero_pd (),
5144						   (__mmask8) __U);
5145}
5146
5147extern __inline __m256
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5150{
5151  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5152						   (__v8sf) __W,
5153						   (__mmask8) __U);
5154}
5155
5156extern __inline __m256
5157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5159{
5160  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5161						   (__v8sf)
5162						   _mm256_setzero_ps (),
5163						   (__mmask8) __U);
5164}
5165
5166extern __inline __m128
5167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5169{
5170  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5171						   (__v4sf) __W,
5172						   (__mmask8) __U);
5173}
5174
5175extern __inline __m128
5176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5177_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5178{
5179  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5180						   (__v4sf)
5181						   _mm_setzero_ps (),
5182						   (__mmask8) __U);
5183}
5184
5185extern __inline __m256
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5188{
5189  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5190						   (__v8sf) __W,
5191						   (__mmask8) __U);
5192}
5193
5194extern __inline __m256
5195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5197{
5198  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5199						   (__v8sf)
5200						   _mm256_setzero_ps (),
5201						   (__mmask8) __U);
5202}
5203
5204extern __inline __m128
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5207{
5208  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5209						   (__v4sf) __W,
5210						   (__mmask8) __U);
5211}
5212
5213extern __inline __m128
5214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5216{
5217  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5218						   (__v4sf)
5219						   _mm_setzero_ps (),
5220						   (__mmask8) __U);
5221}
5222
5223extern __inline __m128i
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5226			 __m128i __B)
5227{
5228  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5229						     (__v4si) __B,
5230						     (__v4si) __W,
5231						     (__mmask8) __U);
5232}
5233
5234extern __inline __m128i
5235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5236_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5237{
5238  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5239						     (__v4si) __B,
5240						     (__v4si)
5241						     _mm_setzero_si128 (),
5242						     (__mmask8) __U);
5243}
5244
5245extern __inline __m256i
5246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5247_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5248			    __m256i __B)
5249{
5250  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5251						     (__v8si) __B,
5252						     (__v8si) __W,
5253						     (__mmask8) __U);
5254}
5255
5256extern __inline __m256i
5257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5258_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5259{
5260  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5261						     (__v8si) __B,
5262						     (__v8si)
5263						     _mm256_setzero_si256 (),
5264						     (__mmask8) __U);
5265}
5266
5267extern __inline __m128i
5268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5270			 __m128i __B)
5271{
5272  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5273						      (__v2di) __B,
5274						      (__v2di) __W,
5275						      (__mmask8) __U);
5276}
5277
5278extern __inline __m128i
5279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5280_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5281{
5282  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5283						      (__v2di) __B,
5284						      (__v2di)
5285						      _mm_setzero_si128 (),
5286						      (__mmask8) __U);
5287}
5288
5289extern __inline __m256i
5290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5291_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5292			    __m256i __B)
5293{
5294  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5295						      (__v4di) __B,
5296						      (__v4di) __W,
5297						      (__mmask8) __U);
5298}
5299
5300extern __inline __m256i
5301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5302_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5303{
5304  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5305						      (__v4di) __B,
5306						      (__v4di)
5307						      _mm256_setzero_si256 (),
5308						      (__mmask8) __U);
5309}
5310
5311extern __inline __m128i
5312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5314			 __m128i __B)
5315{
5316  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5317						     (__v4si) __B,
5318						     (__v4si) __W,
5319						     (__mmask8) __U);
5320}
5321
5322extern __inline __m128i
5323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5324_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5325{
5326  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5327						     (__v4si) __B,
5328						     (__v4si)
5329						     _mm_setzero_si128 (),
5330						     (__mmask8) __U);
5331}
5332
5333extern __inline __m256i
5334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5335_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5336			    __m256i __B)
5337{
5338  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5339						     (__v8si) __B,
5340						     (__v8si) __W,
5341						     (__mmask8) __U);
5342}
5343
5344extern __inline __m256i
5345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5346_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5347{
5348  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5349						     (__v8si) __B,
5350						     (__v8si)
5351						     _mm256_setzero_si256 (),
5352						     (__mmask8) __U);
5353}
5354
5355extern __inline __m128i
5356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5357_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5358			 __m128i __B)
5359{
5360  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5361						      (__v2di) __B,
5362						      (__v2di) __W,
5363						      (__mmask8) __U);
5364}
5365
5366extern __inline __m128i
5367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5369{
5370  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5371						      (__v2di) __B,
5372						      (__v2di)
5373						      _mm_setzero_si128 (),
5374						      (__mmask8) __U);
5375}
5376
5377extern __inline __m256i
5378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5379_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5380			    __m256i __B)
5381{
5382  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5383						      (__v4di) __B,
5384						      (__v4di) __W,
5385						      (__mmask8) __U);
5386}
5387
5388extern __inline __m256i
5389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5391{
5392  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5393						      (__v4di) __B,
5394						      (__v4di)
5395						      _mm256_setzero_si256 (),
5396						      (__mmask8) __U);
5397}
5398
5399extern __inline __mmask8
5400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5401_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5402{
5403  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5404						   (__v4si) __B, 0,
5405						   (__mmask8) -1);
5406}
5407
5408extern __inline __mmask8
5409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5410_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5411{
5412  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5413						    (__v4si) __B,
5414						    (__mmask8) -1);
5415}
5416
5417extern __inline __mmask8
5418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5419_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5420{
5421  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5422						   (__v4si) __B, 0, __U);
5423}
5424
5425extern __inline __mmask8
5426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5427_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5428{
5429  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5430						    (__v4si) __B, __U);
5431}
5432
5433extern __inline __mmask8
5434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5436{
5437  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5438						   (__v8si) __B, 0,
5439						   (__mmask8) -1);
5440}
5441
5442extern __inline __mmask8
5443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5445{
5446  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5447						    (__v8si) __B,
5448						    (__mmask8) -1);
5449}
5450
5451extern __inline __mmask8
5452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5454{
5455  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5456						   (__v8si) __B, 0, __U);
5457}
5458
5459extern __inline __mmask8
5460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5461_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5462{
5463  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5464						    (__v8si) __B, __U);
5465}
5466
5467extern __inline __mmask8
5468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5470{
5471  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5472						   (__v2di) __B, 0,
5473						   (__mmask8) -1);
5474}
5475
5476extern __inline __mmask8
5477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5479{
5480  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5481						    (__v2di) __B,
5482						    (__mmask8) -1);
5483}
5484
5485extern __inline __mmask8
5486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5487_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5488{
5489  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5490						   (__v2di) __B, 0, __U);
5491}
5492
5493extern __inline __mmask8
5494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5495_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5496{
5497  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5498						    (__v2di) __B, __U);
5499}
5500
5501extern __inline __mmask8
5502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5504{
5505  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5506						   (__v4di) __B, 0,
5507						   (__mmask8) -1);
5508}
5509
5510extern __inline __mmask8
5511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5512_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5513{
5514  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5515						    (__v4di) __B,
5516						    (__mmask8) -1);
5517}
5518
5519extern __inline __mmask8
5520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5521_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5522{
5523  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5524						   (__v4di) __B, 0, __U);
5525}
5526
5527extern __inline __mmask8
5528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5529_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5530{
5531  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5532						    (__v4di) __B, __U);
5533}
5534
5535extern __inline __mmask8
5536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5537_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5538{
5539  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5540						   (__v4si) __B, 6,
5541						   (__mmask8) -1);
5542}
5543
5544extern __inline __mmask8
5545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5546_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5547{
5548  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5549						    (__v4si) __B,
5550						    (__mmask8) -1);
5551}
5552
5553extern __inline __mmask8
5554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5555_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5556{
5557  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5558						   (__v4si) __B, 6, __U);
5559}
5560
5561extern __inline __mmask8
5562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5564{
5565  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5566						    (__v4si) __B, __U);
5567}
5568
5569extern __inline __mmask8
5570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5571_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5572{
5573  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5574						   (__v8si) __B, 6,
5575						   (__mmask8) -1);
5576}
5577
5578extern __inline __mmask8
5579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5580_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5581{
5582  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5583						    (__v8si) __B,
5584						    (__mmask8) -1);
5585}
5586
5587extern __inline __mmask8
5588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5589_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5590{
5591  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5592						   (__v8si) __B, 6, __U);
5593}
5594
5595extern __inline __mmask8
5596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5597_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5598{
5599  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5600						    (__v8si) __B, __U);
5601}
5602
5603extern __inline __mmask8
5604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5605_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5606{
5607  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5608						   (__v2di) __B, 6,
5609						   (__mmask8) -1);
5610}
5611
5612extern __inline __mmask8
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5615{
5616  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5617						    (__v2di) __B,
5618						    (__mmask8) -1);
5619}
5620
5621extern __inline __mmask8
5622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5623_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5624{
5625  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5626						   (__v2di) __B, 6, __U);
5627}
5628
5629extern __inline __mmask8
5630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5631_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5632{
5633  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5634						    (__v2di) __B, __U);
5635}
5636
5637extern __inline __mmask8
5638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5640{
5641  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5642						   (__v4di) __B, 6,
5643						   (__mmask8) -1);
5644}
5645
5646extern __inline __mmask8
5647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5648_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5649{
5650  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5651						    (__v4di) __B,
5652						    (__mmask8) -1);
5653}
5654
5655extern __inline __mmask8
5656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5657_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5658{
5659  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5660						   (__v4di) __B, 6, __U);
5661}
5662
5663extern __inline __mmask8
5664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5665_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5666{
5667  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5668						    (__v4di) __B, __U);
5669}
5670
5671extern __inline __mmask8
5672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5673_mm_test_epi32_mask (__m128i __A, __m128i __B)
5674{
5675  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5676					       (__v4si) __B,
5677					       (__mmask8) -1);
5678}
5679
5680extern __inline __mmask8
5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5683{
5684  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5685					       (__v4si) __B, __U);
5686}
5687
5688extern __inline __mmask8
5689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5690_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5691{
5692  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5693					       (__v8si) __B,
5694					       (__mmask8) -1);
5695}
5696
5697extern __inline __mmask8
5698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5700{
5701  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5702					       (__v8si) __B, __U);
5703}
5704
5705extern __inline __mmask8
5706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707_mm_test_epi64_mask (__m128i __A, __m128i __B)
5708{
5709  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5710					       (__v2di) __B,
5711					       (__mmask8) -1);
5712}
5713
5714extern __inline __mmask8
5715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5716_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5717{
5718  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5719					       (__v2di) __B, __U);
5720}
5721
5722extern __inline __mmask8
5723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5724_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5725{
5726  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5727					       (__v4di) __B,
5728					       (__mmask8) -1);
5729}
5730
5731extern __inline __mmask8
5732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5733_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5734{
5735  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5736					       (__v4di) __B, __U);
5737}
5738
5739extern __inline __mmask8
5740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5741_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5742{
5743  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5744						(__v4si) __B,
5745						(__mmask8) -1);
5746}
5747
5748extern __inline __mmask8
5749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5750_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5751{
5752  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5753						(__v4si) __B, __U);
5754}
5755
5756extern __inline __mmask8
5757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5759{
5760  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5761						(__v8si) __B,
5762						(__mmask8) -1);
5763}
5764
5765extern __inline __mmask8
5766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5767_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5768{
5769  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5770						(__v8si) __B, __U);
5771}
5772
5773extern __inline __mmask8
5774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5775_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5776{
5777  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5778						(__v2di) __B,
5779						(__mmask8) -1);
5780}
5781
5782extern __inline __mmask8
5783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5784_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5785{
5786  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5787						(__v2di) __B, __U);
5788}
5789
5790extern __inline __mmask8
5791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5792_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5793{
5794  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5795						(__v4di) __B,
5796						(__mmask8) -1);
5797}
5798
5799extern __inline __mmask8
5800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5802{
5803  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5804						(__v4di) __B, __U);
5805}
5806
5807extern __inline __m256d
5808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5810{
5811  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5812						      (__v4df) __W,
5813						      (__mmask8) __U);
5814}
5815
5816extern __inline __m256d
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5819{
5820  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5821						      (__v4df)
5822						      _mm256_setzero_pd (),
5823						      (__mmask8) __U);
5824}
5825
5826extern __inline void
5827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5829{
5830  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5831					  (__v4df) __A,
5832					  (__mmask8) __U);
5833}
5834
5835extern __inline __m128d
5836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5838{
5839  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5840						      (__v2df) __W,
5841						      (__mmask8) __U);
5842}
5843
5844extern __inline __m128d
5845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5847{
5848  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5849						      (__v2df)
5850						      _mm_setzero_pd (),
5851						      (__mmask8) __U);
5852}
5853
5854extern __inline void
5855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5857{
5858  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5859					  (__v2df) __A,
5860					  (__mmask8) __U);
5861}
5862
5863extern __inline __m256
5864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5866{
5867  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5868						     (__v8sf) __W,
5869						     (__mmask8) __U);
5870}
5871
5872extern __inline __m256
5873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5875{
5876  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5877						     (__v8sf)
5878						     _mm256_setzero_ps (),
5879						     (__mmask8) __U);
5880}
5881
5882extern __inline void
5883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5885{
5886  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5887					  (__v8sf) __A,
5888					  (__mmask8) __U);
5889}
5890
5891extern __inline __m128
5892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5894{
5895  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5896						     (__v4sf) __W,
5897						     (__mmask8) __U);
5898}
5899
5900extern __inline __m128
5901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5903{
5904  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5905						     (__v4sf)
5906						     _mm_setzero_ps (),
5907						     (__mmask8) __U);
5908}
5909
5910extern __inline void
5911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5913{
5914  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5915					  (__v4sf) __A,
5916					  (__mmask8) __U);
5917}
5918
5919extern __inline __m256i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5922{
5923  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5924						      (__v4di) __W,
5925						      (__mmask8) __U);
5926}
5927
5928extern __inline __m256i
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5931{
5932  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5933						      (__v4di)
5934						      _mm256_setzero_si256 (),
5935						      (__mmask8) __U);
5936}
5937
5938extern __inline void
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5941{
5942  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5943					  (__v4di) __A,
5944					  (__mmask8) __U);
5945}
5946
5947extern __inline __m128i
5948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5950{
5951  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5952						      (__v2di) __W,
5953						      (__mmask8) __U);
5954}
5955
5956extern __inline __m128i
5957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5959{
5960  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5961						      (__v2di)
5962						      _mm_setzero_si128 (),
5963						      (__mmask8) __U);
5964}
5965
5966extern __inline void
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5969{
5970  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5971					  (__v2di) __A,
5972					  (__mmask8) __U);
5973}
5974
5975extern __inline __m256i
5976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5978{
5979  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5980						      (__v8si) __W,
5981						      (__mmask8) __U);
5982}
5983
5984extern __inline __m256i
5985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5987{
5988  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5989						      (__v8si)
5990						      _mm256_setzero_si256 (),
5991						      (__mmask8) __U);
5992}
5993
5994extern __inline void
5995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5997{
5998  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5999					  (__v8si) __A,
6000					  (__mmask8) __U);
6001}
6002
6003extern __inline __m128i
6004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6006{
6007  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6008						      (__v4si) __W,
6009						      (__mmask8) __U);
6010}
6011
6012extern __inline __m128i
6013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
6015{
6016  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6017						      (__v4si)
6018						      _mm_setzero_si128 (),
6019						      (__mmask8) __U);
6020}
6021
6022extern __inline void
6023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6025{
6026  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
6027					  (__v4si) __A,
6028					  (__mmask8) __U);
6029}
6030
6031extern __inline __m256d
6032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6033_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6034{
6035  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6036						    (__v4df) __W,
6037						    (__mmask8) __U);
6038}
6039
6040extern __inline __m256d
6041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6042_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6043{
6044  return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6045						     (__v4df)
6046						     _mm256_setzero_pd (),
6047						     (__mmask8) __U);
6048}
6049
6050extern __inline __m256d
6051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6052_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6053{
6054  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6055							(__v4df) __W,
6056							(__mmask8)
6057							__U);
6058}
6059
6060extern __inline __m256d
6061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6063{
6064  return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6065							 (__v4df)
6066							 _mm256_setzero_pd (),
6067							 (__mmask8)
6068							 __U);
6069}
6070
6071extern __inline __m128d
6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6074{
6075  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6076						    (__v2df) __W,
6077						    (__mmask8) __U);
6078}
6079
6080extern __inline __m128d
6081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6083{
6084  return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6085						     (__v2df)
6086						     _mm_setzero_pd (),
6087						     (__mmask8) __U);
6088}
6089
6090extern __inline __m128d
6091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6093{
6094  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6095							(__v2df) __W,
6096							(__mmask8)
6097							__U);
6098}
6099
6100extern __inline __m128d
6101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6102_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6103{
6104  return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6105							 (__v2df)
6106							 _mm_setzero_pd (),
6107							 (__mmask8)
6108							 __U);
6109}
6110
6111extern __inline __m256
6112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6114{
6115  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6116						   (__v8sf) __W,
6117						   (__mmask8) __U);
6118}
6119
6120extern __inline __m256
6121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6122_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6123{
6124  return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6125						    (__v8sf)
6126						    _mm256_setzero_ps (),
6127						    (__mmask8) __U);
6128}
6129
6130extern __inline __m256
6131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6132_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6133{
6134  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6135						       (__v8sf) __W,
6136						       (__mmask8) __U);
6137}
6138
6139extern __inline __m256
6140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6141_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6142{
6143  return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6144							(__v8sf)
6145							_mm256_setzero_ps (),
6146							(__mmask8)
6147							__U);
6148}
6149
6150extern __inline __m128
6151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6153{
6154  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6155						   (__v4sf) __W,
6156						   (__mmask8) __U);
6157}
6158
6159extern __inline __m128
6160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6161_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6162{
6163  return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6164						    (__v4sf)
6165						    _mm_setzero_ps (),
6166						    (__mmask8) __U);
6167}
6168
6169extern __inline __m128
6170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6172{
6173  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6174						       (__v4sf) __W,
6175						       (__mmask8) __U);
6176}
6177
6178extern __inline __m128
6179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6180_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6181{
6182  return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6183							(__v4sf)
6184							_mm_setzero_ps (),
6185							(__mmask8)
6186							__U);
6187}
6188
6189extern __inline __m256i
6190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6191_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6192{
6193  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6194						    (__v4di) __W,
6195						    (__mmask8) __U);
6196}
6197
6198extern __inline __m256i
6199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6200_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6201{
6202  return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6203						     (__v4di)
6204						     _mm256_setzero_si256 (),
6205						     (__mmask8) __U);
6206}
6207
6208extern __inline __m256i
6209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6210_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6211			       void const *__P)
6212{
6213  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6214							(__v4di) __W,
6215							(__mmask8)
6216							__U);
6217}
6218
6219extern __inline __m256i
6220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6222{
6223  return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6224							 (__v4di)
6225							 _mm256_setzero_si256 (),
6226							 (__mmask8)
6227							 __U);
6228}
6229
6230extern __inline __m128i
6231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6232_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6233{
6234  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6235						    (__v2di) __W,
6236						    (__mmask8) __U);
6237}
6238
6239extern __inline __m128i
6240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6241_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6242{
6243  return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6244						     (__v2di)
6245						     _mm_setzero_si128 (),
6246						     (__mmask8) __U);
6247}
6248
6249extern __inline __m128i
6250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6251_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6252{
6253  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6254							(__v2di) __W,
6255							(__mmask8)
6256							__U);
6257}
6258
6259extern __inline __m128i
6260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6261_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6262{
6263  return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6264							 (__v2di)
6265							 _mm_setzero_si128 (),
6266							 (__mmask8)
6267							 __U);
6268}
6269
6270extern __inline __m256i
6271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6272_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6273{
6274  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6275						    (__v8si) __W,
6276						    (__mmask8) __U);
6277}
6278
6279extern __inline __m256i
6280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6282{
6283  return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6284						     (__v8si)
6285						     _mm256_setzero_si256 (),
6286						     (__mmask8) __U);
6287}
6288
6289extern __inline __m256i
6290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6291_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6292			       void const *__P)
6293{
6294  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6295							(__v8si) __W,
6296							(__mmask8)
6297							__U);
6298}
6299
6300extern __inline __m256i
6301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6303{
6304  return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6305							 (__v8si)
6306							 _mm256_setzero_si256 (),
6307							 (__mmask8)
6308							 __U);
6309}
6310
6311extern __inline __m128i
6312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6313_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6314{
6315  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6316						    (__v4si) __W,
6317						    (__mmask8) __U);
6318}
6319
6320extern __inline __m128i
6321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6322_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6323{
6324  return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6325						     (__v4si)
6326						     _mm_setzero_si128 (),
6327						     (__mmask8) __U);
6328}
6329
6330extern __inline __m128i
6331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6333{
6334  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6335							(__v4si) __W,
6336							(__mmask8)
6337							__U);
6338}
6339
6340extern __inline __m128i
6341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6342_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6343{
6344  return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6345							 (__v4si)
6346							 _mm_setzero_si128 (),
6347							 (__mmask8)
6348							 __U);
6349}
6350
6351extern __inline __m256d
6352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6354{
6355  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6356							/* idx */ ,
6357							(__v4df) __A,
6358							(__v4df) __B,
6359							(__mmask8) -1);
6360}
6361
6362extern __inline __m256d
6363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6365			     __m256d __B)
6366{
6367  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6368							/* idx */ ,
6369							(__v4df) __A,
6370							(__v4df) __B,
6371							(__mmask8)
6372							__U);
6373}
6374
6375extern __inline __m256d
6376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6377_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6378			      __m256d __B)
6379{
6380  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6381							(__v4di) __I
6382							/* idx */ ,
6383							(__v4df) __B,
6384							(__mmask8)
6385							__U);
6386}
6387
6388extern __inline __m256d
6389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6390_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6391			      __m256d __B)
6392{
6393  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6394							 /* idx */ ,
6395							 (__v4df) __A,
6396							 (__v4df) __B,
6397							 (__mmask8)
6398							 __U);
6399}
6400
6401extern __inline __m256
6402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6403_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6404{
6405  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6406						       /* idx */ ,
6407						       (__v8sf) __A,
6408						       (__v8sf) __B,
6409						       (__mmask8) -1);
6410}
6411
6412extern __inline __m256
6413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6414_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6415			     __m256 __B)
6416{
6417  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6418						       /* idx */ ,
6419						       (__v8sf) __A,
6420						       (__v8sf) __B,
6421						       (__mmask8) __U);
6422}
6423
6424extern __inline __m256
6425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6426_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6427			      __m256 __B)
6428{
6429  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6430						       (__v8si) __I
6431						       /* idx */ ,
6432						       (__v8sf) __B,
6433						       (__mmask8) __U);
6434}
6435
6436extern __inline __m256
6437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6438_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6439			      __m256 __B)
6440{
6441  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6442							/* idx */ ,
6443							(__v8sf) __A,
6444							(__v8sf) __B,
6445							(__mmask8)
6446							__U);
6447}
6448
6449extern __inline __m128i
6450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6451_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6452{
6453  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6454						       /* idx */ ,
6455						       (__v2di) __A,
6456						       (__v2di) __B,
6457						       (__mmask8) -1);
6458}
6459
6460extern __inline __m128i
6461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6462_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6463			     __m128i __B)
6464{
6465  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6466						       /* idx */ ,
6467						       (__v2di) __A,
6468						       (__v2di) __B,
6469						       (__mmask8) __U);
6470}
6471
6472extern __inline __m128i
6473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6474_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6475			      __m128i __B)
6476{
6477  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6478						       (__v2di) __I
6479						       /* idx */ ,
6480						       (__v2di) __B,
6481						       (__mmask8) __U);
6482}
6483
6484extern __inline __m128i
6485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6486_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6487			      __m128i __B)
6488{
6489  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6490							/* idx */ ,
6491							(__v2di) __A,
6492							(__v2di) __B,
6493							(__mmask8)
6494							__U);
6495}
6496
6497extern __inline __m128i
6498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6499_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6500{
6501  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6502						       /* idx */ ,
6503						       (__v4si) __A,
6504						       (__v4si) __B,
6505						       (__mmask8) -1);
6506}
6507
6508extern __inline __m128i
6509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6510_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6511			     __m128i __B)
6512{
6513  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6514						       /* idx */ ,
6515						       (__v4si) __A,
6516						       (__v4si) __B,
6517						       (__mmask8) __U);
6518}
6519
6520extern __inline __m128i
6521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6522_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6523			      __m128i __B)
6524{
6525  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6526						       (__v4si) __I
6527						       /* idx */ ,
6528						       (__v4si) __B,
6529						       (__mmask8) __U);
6530}
6531
6532extern __inline __m128i
6533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6534_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6535			      __m128i __B)
6536{
6537  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6538							/* idx */ ,
6539							(__v4si) __A,
6540							(__v4si) __B,
6541							(__mmask8)
6542							__U);
6543}
6544
6545extern __inline __m256i
6546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6548{
6549  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6550						       /* idx */ ,
6551						       (__v4di) __A,
6552						       (__v4di) __B,
6553						       (__mmask8) -1);
6554}
6555
6556extern __inline __m256i
6557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6559				__m256i __B)
6560{
6561  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6562						       /* idx */ ,
6563						       (__v4di) __A,
6564						       (__v4di) __B,
6565						       (__mmask8) __U);
6566}
6567
6568extern __inline __m256i
6569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6571				 __mmask8 __U, __m256i __B)
6572{
6573  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6574						       (__v4di) __I
6575						       /* idx */ ,
6576						       (__v4di) __B,
6577						       (__mmask8) __U);
6578}
6579
6580extern __inline __m256i
6581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6582_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6583				 __m256i __I, __m256i __B)
6584{
6585  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6586							/* idx */ ,
6587							(__v4di) __A,
6588							(__v4di) __B,
6589							(__mmask8)
6590							__U);
6591}
6592
6593extern __inline __m256i
6594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6595_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6596{
6597  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6598						       /* idx */ ,
6599						       (__v8si) __A,
6600						       (__v8si) __B,
6601						       (__mmask8) -1);
6602}
6603
6604extern __inline __m256i
6605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6607				__m256i __B)
6608{
6609  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6610						       /* idx */ ,
6611						       (__v8si) __A,
6612						       (__v8si) __B,
6613						       (__mmask8) __U);
6614}
6615
6616extern __inline __m256i
6617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6619				 __mmask8 __U, __m256i __B)
6620{
6621  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6622						       (__v8si) __I
6623						       /* idx */ ,
6624						       (__v8si) __B,
6625						       (__mmask8) __U);
6626}
6627
6628extern __inline __m256i
6629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6630_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6631				 __m256i __I, __m256i __B)
6632{
6633  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6634							/* idx */ ,
6635							(__v8si) __A,
6636							(__v8si) __B,
6637							(__mmask8)
6638							__U);
6639}
6640
6641extern __inline __m128d
6642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6644{
6645  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6646							/* idx */ ,
6647							(__v2df) __A,
6648							(__v2df) __B,
6649							(__mmask8) -1);
6650}
6651
6652extern __inline __m128d
6653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6654_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6655			  __m128d __B)
6656{
6657  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6658							/* idx */ ,
6659							(__v2df) __A,
6660							(__v2df) __B,
6661							(__mmask8)
6662							__U);
6663}
6664
6665extern __inline __m128d
6666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6668			   __m128d __B)
6669{
6670  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6671							(__v2di) __I
6672							/* idx */ ,
6673							(__v2df) __B,
6674							(__mmask8)
6675							__U);
6676}
6677
6678extern __inline __m128d
6679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6681			   __m128d __B)
6682{
6683  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6684							 /* idx */ ,
6685							 (__v2df) __A,
6686							 (__v2df) __B,
6687							 (__mmask8)
6688							 __U);
6689}
6690
6691extern __inline __m128
6692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6693_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6694{
6695  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6696						       /* idx */ ,
6697						       (__v4sf) __A,
6698						       (__v4sf) __B,
6699						       (__mmask8) -1);
6700}
6701
6702extern __inline __m128
6703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6704_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6705			  __m128 __B)
6706{
6707  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6708						       /* idx */ ,
6709						       (__v4sf) __A,
6710						       (__v4sf) __B,
6711						       (__mmask8) __U);
6712}
6713
6714extern __inline __m128
6715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6716_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6717			   __m128 __B)
6718{
6719  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6720						       (__v4si) __I
6721						       /* idx */ ,
6722						       (__v4sf) __B,
6723						       (__mmask8) __U);
6724}
6725
6726extern __inline __m128
6727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6728_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6729			   __m128 __B)
6730{
6731  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6732							/* idx */ ,
6733							(__v4sf) __A,
6734							(__v4sf) __B,
6735							(__mmask8)
6736							__U);
6737}
6738
6739extern __inline __m128i
6740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6741_mm_srav_epi64 (__m128i __X, __m128i __Y)
6742{
6743  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6744						  (__v2di) __Y,
6745						  (__v2di)
6746						  _mm_setzero_si128 (),
6747						  (__mmask8) -1);
6748}
6749
6750extern __inline __m128i
6751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6752_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6753		     __m128i __Y)
6754{
6755  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6756						  (__v2di) __Y,
6757						  (__v2di) __W,
6758						  (__mmask8) __U);
6759}
6760
6761extern __inline __m128i
6762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6764{
6765  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6766						  (__v2di) __Y,
6767						  (__v2di)
6768						  _mm_setzero_si128 (),
6769						  (__mmask8) __U);
6770}
6771
6772extern __inline __m256i
6773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6774_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6775			__m256i __Y)
6776{
6777  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6778						 (__v8si) __Y,
6779						 (__v8si) __W,
6780						 (__mmask8) __U);
6781}
6782
6783extern __inline __m256i
6784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6785_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6786{
6787  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6788						 (__v8si) __Y,
6789						 (__v8si)
6790						 _mm256_setzero_si256 (),
6791						 (__mmask8) __U);
6792}
6793
6794extern __inline __m128i
6795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6796_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6797		     __m128i __Y)
6798{
6799  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6800						 (__v4si) __Y,
6801						 (__v4si) __W,
6802						 (__mmask8) __U);
6803}
6804
6805extern __inline __m128i
6806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6807_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6808{
6809  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6810						 (__v4si) __Y,
6811						 (__v4si)
6812						 _mm_setzero_si128 (),
6813						 (__mmask8) __U);
6814}
6815
6816extern __inline __m256i
6817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6818_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6819			__m256i __Y)
6820{
6821  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6822						 (__v4di) __Y,
6823						 (__v4di) __W,
6824						 (__mmask8) __U);
6825}
6826
6827extern __inline __m256i
6828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6829_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6830{
6831  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6832						 (__v4di) __Y,
6833						 (__v4di)
6834						 _mm256_setzero_si256 (),
6835						 (__mmask8) __U);
6836}
6837
6838extern __inline __m128i
6839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6841		     __m128i __Y)
6842{
6843  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6844						 (__v2di) __Y,
6845						 (__v2di) __W,
6846						 (__mmask8) __U);
6847}
6848
6849extern __inline __m128i
6850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6852{
6853  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6854						 (__v2di) __Y,
6855						 (__v2di)
6856						 _mm_setzero_si128 (),
6857						 (__mmask8) __U);
6858}
6859
6860extern __inline __m256i
6861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6863			__m256i __Y)
6864{
6865  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6866						 (__v8si) __Y,
6867						 (__v8si) __W,
6868						 (__mmask8) __U);
6869}
6870
6871extern __inline __m256i
6872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6874{
6875  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6876						 (__v8si) __Y,
6877						 (__v8si)
6878						 _mm256_setzero_si256 (),
6879						 (__mmask8) __U);
6880}
6881
6882extern __inline __m128i
6883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6884_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6885		     __m128i __Y)
6886{
6887  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6888						 (__v4si) __Y,
6889						 (__v4si) __W,
6890						 (__mmask8) __U);
6891}
6892
6893extern __inline __m128i
6894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6895_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6896{
6897  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6898						 (__v4si) __Y,
6899						 (__v4si)
6900						 _mm_setzero_si128 (),
6901						 (__mmask8) __U);
6902}
6903
6904extern __inline __m256i
6905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6906_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6907			__m256i __Y)
6908{
6909  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6910						 (__v8si) __Y,
6911						 (__v8si) __W,
6912						 (__mmask8) __U);
6913}
6914
6915extern __inline __m256i
6916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6917_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6918{
6919  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6920						 (__v8si) __Y,
6921						 (__v8si)
6922						 _mm256_setzero_si256 (),
6923						 (__mmask8) __U);
6924}
6925
6926extern __inline __m128i
6927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6928_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6929		     __m128i __Y)
6930{
6931  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6932						 (__v4si) __Y,
6933						 (__v4si) __W,
6934						 (__mmask8) __U);
6935}
6936
6937extern __inline __m128i
6938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6939_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6940{
6941  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6942						 (__v4si) __Y,
6943						 (__v4si)
6944						 _mm_setzero_si128 (),
6945						 (__mmask8) __U);
6946}
6947
6948extern __inline __m256i
6949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6950_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6951			__m256i __Y)
6952{
6953  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6954						 (__v4di) __Y,
6955						 (__v4di) __W,
6956						 (__mmask8) __U);
6957}
6958
6959extern __inline __m256i
6960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6961_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6962{
6963  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6964						 (__v4di) __Y,
6965						 (__v4di)
6966						 _mm256_setzero_si256 (),
6967						 (__mmask8) __U);
6968}
6969
6970extern __inline __m128i
6971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6972_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6973		     __m128i __Y)
6974{
6975  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6976						 (__v2di) __Y,
6977						 (__v2di) __W,
6978						 (__mmask8) __U);
6979}
6980
6981extern __inline __m128i
6982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6983_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6984{
6985  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6986						 (__v2di) __Y,
6987						 (__v2di)
6988						 _mm_setzero_si128 (),
6989						 (__mmask8) __U);
6990}
6991
6992extern __inline __m256i
6993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6994_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6995{
6996  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6997						  (__v8si) __B,
6998						  (__v8si)
6999						  _mm256_setzero_si256 (),
7000						  (__mmask8) -1);
7001}
7002
7003extern __inline __m256i
7004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7005_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7006			__m256i __B)
7007{
7008  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7009						  (__v8si) __B,
7010						  (__v8si) __W,
7011						  (__mmask8) __U);
7012}
7013
7014extern __inline __m256i
7015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7016_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7017{
7018  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7019						  (__v8si) __B,
7020						  (__v8si)
7021						  _mm256_setzero_si256 (),
7022						  (__mmask8) __U);
7023}
7024
7025extern __inline __m128i
7026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7027_mm_rolv_epi32 (__m128i __A, __m128i __B)
7028{
7029  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7030						  (__v4si) __B,
7031						  (__v4si)
7032						  _mm_setzero_si128 (),
7033						  (__mmask8) -1);
7034}
7035
7036extern __inline __m128i
7037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7038_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7039		     __m128i __B)
7040{
7041  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7042						  (__v4si) __B,
7043						  (__v4si) __W,
7044						  (__mmask8) __U);
7045}
7046
7047extern __inline __m128i
7048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7049_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7050{
7051  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7052						  (__v4si) __B,
7053						  (__v4si)
7054						  _mm_setzero_si128 (),
7055						  (__mmask8) __U);
7056}
7057
7058extern __inline __m256i
7059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7060_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7061{
7062  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7063						  (__v8si) __B,
7064						  (__v8si)
7065						  _mm256_setzero_si256 (),
7066						  (__mmask8) -1);
7067}
7068
7069extern __inline __m256i
7070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7071_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7072			__m256i __B)
7073{
7074  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7075						  (__v8si) __B,
7076						  (__v8si) __W,
7077						  (__mmask8) __U);
7078}
7079
7080extern __inline __m256i
7081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7082_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7083{
7084  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7085						  (__v8si) __B,
7086						  (__v8si)
7087						  _mm256_setzero_si256 (),
7088						  (__mmask8) __U);
7089}
7090
7091extern __inline __m128i
7092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7093_mm_rorv_epi32 (__m128i __A, __m128i __B)
7094{
7095  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7096						  (__v4si) __B,
7097						  (__v4si)
7098						  _mm_setzero_si128 (),
7099						  (__mmask8) -1);
7100}
7101
7102extern __inline __m128i
7103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7104_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7105		     __m128i __B)
7106{
7107  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7108						  (__v4si) __B,
7109						  (__v4si) __W,
7110						  (__mmask8) __U);
7111}
7112
7113extern __inline __m128i
7114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7115_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7116{
7117  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7118						  (__v4si) __B,
7119						  (__v4si)
7120						  _mm_setzero_si128 (),
7121						  (__mmask8) __U);
7122}
7123
7124extern __inline __m256i
7125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7126_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7127{
7128  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7129						  (__v4di) __B,
7130						  (__v4di)
7131						  _mm256_setzero_si256 (),
7132						  (__mmask8) -1);
7133}
7134
7135extern __inline __m256i
7136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7137_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7138			__m256i __B)
7139{
7140  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7141						  (__v4di) __B,
7142						  (__v4di) __W,
7143						  (__mmask8) __U);
7144}
7145
7146extern __inline __m256i
7147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7148_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7149{
7150  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7151						  (__v4di) __B,
7152						  (__v4di)
7153						  _mm256_setzero_si256 (),
7154						  (__mmask8) __U);
7155}
7156
7157extern __inline __m128i
7158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7159_mm_rolv_epi64 (__m128i __A, __m128i __B)
7160{
7161  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7162						  (__v2di) __B,
7163						  (__v2di)
7164						  _mm_setzero_si128 (),
7165						  (__mmask8) -1);
7166}
7167
7168extern __inline __m128i
7169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7170_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7171		     __m128i __B)
7172{
7173  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7174						  (__v2di) __B,
7175						  (__v2di) __W,
7176						  (__mmask8) __U);
7177}
7178
7179extern __inline __m128i
7180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7181_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7182{
7183  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7184						  (__v2di) __B,
7185						  (__v2di)
7186						  _mm_setzero_si128 (),
7187						  (__mmask8) __U);
7188}
7189
7190extern __inline __m256i
7191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7192_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7193{
7194  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7195						  (__v4di) __B,
7196						  (__v4di)
7197						  _mm256_setzero_si256 (),
7198						  (__mmask8) -1);
7199}
7200
7201extern __inline __m256i
7202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7203_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7204			__m256i __B)
7205{
7206  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7207						  (__v4di) __B,
7208						  (__v4di) __W,
7209						  (__mmask8) __U);
7210}
7211
7212extern __inline __m256i
7213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7215{
7216  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7217						  (__v4di) __B,
7218						  (__v4di)
7219						  _mm256_setzero_si256 (),
7220						  (__mmask8) __U);
7221}
7222
7223extern __inline __m128i
7224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7225_mm_rorv_epi64 (__m128i __A, __m128i __B)
7226{
7227  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7228						  (__v2di) __B,
7229						  (__v2di)
7230						  _mm_setzero_si128 (),
7231						  (__mmask8) -1);
7232}
7233
7234extern __inline __m128i
7235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7236_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7237		     __m128i __B)
7238{
7239  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7240						  (__v2di) __B,
7241						  (__v2di) __W,
7242						  (__mmask8) __U);
7243}
7244
7245extern __inline __m128i
7246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7247_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7248{
7249  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7250						  (__v2di) __B,
7251						  (__v2di)
7252						  _mm_setzero_si128 (),
7253						  (__mmask8) __U);
7254}
7255
7256extern __inline __m256i
7257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7258_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7259{
7260  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7261						  (__v4di) __Y,
7262						  (__v4di)
7263						  _mm256_setzero_si256 (),
7264						  (__mmask8) -1);
7265}
7266
7267extern __inline __m256i
7268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7270			__m256i __Y)
7271{
7272  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7273						  (__v4di) __Y,
7274						  (__v4di) __W,
7275						  (__mmask8) __U);
7276}
7277
7278extern __inline __m256i
7279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7281{
7282  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7283						  (__v4di) __Y,
7284						  (__v4di)
7285						  _mm256_setzero_si256 (),
7286						  (__mmask8) __U);
7287}
7288
7289extern __inline __m256i
7290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7292		       __m256i __B)
7293{
7294  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7295						 (__v4di) __B,
7296						 (__v4di) __W, __U);
7297}
7298
7299extern __inline __m256i
7300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7301_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7302{
7303  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7304						 (__v4di) __B,
7305						 (__v4di)
7306						 _mm256_setzero_pd (),
7307						 __U);
7308}
7309
7310extern __inline __m128i
7311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7312_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7313		    __m128i __B)
7314{
7315  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7316						 (__v2di) __B,
7317						 (__v2di) __W, __U);
7318}
7319
7320extern __inline __m128i
7321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7323{
7324  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7325						 (__v2di) __B,
7326						 (__v2di)
7327						 _mm_setzero_pd (),
7328						 __U);
7329}
7330
7331extern __inline __m256i
7332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7333_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7334			  __m256i __B)
7335{
7336  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7337						  (__v4di) __B,
7338						  (__v4di) __W, __U);
7339}
7340
7341extern __inline __m256i
7342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7343_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7344{
7345  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7346						  (__v4di) __B,
7347						  (__v4di)
7348						  _mm256_setzero_pd (),
7349						  __U);
7350}
7351
7352extern __inline __m128i
7353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7354_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7355		       __m128i __B)
7356{
7357  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7358						  (__v2di) __B,
7359						  (__v2di) __W, __U);
7360}
7361
7362extern __inline __m128i
7363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7364_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7365{
7366  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7367						  (__v2di) __B,
7368						  (__v2di)
7369						  _mm_setzero_pd (),
7370						  __U);
7371}
7372
7373extern __inline __m256i
7374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7376		      __m256i __B)
7377{
7378  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7379						(__v4di) __B,
7380						(__v4di) __W,
7381						(__mmask8) __U);
7382}
7383
7384extern __inline __m256i
7385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7386_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7387{
7388  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7389						(__v4di) __B,
7390						(__v4di)
7391						_mm256_setzero_si256 (),
7392						(__mmask8) __U);
7393}
7394
7395extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7396_mm256_or_epi64 (__m256i __A, __m256i __B)
7397{
7398  return (__m256i) ((__v4du)__A | (__v4du)__B);
7399}
7400
7401extern __inline __m128i
7402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7403_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7404{
7405  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7406						(__v2di) __B,
7407						(__v2di) __W,
7408						(__mmask8) __U);
7409}
7410
7411extern __inline __m128i
7412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7414{
7415  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7416						(__v2di) __B,
7417						(__v2di)
7418						_mm_setzero_si128 (),
7419						(__mmask8) __U);
7420}
7421
7422extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423_mm_or_epi64 (__m128i __A, __m128i __B)
7424{
7425  return (__m128i) ((__v2du)__A | (__v2du)__B);
7426}
7427
7428extern __inline __m256i
7429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7431		       __m256i __B)
7432{
7433  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7434						 (__v4di) __B,
7435						 (__v4di) __W,
7436						 (__mmask8) __U);
7437}
7438
7439extern __inline __m256i
7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7442{
7443  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7444						 (__v4di) __B,
7445						 (__v4di)
7446						 _mm256_setzero_si256 (),
7447						 (__mmask8) __U);
7448}
7449
7450extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451_mm256_xor_epi64 (__m256i __A, __m256i __B)
7452{
7453  return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7454}
7455
7456extern __inline __m128i
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7459		    __m128i __B)
7460{
7461  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7462						 (__v2di) __B,
7463						 (__v2di) __W,
7464						 (__mmask8) __U);
7465}
7466
7467extern __inline __m128i
7468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7470{
7471  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7472						 (__v2di) __B,
7473						 (__v2di)
7474						 _mm_setzero_si128 (),
7475						 (__mmask8) __U);
7476}
7477
7478extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7479_mm_xor_epi64 (__m128i __A, __m128i __B)
7480{
7481  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7482}
7483
7484extern __inline __m256d
7485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487		    __m256d __B)
7488{
7489  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7490						 (__v4df) __B,
7491						 (__v4df) __W,
7492						 (__mmask8) __U);
7493}
7494
7495extern __inline __m256d
7496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7498{
7499  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7500						 (__v4df) __B,
7501						 (__v4df)
7502						 _mm256_setzero_pd (),
7503						 (__mmask8) __U);
7504}
7505
7506extern __inline __m256
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7509{
7510  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7511						(__v8sf) __B,
7512						(__v8sf) __W,
7513						(__mmask8) __U);
7514}
7515
7516extern __inline __m256
7517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7518_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7519{
7520  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7521						(__v8sf) __B,
7522						(__v8sf)
7523						_mm256_setzero_ps (),
7524						(__mmask8) __U);
7525}
7526
7527extern __inline __m128
7528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7530{
7531  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7532					     (__v4sf) __B,
7533					     (__v4sf) __W,
7534					     (__mmask8) __U);
7535}
7536
7537extern __inline __m128
7538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7539_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7540{
7541  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7542					     (__v4sf) __B,
7543					     (__v4sf)
7544					     _mm_setzero_ps (),
7545					     (__mmask8) __U);
7546}
7547
7548extern __inline __m128d
7549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7551{
7552  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7553					      (__v2df) __B,
7554					      (__v2df) __W,
7555					      (__mmask8) __U);
7556}
7557
7558extern __inline __m128d
7559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7560_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7561{
7562  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7563					      (__v2df) __B,
7564					      (__v2df)
7565					      _mm_setzero_pd (),
7566					      (__mmask8) __U);
7567}
7568
7569extern __inline __m256d
7570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7571_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7572		    __m256d __B)
7573{
7574  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7575						 (__v4df) __B,
7576						 (__v4df) __W,
7577						 (__mmask8) __U);
7578}
7579
7580extern __inline __m256d
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7583		    __m256d __B)
7584{
7585  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7586						 (__v4df) __B,
7587						 (__v4df) __W,
7588						 (__mmask8) __U);
7589}
7590
7591extern __inline __m256d
7592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7593_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7594{
7595  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7596						 (__v4df) __B,
7597						 (__v4df)
7598						 _mm256_setzero_pd (),
7599						 (__mmask8) __U);
7600}
7601
7602extern __inline __m256
7603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7605{
7606  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7607						(__v8sf) __B,
7608						(__v8sf) __W,
7609						(__mmask8) __U);
7610}
7611
7612extern __inline __m256d
7613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7615{
7616  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7617						 (__v4df) __B,
7618						 (__v4df)
7619						 _mm256_setzero_pd (),
7620						 (__mmask8) __U);
7621}
7622
7623extern __inline __m256
7624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7626{
7627  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7628						(__v8sf) __B,
7629						(__v8sf) __W,
7630						(__mmask8) __U);
7631}
7632
7633extern __inline __m256
7634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7636{
7637  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7638						(__v8sf) __B,
7639						(__v8sf)
7640						_mm256_setzero_ps (),
7641						(__mmask8) __U);
7642}
7643
7644extern __inline __m256
7645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7646_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7647{
7648  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7649						(__v8sf) __B,
7650						(__v8sf)
7651						_mm256_setzero_ps (),
7652						(__mmask8) __U);
7653}
7654
7655extern __inline __m128
7656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7657_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7658{
7659  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7660					     (__v4sf) __B,
7661					     (__v4sf) __W,
7662					     (__mmask8) __U);
7663}
7664
7665extern __inline __m128
7666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7667_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7668{
7669  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7670					     (__v4sf) __B,
7671					     (__v4sf) __W,
7672					     (__mmask8) __U);
7673}
7674
7675extern __inline __m128
7676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7678{
7679  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7680					     (__v4sf) __B,
7681					     (__v4sf)
7682					     _mm_setzero_ps (),
7683					     (__mmask8) __U);
7684}
7685
7686extern __inline __m128
7687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7688_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7689{
7690  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7691					     (__v4sf) __B,
7692					     (__v4sf)
7693					     _mm_setzero_ps (),
7694					     (__mmask8) __U);
7695}
7696
7697extern __inline __m128
7698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7699_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7700{
7701  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7702					     (__v4sf) __B,
7703					     (__v4sf) __W,
7704					     (__mmask8) __U);
7705}
7706
7707extern __inline __m128
7708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7709_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7710{
7711  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7712					     (__v4sf) __B,
7713					     (__v4sf)
7714					     _mm_setzero_ps (),
7715					     (__mmask8) __U);
7716}
7717
7718extern __inline __m128d
7719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7720_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7721{
7722  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7723					      (__v2df) __B,
7724					      (__v2df) __W,
7725					      (__mmask8) __U);
7726}
7727
7728extern __inline __m128d
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7731{
7732  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7733					      (__v2df) __B,
7734					      (__v2df)
7735					      _mm_setzero_pd (),
7736					      (__mmask8) __U);
7737}
7738
7739extern __inline __m128d
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7742{
7743  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7744					      (__v2df) __B,
7745					      (__v2df) __W,
7746					      (__mmask8) __U);
7747}
7748
7749extern __inline __m128d
7750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7752{
7753  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7754					      (__v2df) __B,
7755					      (__v2df)
7756					      _mm_setzero_pd (),
7757					      (__mmask8) __U);
7758}
7759
7760extern __inline __m128d
7761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7763{
7764  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7765					      (__v2df) __B,
7766					      (__v2df) __W,
7767					      (__mmask8) __U);
7768}
7769
7770extern __inline __m128d
7771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7772_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7773{
7774  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7775					      (__v2df) __B,
7776					      (__v2df)
7777					      _mm_setzero_pd (),
7778					      (__mmask8) __U);
7779}
7780
7781extern __inline __m256
7782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7784{
7785  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7786						(__v8sf) __B,
7787						(__v8sf) __W,
7788						(__mmask8) __U);
7789}
7790
7791extern __inline __m256
7792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7794{
7795  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7796						(__v8sf) __B,
7797						(__v8sf)
7798						_mm256_setzero_ps (),
7799						(__mmask8) __U);
7800}
7801
7802extern __inline __m256d
7803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7805		    __m256d __B)
7806{
7807  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7808						 (__v4df) __B,
7809						 (__v4df) __W,
7810						 (__mmask8) __U);
7811}
7812
7813extern __inline __m256d
7814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7815_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7816{
7817  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7818						 (__v4df) __B,
7819						 (__v4df)
7820						 _mm256_setzero_pd (),
7821						 (__mmask8) __U);
7822}
7823
7824extern __inline __m256i
7825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7826_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7827{
7828  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7829						  (__v4di) __B,
7830						  (__v4di)
7831						  _mm256_setzero_si256 (),
7832						  __M);
7833}
7834
7835extern __inline __m256i
7836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7838		       __m256i __B)
7839{
7840  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7841						  (__v4di) __B,
7842						  (__v4di) __W, __M);
7843}
7844
7845extern __inline __m256i
7846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7847_mm256_min_epi64 (__m256i __A, __m256i __B)
7848{
7849  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7850						  (__v4di) __B,
7851						  (__v4di)
7852						  _mm256_setzero_si256 (),
7853						  (__mmask8) -1);
7854}
7855
7856extern __inline __m256i
7857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7859		       __m256i __B)
7860{
7861  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7862						  (__v4di) __B,
7863						  (__v4di) __W, __M);
7864}
7865
7866extern __inline __m256i
7867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7868_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7869{
7870  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7871						  (__v4di) __B,
7872						  (__v4di)
7873						  _mm256_setzero_si256 (),
7874						  __M);
7875}
7876
7877extern __inline __m256i
7878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7879_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7880{
7881  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7882						  (__v4di) __B,
7883						  (__v4di)
7884						  _mm256_setzero_si256 (),
7885						  __M);
7886}
7887
7888extern __inline __m256i
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm256_max_epi64 (__m256i __A, __m256i __B)
7891{
7892  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7893						  (__v4di) __B,
7894						  (__v4di)
7895						  _mm256_setzero_si256 (),
7896						  (__mmask8) -1);
7897}
7898
7899extern __inline __m256i
7900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901_mm256_max_epu64 (__m256i __A, __m256i __B)
7902{
7903  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7904						  (__v4di) __B,
7905						  (__v4di)
7906						  _mm256_setzero_si256 (),
7907						  (__mmask8) -1);
7908}
7909
7910extern __inline __m256i
7911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7912_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7913		       __m256i __B)
7914{
7915  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7916						  (__v4di) __B,
7917						  (__v4di) __W, __M);
7918}
7919
7920extern __inline __m256i
7921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922_mm256_min_epu64 (__m256i __A, __m256i __B)
7923{
7924  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7925						  (__v4di) __B,
7926						  (__v4di)
7927						  _mm256_setzero_si256 (),
7928						  (__mmask8) -1);
7929}
7930
7931extern __inline __m256i
7932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7933_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7934		       __m256i __B)
7935{
7936  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7937						  (__v4di) __B,
7938						  (__v4di) __W, __M);
7939}
7940
7941extern __inline __m256i
7942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7944{
7945  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7946						  (__v4di) __B,
7947						  (__v4di)
7948						  _mm256_setzero_si256 (),
7949						  __M);
7950}
7951
7952extern __inline __m256i
7953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7954_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7955{
7956  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7957						  (__v8si) __B,
7958						  (__v8si)
7959						  _mm256_setzero_si256 (),
7960						  __M);
7961}
7962
7963extern __inline __m256i
7964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7966		       __m256i __B)
7967{
7968  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7969						  (__v8si) __B,
7970						  (__v8si) __W, __M);
7971}
7972
7973extern __inline __m256i
7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7976{
7977  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7978						  (__v8si) __B,
7979						  (__v8si)
7980						  _mm256_setzero_si256 (),
7981						  __M);
7982}
7983
7984extern __inline __m256i
7985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7986_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7987		       __m256i __B)
7988{
7989  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7990						  (__v8si) __B,
7991						  (__v8si) __W, __M);
7992}
7993
7994extern __inline __m256i
7995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7996_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7997{
7998  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7999						  (__v8si) __B,
8000						  (__v8si)
8001						  _mm256_setzero_si256 (),
8002						  __M);
8003}
8004
8005extern __inline __m256i
8006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8007_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8008		       __m256i __B)
8009{
8010  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
8011						  (__v8si) __B,
8012						  (__v8si) __W, __M);
8013}
8014
8015extern __inline __m256i
8016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
8018{
8019  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8020						  (__v8si) __B,
8021						  (__v8si)
8022						  _mm256_setzero_si256 (),
8023						  __M);
8024}
8025
8026extern __inline __m256i
8027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8029		       __m256i __B)
8030{
8031  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8032						  (__v8si) __B,
8033						  (__v8si) __W, __M);
8034}
8035
8036extern __inline __m128i
8037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8038_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8039{
8040  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8041						  (__v2di) __B,
8042						  (__v2di)
8043						  _mm_setzero_si128 (),
8044						  __M);
8045}
8046
8047extern __inline __m128i
8048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8050		    __m128i __B)
8051{
8052  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8053						  (__v2di) __B,
8054						  (__v2di) __W, __M);
8055}
8056
8057extern __inline __m128i
8058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8059_mm_min_epi64 (__m128i __A, __m128i __B)
8060{
8061  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8062						  (__v2di) __B,
8063						  (__v2di)
8064						  _mm_setzero_si128 (),
8065						  (__mmask8) -1);
8066}
8067
8068extern __inline __m128i
8069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8071		    __m128i __B)
8072{
8073  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8074						  (__v2di) __B,
8075						  (__v2di) __W, __M);
8076}
8077
8078extern __inline __m128i
8079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8080_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8081{
8082  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8083						  (__v2di) __B,
8084						  (__v2di)
8085						  _mm_setzero_si128 (),
8086						  __M);
8087}
8088
8089extern __inline __m128i
8090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8091_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8092{
8093  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8094						  (__v2di) __B,
8095						  (__v2di)
8096						  _mm_setzero_si128 (),
8097						  __M);
8098}
8099
8100extern __inline __m128i
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm_max_epi64 (__m128i __A, __m128i __B)
8103{
8104  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8105						  (__v2di) __B,
8106						  (__v2di)
8107						  _mm_setzero_si128 (),
8108						  (__mmask8) -1);
8109}
8110
8111extern __inline __m128i
8112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113_mm_max_epu64 (__m128i __A, __m128i __B)
8114{
8115  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8116						  (__v2di) __B,
8117						  (__v2di)
8118						  _mm_setzero_si128 (),
8119						  (__mmask8) -1);
8120}
8121
8122extern __inline __m128i
8123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8124_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8125		    __m128i __B)
8126{
8127  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8128						  (__v2di) __B,
8129						  (__v2di) __W, __M);
8130}
8131
8132extern __inline __m128i
8133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134_mm_min_epu64 (__m128i __A, __m128i __B)
8135{
8136  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8137						  (__v2di) __B,
8138						  (__v2di)
8139						  _mm_setzero_si128 (),
8140						  (__mmask8) -1);
8141}
8142
8143extern __inline __m128i
8144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8145_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8146		    __m128i __B)
8147{
8148  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8149						  (__v2di) __B,
8150						  (__v2di) __W, __M);
8151}
8152
8153extern __inline __m128i
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8156{
8157  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8158						  (__v2di) __B,
8159						  (__v2di)
8160						  _mm_setzero_si128 (),
8161						  __M);
8162}
8163
8164extern __inline __m128i
8165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8166_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8167{
8168  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8169						  (__v4si) __B,
8170						  (__v4si)
8171						  _mm_setzero_si128 (),
8172						  __M);
8173}
8174
8175extern __inline __m128i
8176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8177_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8178		    __m128i __B)
8179{
8180  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8181						  (__v4si) __B,
8182						  (__v4si) __W, __M);
8183}
8184
8185extern __inline __m128i
8186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8188{
8189  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8190						  (__v4si) __B,
8191						  (__v4si)
8192						  _mm_setzero_si128 (),
8193						  __M);
8194}
8195
8196extern __inline __m128i
8197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8198_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8199		    __m128i __B)
8200{
8201  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8202						  (__v4si) __B,
8203						  (__v4si) __W, __M);
8204}
8205
8206extern __inline __m128i
8207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8208_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8209{
8210  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8211						  (__v4si) __B,
8212						  (__v4si)
8213						  _mm_setzero_si128 (),
8214						  __M);
8215}
8216
8217extern __inline __m128i
8218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8219_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8220		    __m128i __B)
8221{
8222  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8223						  (__v4si) __B,
8224						  (__v4si) __W, __M);
8225}
8226
8227extern __inline __m128i
8228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8229_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8230{
8231  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8232						  (__v4si) __B,
8233						  (__v4si)
8234						  _mm_setzero_si128 (),
8235						  __M);
8236}
8237
8238extern __inline __m128i
8239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8240_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8241		    __m128i __B)
8242{
8243  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8244						  (__v4si) __B,
8245						  (__v4si) __W, __M);
8246}
8247
/* When __AVX512CD__ is not already defined, save the current target
   options and switch to "avx512vl,avx512cd" for the definitions that
   follow.  __DISABLE_AVX512VLCD__ records that the options were pushed so
   that they can be popped again later.  */
#ifndef __AVX512CD__
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* __AVX512CD__ */
8253
8254extern __inline __m128i
8255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256_mm_broadcastmb_epi64 (__mmask8 __A)
8257{
8258  return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8259}
8260
8261extern __inline __m256i
8262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8263_mm256_broadcastmb_epi64 (__mmask8 __A)
8264{
8265  return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8266}
8267
8268extern __inline __m128i
8269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8270_mm_broadcastmw_epi32 (__mmask16 __A)
8271{
8272  return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8273}
8274
8275extern __inline __m256i
8276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8277_mm256_broadcastmw_epi32 (__mmask16 __A)
8278{
8279  return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8280}
8281
8282extern __inline __m256i
8283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8284_mm256_lzcnt_epi32 (__m256i __A)
8285{
8286  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8287						     (__v8si)
8288						     _mm256_setzero_si256 (),
8289						     (__mmask8) -1);
8290}
8291
8292extern __inline __m256i
8293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8294_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8295{
8296  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8297						     (__v8si) __W,
8298						     (__mmask8) __U);
8299}
8300
8301extern __inline __m256i
8302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8303_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8304{
8305  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8306						     (__v8si)
8307						     _mm256_setzero_si256 (),
8308						     (__mmask8) __U);
8309}
8310
8311extern __inline __m256i
8312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313_mm256_lzcnt_epi64 (__m256i __A)
8314{
8315  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8316						     (__v4di)
8317						     _mm256_setzero_si256 (),
8318						     (__mmask8) -1);
8319}
8320
8321extern __inline __m256i
8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8324{
8325  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8326						     (__v4di) __W,
8327						     (__mmask8) __U);
8328}
8329
8330extern __inline __m256i
8331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8332_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8333{
8334  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8335						     (__v4di)
8336						     _mm256_setzero_si256 (),
8337						     (__mmask8) __U);
8338}
8339
8340extern __inline __m256i
8341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8342_mm256_conflict_epi64 (__m256i __A)
8343{
8344  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8345							 (__v4di)
8346							 _mm256_setzero_si256 (),
8347							 (__mmask8) -1);
8348}
8349
8350extern __inline __m256i
8351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8352_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8353{
8354  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8355							 (__v4di) __W,
8356							 (__mmask8)
8357							 __U);
8358}
8359
8360extern __inline __m256i
8361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8362_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8363{
8364  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8365							 (__v4di)
8366							 _mm256_setzero_si256 (),
8367							 (__mmask8)
8368							 __U);
8369}
8370
8371extern __inline __m256i
8372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373_mm256_conflict_epi32 (__m256i __A)
8374{
8375  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8376							 (__v8si)
8377							 _mm256_setzero_si256 (),
8378							 (__mmask8) -1);
8379}
8380
8381extern __inline __m256i
8382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8383_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8384{
8385  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8386							 (__v8si) __W,
8387							 (__mmask8)
8388							 __U);
8389}
8390
8391extern __inline __m256i
8392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8393_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8394{
8395  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8396							 (__v8si)
8397							 _mm256_setzero_si256 (),
8398							 (__mmask8)
8399							 __U);
8400}
8401
8402extern __inline __m128i
8403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404_mm_lzcnt_epi32 (__m128i __A)
8405{
8406  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8407						     (__v4si)
8408						     _mm_setzero_si128 (),
8409						     (__mmask8) -1);
8410}
8411
8412extern __inline __m128i
8413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8415{
8416  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8417						     (__v4si) __W,
8418						     (__mmask8) __U);
8419}
8420
8421extern __inline __m128i
8422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8423_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8424{
8425  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8426						     (__v4si)
8427						     _mm_setzero_si128 (),
8428						     (__mmask8) __U);
8429}
8430
8431extern __inline __m128i
8432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8433_mm_lzcnt_epi64 (__m128i __A)
8434{
8435  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8436						     (__v2di)
8437						     _mm_setzero_si128 (),
8438						     (__mmask8) -1);
8439}
8440
8441extern __inline __m128i
8442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8443_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8444{
8445  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8446						     (__v2di) __W,
8447						     (__mmask8) __U);
8448}
8449
8450extern __inline __m128i
8451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8452_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8453{
8454  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8455						     (__v2di)
8456						     _mm_setzero_si128 (),
8457						     (__mmask8) __U);
8458}
8459
8460extern __inline __m128i
8461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8462_mm_conflict_epi64 (__m128i __A)
8463{
8464  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8465							 (__v2di)
8466							 _mm_setzero_si128 (),
8467							 (__mmask8) -1);
8468}
8469
8470extern __inline __m128i
8471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8472_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8473{
8474  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8475							 (__v2di) __W,
8476							 (__mmask8)
8477							 __U);
8478}
8479
8480extern __inline __m128i
8481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8482_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8483{
8484  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8485							 (__v2di)
8486							 _mm_setzero_si128 (),
8487							 (__mmask8)
8488							 __U);
8489}
8490
8491extern __inline __m128i
8492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8493_mm_conflict_epi32 (__m128i __A)
8494{
8495  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8496							 (__v4si)
8497							 _mm_setzero_si128 (),
8498							 (__mmask8) -1);
8499}
8500
8501extern __inline __m128i
8502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8503_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8504{
8505  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8506							 (__v4si) __W,
8507							 (__mmask8)
8508							 __U);
8509}
8510
8511extern __inline __m128i
8512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8514{
8515  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8516							 (__v4si)
8517							 _mm_setzero_si128 (),
8518							 (__mmask8)
8519							 __U);
8520}
8521
/* Restore the saved target options, but only if __DISABLE_AVX512VLCD__
   indicates that they were pushed for the AVX512CD definitions.  */
#ifdef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8525
8526extern __inline __m256d
8527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8529			 __m256d __B)
8530{
8531  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8532						    (__v4df) __B,
8533						    (__v4df) __W,
8534						    (__mmask8) __U);
8535}
8536
8537extern __inline __m256d
8538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8539_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8540{
8541  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8542						    (__v4df) __B,
8543						    (__v4df)
8544						    _mm256_setzero_pd (),
8545						    (__mmask8) __U);
8546}
8547
8548extern __inline __m128d
8549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8550_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8551		      __m128d __B)
8552{
8553  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8554						    (__v2df) __B,
8555						    (__v2df) __W,
8556						    (__mmask8) __U);
8557}
8558
8559extern __inline __m128d
8560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8562{
8563  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8564						    (__v2df) __B,
8565						    (__v2df)
8566						    _mm_setzero_pd (),
8567						    (__mmask8) __U);
8568}
8569
8570extern __inline __m256
8571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8572_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8573			 __m256 __B)
8574{
8575  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8576						   (__v8sf) __B,
8577						   (__v8sf) __W,
8578						   (__mmask8) __U);
8579}
8580
8581extern __inline __m256d
8582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8583_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8584			 __m256d __B)
8585{
8586  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8587						    (__v4df) __B,
8588						    (__v4df) __W,
8589						    (__mmask8) __U);
8590}
8591
8592extern __inline __m256d
8593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8594_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8595{
8596  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8597						    (__v4df) __B,
8598						    (__v4df)
8599						    _mm256_setzero_pd (),
8600						    (__mmask8) __U);
8601}
8602
8603extern __inline __m128d
8604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8605_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8606		      __m128d __B)
8607{
8608  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8609						    (__v2df) __B,
8610						    (__v2df) __W,
8611						    (__mmask8) __U);
8612}
8613
8614extern __inline __m128d
8615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8616_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8617{
8618  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8619						    (__v2df) __B,
8620						    (__v2df)
8621						    _mm_setzero_pd (),
8622						    (__mmask8) __U);
8623}
8624
8625extern __inline __m256
8626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8627_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8628			 __m256 __B)
8629{
8630  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8631						   (__v8sf) __B,
8632						   (__v8sf) __W,
8633						   (__mmask8) __U);
8634}
8635
8636extern __inline __m256
8637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8639{
8640  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8641						   (__v8sf) __B,
8642						   (__v8sf)
8643						   _mm256_setzero_ps (),
8644						   (__mmask8) __U);
8645}
8646
8647extern __inline __m128
8648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8649_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8650{
8651  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8652						   (__v4sf) __B,
8653						   (__v4sf) __W,
8654						   (__mmask8) __U);
8655}
8656
8657extern __inline __m128
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8660{
8661  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8662						   (__v4sf) __B,
8663						   (__v4sf)
8664						   _mm_setzero_ps (),
8665						   (__mmask8) __U);
8666}
8667
8668extern __inline __m128
8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8671{
8672  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8673						 (__v4sf) __W,
8674						 (__mmask8) __U);
8675}
8676
8677extern __inline __m128
8678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8679_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8680{
8681  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8682						 (__v4sf)
8683						 _mm_setzero_ps (),
8684						 (__mmask8) __U);
8685}
8686
8687extern __inline __m256
8688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8689_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8690{
8691  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8692						   (__v8sf) __B,
8693						   (__v8sf)
8694						   _mm256_setzero_ps (),
8695						   (__mmask8) __U);
8696}
8697
8698extern __inline __m256
8699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8700_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8701{
8702  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8703						    (__v8sf) __W,
8704						    (__mmask8) __U);
8705}
8706
8707extern __inline __m256
8708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8709_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8710{
8711  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8712						    (__v8sf)
8713						    _mm256_setzero_ps (),
8714						    (__mmask8) __U);
8715}
8716
8717extern __inline __m128
8718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8719_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8720{
8721  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8722						   (__v4sf) __B,
8723						   (__v4sf) __W,
8724						   (__mmask8) __U);
8725}
8726
8727extern __inline __m128
8728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8729_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8730{
8731  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8732						   (__v4sf) __B,
8733						   (__v4sf)
8734						   _mm_setzero_ps (),
8735						   (__mmask8) __U);
8736}
8737
8738extern __inline __m256i
8739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8741		       __m128i __B)
8742{
8743  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8744						 (__v4si) __B,
8745						 (__v8si) __W,
8746						 (__mmask8) __U);
8747}
8748
8749extern __inline __m256i
8750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8751_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8752{
8753  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8754						 (__v4si) __B,
8755						 (__v8si)
8756						 _mm256_setzero_si256 (),
8757						 (__mmask8) __U);
8758}
8759
8760extern __inline __m128i
8761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8763		    __m128i __B)
8764{
8765  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8766						 (__v4si) __B,
8767						 (__v4si) __W,
8768						 (__mmask8) __U);
8769}
8770
8771extern __inline __m128i
8772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8773_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8774{
8775  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8776						 (__v4si) __B,
8777						 (__v4si)
8778						 _mm_setzero_si128 (),
8779						 (__mmask8) __U);
8780}
8781
8782extern __inline __m256i
8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784_mm256_sra_epi64 (__m256i __A, __m128i __B)
8785{
8786  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8787						 (__v2di) __B,
8788						 (__v4di)
8789						 _mm256_setzero_si256 (),
8790						 (__mmask8) -1);
8791}
8792
8793extern __inline __m256i
8794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8795_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8796		       __m128i __B)
8797{
8798  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8799						 (__v2di) __B,
8800						 (__v4di) __W,
8801						 (__mmask8) __U);
8802}
8803
8804extern __inline __m256i
8805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8806_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8807{
8808  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8809						 (__v2di) __B,
8810						 (__v4di)
8811						 _mm256_setzero_si256 (),
8812						 (__mmask8) __U);
8813}
8814
8815extern __inline __m128i
8816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8817_mm_sra_epi64 (__m128i __A, __m128i __B)
8818{
8819  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8820						 (__v2di) __B,
8821						 (__v2di)
8822						 _mm_setzero_si128 (),
8823						 (__mmask8) -1);
8824}
8825
8826extern __inline __m128i
8827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8829		    __m128i __B)
8830{
8831  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8832						 (__v2di) __B,
8833						 (__v2di) __W,
8834						 (__mmask8) __U);
8835}
8836
8837extern __inline __m128i
8838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8839_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8840{
8841  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8842						 (__v2di) __B,
8843						 (__v2di)
8844						 _mm_setzero_si128 (),
8845						 (__mmask8) __U);
8846}
8847
8848extern __inline __m128i
8849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8850_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8851		    __m128i __B)
8852{
8853  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8854						 (__v4si) __B,
8855						 (__v4si) __W,
8856						 (__mmask8) __U);
8857}
8858
8859extern __inline __m128i
8860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8861_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8862{
8863  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8864						 (__v4si) __B,
8865						 (__v4si)
8866						 _mm_setzero_si128 (),
8867						 (__mmask8) __U);
8868}
8869
8870extern __inline __m128i
8871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8872_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8873		    __m128i __B)
8874{
8875  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8876						 (__v2di) __B,
8877						 (__v2di) __W,
8878						 (__mmask8) __U);
8879}
8880
8881extern __inline __m128i
8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8884{
8885  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8886						 (__v2di) __B,
8887						 (__v2di)
8888						 _mm_setzero_si128 (),
8889						 (__mmask8) __U);
8890}
8891
8892extern __inline __m256i
8893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8894_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8895		       __m128i __B)
8896{
8897  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8898						 (__v4si) __B,
8899						 (__v8si) __W,
8900						 (__mmask8) __U);
8901}
8902
8903extern __inline __m256i
8904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8905_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8906{
8907  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8908						 (__v4si) __B,
8909						 (__v8si)
8910						 _mm256_setzero_si256 (),
8911						 (__mmask8) __U);
8912}
8913
8914extern __inline __m256i
8915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8916_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8917		       __m128i __B)
8918{
8919  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8920						 (__v2di) __B,
8921						 (__v4di) __W,
8922						 (__mmask8) __U);
8923}
8924
8925extern __inline __m256i
8926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8927_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8928{
8929  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8930						 (__v2di) __B,
8931						 (__v4di)
8932						 _mm256_setzero_si256 (),
8933						 (__mmask8) __U);
8934}
8935
8936extern __inline __m256
8937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8938_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8939			    __m256 __Y)
8940{
8941  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8942						    (__v8si) __X,
8943						    (__v8sf) __W,
8944						    (__mmask8) __U);
8945}
8946
8947extern __inline __m256
8948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8949_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8950{
8951  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8952						    (__v8si) __X,
8953						    (__v8sf)
8954						    _mm256_setzero_ps (),
8955						    (__mmask8) __U);
8956}
8957
8958extern __inline __m256d
8959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8960_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8961{
8962  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8963						     (__v4di) __X,
8964						     (__v4df)
8965						     _mm256_setzero_pd (),
8966						     (__mmask8) -1);
8967}
8968
8969extern __inline __m256d
8970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8971_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8972			    __m256d __Y)
8973{
8974  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8975						     (__v4di) __X,
8976						     (__v4df) __W,
8977						     (__mmask8) __U);
8978}
8979
8980extern __inline __m256d
8981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8982_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8983{
8984  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8985						     (__v4di) __X,
8986						     (__v4df)
8987						     _mm256_setzero_pd (),
8988						     (__mmask8) __U);
8989}
8990
8991extern __inline __m256d
8992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8994			   __m256i __C)
8995{
8996  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8997							(__v4di) __C,
8998							(__v4df) __W,
8999							(__mmask8)
9000							__U);
9001}
9002
9003extern __inline __m256d
9004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9005_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
9006{
9007  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
9008							(__v4di) __C,
9009							(__v4df)
9010							_mm256_setzero_pd (),
9011							(__mmask8)
9012							__U);
9013}
9014
9015extern __inline __m256
9016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
9018			   __m256i __C)
9019{
9020  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9021						       (__v8si) __C,
9022						       (__v8sf) __W,
9023						       (__mmask8) __U);
9024}
9025
9026extern __inline __m256
9027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9028_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9029{
9030  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9031						       (__v8si) __C,
9032						       (__v8sf)
9033						       _mm256_setzero_ps (),
9034						       (__mmask8) __U);
9035}
9036
9037extern __inline __m128d
9038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9040			__m128i __C)
9041{
9042  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9043						     (__v2di) __C,
9044						     (__v2df) __W,
9045						     (__mmask8) __U);
9046}
9047
9048extern __inline __m128d
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9051{
9052  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9053						     (__v2di) __C,
9054						     (__v2df)
9055						     _mm_setzero_pd (),
9056						     (__mmask8) __U);
9057}
9058
9059extern __inline __m128
9060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9061_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9062			__m128i __C)
9063{
9064  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9065						    (__v4si) __C,
9066						    (__v4sf) __W,
9067						    (__mmask8) __U);
9068}
9069
9070extern __inline __m128
9071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9073{
9074  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9075						    (__v4si) __C,
9076						    (__v4sf)
9077						    _mm_setzero_ps (),
9078						    (__mmask8) __U);
9079}
9080
9081extern __inline __m256i
9082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9083_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9084{
9085  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9086						  (__v8si) __B,
9087						  (__v8si)
9088						  _mm256_setzero_si256 (),
9089						  __M);
9090}
9091
9092extern __inline __m256i
9093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9094_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9095{
9096  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9097						     (__v4di) __X,
9098						     (__v4di)
9099						     _mm256_setzero_si256 (),
9100						     __M);
9101}
9102
9103extern __inline __m256i
9104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9105_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9106			 __m256i __B)
9107{
9108  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9109						  (__v8si) __B,
9110						  (__v8si) __W, __M);
9111}
9112
9113extern __inline __m128i
9114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9115_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9116{
9117  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9118						  (__v4si) __B,
9119						  (__v4si)
9120						  _mm_setzero_si128 (),
9121						  __M);
9122}
9123
9124extern __inline __m128i
9125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9126_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9127		      __m128i __B)
9128{
9129  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9130						  (__v4si) __B,
9131						  (__v4si) __W, __M);
9132}
9133
9134extern __inline __m256i
9135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9136_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9137		       __m256i __Y)
9138{
9139  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9140						  (__v8si) __Y,
9141						  (__v4di) __W, __M);
9142}
9143
9144extern __inline __m256i
9145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9146_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9147{
9148  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9149						  (__v8si) __Y,
9150						  (__v4di)
9151						  _mm256_setzero_si256 (),
9152						  __M);
9153}
9154
9155extern __inline __m128i
9156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9157_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9158		    __m128i __Y)
9159{
9160  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9161						  (__v4si) __Y,
9162						  (__v2di) __W, __M);
9163}
9164
9165extern __inline __m128i
9166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9167_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9168{
9169  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9170						  (__v4si) __Y,
9171						  (__v2di)
9172						  _mm_setzero_si128 (),
9173						  __M);
9174}
9175
9176extern __inline __m256i
9177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9178_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9179{
9180  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9181						     (__v4di) __X,
9182						     (__v4di)
9183						     _mm256_setzero_si256 (),
9184						     (__mmask8) -1);
9185}
9186
9187extern __inline __m256i
9188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9190			       __m256i __Y)
9191{
9192  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9193						     (__v4di) __X,
9194						     (__v4di) __W,
9195						     __M);
9196}
9197
9198extern __inline __m256i
9199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9200_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9201		       __m256i __Y)
9202{
9203  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9204						   (__v8si) __Y,
9205						   (__v4di) __W, __M);
9206}
9207
9208extern __inline __m256i
9209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9210_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9211{
9212  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9213						     (__v8si) __X,
9214						     (__v8si)
9215						     _mm256_setzero_si256 (),
9216						     __M);
9217}
9218
9219extern __inline __m256i
9220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9221_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9222{
9223  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9224						   (__v8si) __Y,
9225						   (__v4di)
9226						   _mm256_setzero_si256 (),
9227						   __M);
9228}
9229
9230extern __inline __m128i
9231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9232_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9233		    __m128i __Y)
9234{
9235  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9236						   (__v4si) __Y,
9237						   (__v2di) __W, __M);
9238}
9239
9240extern __inline __m128i
9241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9242_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9243{
9244  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9245						   (__v4si) __Y,
9246						   (__v2di)
9247						   _mm_setzero_si128 (),
9248						   __M);
9249}
9250
9251extern __inline __m256i
9252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9253_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9254{
9255  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9256						     (__v8si) __X,
9257						     (__v8si)
9258						     _mm256_setzero_si256 (),
9259						     (__mmask8) -1);
9260}
9261
9262extern __inline __m256i
9263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9265			       __m256i __Y)
9266{
9267  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9268						     (__v8si) __X,
9269						     (__v8si) __W,
9270						     __M);
9271}
9272
9273extern __inline __mmask8
9274  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9276{
9277  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9278						  (__v8si) __Y, 4,
9279						  (__mmask8) __M);
9280}
9281
9282extern __inline __mmask8
9283  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9284_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
9285{
9286  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9287						  (__v8si) __Y, 4,
9288						  (__mmask8) -1);
9289}
9290
9291extern __inline __mmask8
9292  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9293_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9294{
9295  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9296						  (__v8si) __Y, 1,
9297						  (__mmask8) __M);
9298}
9299
9300extern __inline __mmask8
9301  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9302_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
9303{
9304  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9305						  (__v8si) __Y, 1,
9306						  (__mmask8) -1);
9307}
9308
9309extern __inline __mmask8
9310  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9311_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9312{
9313  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9314						  (__v8si) __Y, 5,
9315						  (__mmask8) __M);
9316}
9317
9318extern __inline __mmask8
9319  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
9321{
9322  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9323						  (__v8si) __Y, 5,
9324						  (__mmask8) -1);
9325}
9326
9327extern __inline __mmask8
9328  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9329_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9330{
9331  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9332						  (__v8si) __Y, 2,
9333						  (__mmask8) __M);
9334}
9335
9336extern __inline __mmask8
9337  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9338_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
9339{
9340  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9341						  (__v8si) __Y, 2,
9342						  (__mmask8) -1);
9343}
9344
9345extern __inline __mmask8
9346  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9347_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9348{
9349  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9350						  (__v4di) __Y, 4,
9351						  (__mmask8) __M);
9352}
9353
9354extern __inline __mmask8
9355  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
9357{
9358  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9359						  (__v4di) __Y, 4,
9360						  (__mmask8) -1);
9361}
9362
9363extern __inline __mmask8
9364  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9365_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9366{
9367  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9368						  (__v4di) __Y, 1,
9369						  (__mmask8) __M);
9370}
9371
9372extern __inline __mmask8
9373  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9374_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
9375{
9376  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9377						  (__v4di) __Y, 1,
9378						  (__mmask8) -1);
9379}
9380
9381extern __inline __mmask8
9382  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9383_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9384{
9385  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9386						  (__v4di) __Y, 5,
9387						  (__mmask8) __M);
9388}
9389
9390extern __inline __mmask8
9391  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9392_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
9393{
9394  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9395						  (__v4di) __Y, 5,
9396						  (__mmask8) -1);
9397}
9398
9399extern __inline __mmask8
9400  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9402{
9403  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9404						  (__v4di) __Y, 2,
9405						  (__mmask8) __M);
9406}
9407
9408extern __inline __mmask8
9409  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9410_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
9411{
9412  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9413						  (__v4di) __Y, 2,
9414						  (__mmask8) -1);
9415}
9416
9417extern __inline __mmask8
9418  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9419_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9420{
9421  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9422						 (__v8si) __Y, 4,
9423						 (__mmask8) __M);
9424}
9425
9426extern __inline __mmask8
9427  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9428_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9429{
9430  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9431						 (__v8si) __Y, 4,
9432						 (__mmask8) -1);
9433}
9434
9435extern __inline __mmask8
9436  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9437_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9438{
9439  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9440						 (__v8si) __Y, 1,
9441						 (__mmask8) __M);
9442}
9443
9444extern __inline __mmask8
9445  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9446_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
9447{
9448  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9449						 (__v8si) __Y, 1,
9450						 (__mmask8) -1);
9451}
9452
9453extern __inline __mmask8
9454  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9455_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9456{
9457  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9458						 (__v8si) __Y, 5,
9459						 (__mmask8) __M);
9460}
9461
9462extern __inline __mmask8
9463  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9464_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
9465{
9466  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9467						 (__v8si) __Y, 5,
9468						 (__mmask8) -1);
9469}
9470
9471extern __inline __mmask8
9472  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9473_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9474{
9475  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9476						 (__v8si) __Y, 2,
9477						 (__mmask8) __M);
9478}
9479
9480extern __inline __mmask8
9481  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9482_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
9483{
9484  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9485						 (__v8si) __Y, 2,
9486						 (__mmask8) -1);
9487}
9488
9489extern __inline __mmask8
9490  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9491_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9492{
9493  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9494						 (__v4di) __Y, 4,
9495						 (__mmask8) __M);
9496}
9497
9498extern __inline __mmask8
9499  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9500_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
9501{
9502  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9503						 (__v4di) __Y, 4,
9504						 (__mmask8) -1);
9505}
9506
9507extern __inline __mmask8
9508  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9509_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9510{
9511  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9512						 (__v4di) __Y, 1,
9513						 (__mmask8) __M);
9514}
9515
9516extern __inline __mmask8
9517  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9518_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
9519{
9520  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9521						 (__v4di) __Y, 1,
9522						 (__mmask8) -1);
9523}
9524
9525extern __inline __mmask8
9526  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9527_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9528{
9529  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9530						 (__v4di) __Y, 5,
9531						 (__mmask8) __M);
9532}
9533
9534extern __inline __mmask8
9535  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
9537{
9538  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9539						 (__v4di) __Y, 5,
9540						 (__mmask8) -1);
9541}
9542
9543extern __inline __mmask8
9544  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9545_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9546{
9547  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9548						 (__v4di) __Y, 2,
9549						 (__mmask8) __M);
9550}
9551
9552extern __inline __mmask8
9553  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9554_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
9555{
9556  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9557						 (__v4di) __Y, 2,
9558						 (__mmask8) -1);
9559}
9560
9561extern __inline __mmask8
9562  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9563_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9564{
9565  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9566						  (__v4si) __Y, 4,
9567						  (__mmask8) __M);
9568}
9569
9570extern __inline __mmask8
9571  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9572_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
9573{
9574  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9575						  (__v4si) __Y, 4,
9576						  (__mmask8) -1);
9577}
9578
9579extern __inline __mmask8
9580  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9581_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9582{
9583  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9584						  (__v4si) __Y, 1,
9585						  (__mmask8) __M);
9586}
9587
9588extern __inline __mmask8
9589  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9590_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
9591{
9592  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9593						  (__v4si) __Y, 1,
9594						  (__mmask8) -1);
9595}
9596
9597extern __inline __mmask8
9598  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9599_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9600{
9601  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9602						  (__v4si) __Y, 5,
9603						  (__mmask8) __M);
9604}
9605
9606extern __inline __mmask8
9607  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9608_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
9609{
9610  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9611						  (__v4si) __Y, 5,
9612						  (__mmask8) -1);
9613}
9614
9615extern __inline __mmask8
9616  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9617_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9618{
9619  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9620						  (__v4si) __Y, 2,
9621						  (__mmask8) __M);
9622}
9623
9624extern __inline __mmask8
9625  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9626_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
9627{
9628  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9629						  (__v4si) __Y, 2,
9630						  (__mmask8) -1);
9631}
9632
9633extern __inline __mmask8
9634  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9635_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9636{
9637  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9638						  (__v2di) __Y, 4,
9639						  (__mmask8) __M);
9640}
9641
9642extern __inline __mmask8
9643  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9644_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
9645{
9646  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9647						  (__v2di) __Y, 4,
9648						  (__mmask8) -1);
9649}
9650
9651extern __inline __mmask8
9652  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9653_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9654{
9655  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9656						  (__v2di) __Y, 1,
9657						  (__mmask8) __M);
9658}
9659
9660extern __inline __mmask8
9661  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9662_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
9663{
9664  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9665						  (__v2di) __Y, 1,
9666						  (__mmask8) -1);
9667}
9668
9669extern __inline __mmask8
9670  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9671_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9672{
9673  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9674						  (__v2di) __Y, 5,
9675						  (__mmask8) __M);
9676}
9677
9678extern __inline __mmask8
9679  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9680_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
9681{
9682  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9683						  (__v2di) __Y, 5,
9684						  (__mmask8) -1);
9685}
9686
9687extern __inline __mmask8
9688  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9689_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9690{
9691  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9692						  (__v2di) __Y, 2,
9693						  (__mmask8) __M);
9694}
9695
9696extern __inline __mmask8
9697  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9698_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
9699{
9700  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9701						  (__v2di) __Y, 2,
9702						  (__mmask8) -1);
9703}
9704
9705extern __inline __mmask8
9706  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9707_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9708{
9709  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9710						 (__v4si) __Y, 4,
9711						 (__mmask8) __M);
9712}
9713
9714extern __inline __mmask8
9715  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9716_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9717{
9718  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9719						 (__v4si) __Y, 4,
9720						 (__mmask8) -1);
9721}
9722
9723extern __inline __mmask8
9724  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9726{
9727  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9728						 (__v4si) __Y, 1,
9729						 (__mmask8) __M);
9730}
9731
9732extern __inline __mmask8
9733  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
9735{
9736  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9737						 (__v4si) __Y, 1,
9738						 (__mmask8) -1);
9739}
9740
9741extern __inline __mmask8
9742  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9744{
9745  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9746						 (__v4si) __Y, 5,
9747						 (__mmask8) __M);
9748}
9749
9750extern __inline __mmask8
9751  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
9753{
9754  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9755						 (__v4si) __Y, 5,
9756						 (__mmask8) -1);
9757}
9758
9759extern __inline __mmask8
9760  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9762{
9763  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9764						 (__v4si) __Y, 2,
9765						 (__mmask8) __M);
9766}
9767
9768extern __inline __mmask8
9769  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
9771{
9772  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9773						 (__v4si) __Y, 2,
9774						 (__mmask8) -1);
9775}
9776
9777extern __inline __mmask8
9778  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9780{
9781  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9782						 (__v2di) __Y, 4,
9783						 (__mmask8) __M);
9784}
9785
9786extern __inline __mmask8
9787  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
9789{
9790  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9791						 (__v2di) __Y, 4,
9792						 (__mmask8) -1);
9793}
9794
9795extern __inline __mmask8
9796  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9798{
9799  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9800						 (__v2di) __Y, 1,
9801						 (__mmask8) __M);
9802}
9803
9804extern __inline __mmask8
9805  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9806_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
9807{
9808  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9809						 (__v2di) __Y, 1,
9810						 (__mmask8) -1);
9811}
9812
9813extern __inline __mmask8
9814  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9816{
9817  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9818						 (__v2di) __Y, 5,
9819						 (__mmask8) __M);
9820}
9821
9822extern __inline __mmask8
9823  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
9825{
9826  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9827						 (__v2di) __Y, 5,
9828						 (__mmask8) -1);
9829}
9830
9831extern __inline __mmask8
9832  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9834{
9835  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9836						 (__v2di) __Y, 2,
9837						 (__mmask8) __M);
9838}
9839
9840extern __inline __mmask8
9841  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
9843{
9844  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9845						 (__v2di) __Y, 2,
9846						 (__mmask8) -1);
9847}
9848
9849#ifdef __OPTIMIZE__
9850extern __inline __m256i
9851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852_mm256_permutex_epi64 (__m256i __X, const int __I)
9853{
9854  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9855					      __I,
9856					      (__v4di)
9857					      _mm256_setzero_si256(),
9858					      (__mmask8) -1);
9859}
9860
9861extern __inline __m256i
9862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9863_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9864			    __m256i __X, const int __I)
9865{
9866  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9867						  __I,
9868						  (__v4di) __W,
9869						  (__mmask8) __M);
9870}
9871
9872extern __inline __m256i
9873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9874_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9875{
9876  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9877						  __I,
9878						  (__v4di)
9879						  _mm256_setzero_si256 (),
9880						  (__mmask8) __M);
9881}
9882
9883extern __inline __m256d
9884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9886			__m256d __B, const int __imm)
9887{
9888  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9889						  (__v4df) __B, __imm,
9890						  (__v4df) __W,
9891						  (__mmask8) __U);
9892}
9893
9894extern __inline __m256d
9895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9896_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9897			 const int __imm)
9898{
9899  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9900						  (__v4df) __B, __imm,
9901						  (__v4df)
9902						  _mm256_setzero_pd (),
9903						  (__mmask8) __U);
9904}
9905
9906extern __inline __m128d
9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9909		     __m128d __B, const int __imm)
9910{
9911  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9912						  (__v2df) __B, __imm,
9913						  (__v2df) __W,
9914						  (__mmask8) __U);
9915}
9916
9917extern __inline __m128d
9918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9919_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9920		      const int __imm)
9921{
9922  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9923						  (__v2df) __B, __imm,
9924						  (__v2df)
9925						  _mm_setzero_pd (),
9926						  (__mmask8) __U);
9927}
9928
9929extern __inline __m256
9930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9931_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9932			__m256 __B, const int __imm)
9933{
9934  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9935						 (__v8sf) __B, __imm,
9936						 (__v8sf) __W,
9937						 (__mmask8) __U);
9938}
9939
9940extern __inline __m256
9941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9942_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9943			 const int __imm)
9944{
9945  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9946						 (__v8sf) __B, __imm,
9947						 (__v8sf)
9948						 _mm256_setzero_ps (),
9949						 (__mmask8) __U);
9950}
9951
9952extern __inline __m128
9953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9954_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9955		     const int __imm)
9956{
9957  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9958						 (__v4sf) __B, __imm,
9959						 (__v4sf) __W,
9960						 (__mmask8) __U);
9961}
9962
9963extern __inline __m128
9964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9966		      const int __imm)
9967{
9968  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9969						 (__v4sf) __B, __imm,
9970						 (__v4sf)
9971						 _mm_setzero_ps (),
9972						 (__mmask8) __U);
9973}
9974
9975extern __inline __m256i
9976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9977_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9978{
9979  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9980							(__v4si) __B,
9981							__imm,
9982							(__v8si)
9983							_mm256_setzero_si256 (),
9984							(__mmask8) -1);
9985}
9986
9987extern __inline __m256i
9988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9989_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9990			 __m128i __B, const int __imm)
9991{
9992  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9993							(__v4si) __B,
9994							__imm,
9995							(__v8si) __W,
9996							(__mmask8)
9997							__U);
9998}
9999
10000extern __inline __m256i
10001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10002_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
10003			  const int __imm)
10004{
10005  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
10006							(__v4si) __B,
10007							__imm,
10008							(__v8si)
10009							_mm256_setzero_si256 (),
10010							(__mmask8)
10011							__U);
10012}
10013
10014extern __inline __m256
10015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10016_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
10017{
10018  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10019						       (__v4sf) __B,
10020						       __imm,
10021						       (__v8sf)
10022						       _mm256_setzero_ps (),
10023						       (__mmask8) -1);
10024}
10025
10026extern __inline __m256
10027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10028_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10029			 __m128 __B, const int __imm)
10030{
10031  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10032						       (__v4sf) __B,
10033						       __imm,
10034						       (__v8sf) __W,
10035						       (__mmask8) __U);
10036}
10037
10038extern __inline __m256
10039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10040_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10041			  const int __imm)
10042{
10043  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10044						       (__v4sf) __B,
10045						       __imm,
10046						       (__v8sf)
10047						       _mm256_setzero_ps (),
10048						       (__mmask8) __U);
10049}
10050
10051extern __inline __m128i
10052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
10054{
10055  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10056							 __imm,
10057							 (__v4si)
10058							 _mm_setzero_si128 (),
10059							 (__mmask8) -1);
10060}
10061
10062extern __inline __m128i
10063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10064_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10065				const int __imm)
10066{
10067  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10068							 __imm,
10069							 (__v4si) __W,
10070							 (__mmask8)
10071							 __U);
10072}
10073
10074extern __inline __m128i
10075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10077				 const int __imm)
10078{
10079  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10080							 __imm,
10081							 (__v4si)
10082							 _mm_setzero_si128 (),
10083							 (__mmask8)
10084							 __U);
10085}
10086
10087extern __inline __m128
10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089_mm256_extractf32x4_ps (__m256 __A, const int __imm)
10090{
10091  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10092							__imm,
10093							(__v4sf)
10094							_mm_setzero_ps (),
10095							(__mmask8) -1);
10096}
10097
10098extern __inline __m128
10099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10100_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10101			     const int __imm)
10102{
10103  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10104							__imm,
10105							(__v4sf) __W,
10106							(__mmask8)
10107							__U);
10108}
10109
10110extern __inline __m128
10111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10112_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10113			      const int __imm)
10114{
10115  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10116							__imm,
10117							(__v4sf)
10118							_mm_setzero_ps (),
10119							(__mmask8)
10120							__U);
10121}
10122
10123extern __inline __m256i
10124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10126{
10127  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10128						       (__v4di) __B,
10129						       __imm,
10130						       (__v4di)
10131						       _mm256_setzero_si256 (),
10132						       (__mmask8) -1);
10133}
10134
10135extern __inline __m256i
10136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10137_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10138			   __m256i __B, const int __imm)
10139{
10140  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10141						       (__v4di) __B,
10142						       __imm,
10143						       (__v4di) __W,
10144						       (__mmask8) __U);
10145}
10146
10147extern __inline __m256i
10148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10149_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10150			    const int __imm)
10151{
10152  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10153						       (__v4di) __B,
10154						       __imm,
10155						       (__v4di)
10156						       _mm256_setzero_si256 (),
10157						       (__mmask8) __U);
10158}
10159
10160extern __inline __m256i
10161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10162_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
10163{
10164  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10165						       (__v8si) __B,
10166						       __imm,
10167						       (__v8si)
10168						       _mm256_setzero_si256 (),
10169						       (__mmask8) -1);
10170}
10171
10172extern __inline __m256i
10173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10174_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10175			   __m256i __B, const int __imm)
10176{
10177  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10178						       (__v8si) __B,
10179						       __imm,
10180						       (__v8si) __W,
10181						       (__mmask8) __U);
10182}
10183
10184extern __inline __m256i
10185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10187			    const int __imm)
10188{
10189  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10190						       (__v8si) __B,
10191						       __imm,
10192						       (__v8si)
10193						       _mm256_setzero_si256 (),
10194						       (__mmask8) __U);
10195}
10196
10197extern __inline __m256d
10198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10199_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
10200{
10201  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10202						       (__v4df) __B,
10203						       __imm,
10204						       (__v4df)
10205						       _mm256_setzero_pd (),
10206						       (__mmask8) -1);
10207}
10208
10209extern __inline __m256d
10210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10211_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10212			   __m256d __B, const int __imm)
10213{
10214  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10215						       (__v4df) __B,
10216						       __imm,
10217						       (__v4df) __W,
10218						       (__mmask8) __U);
10219}
10220
10221extern __inline __m256d
10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10224			    const int __imm)
10225{
10226  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10227						       (__v4df) __B,
10228						       __imm,
10229						       (__v4df)
10230						       _mm256_setzero_pd (),
10231						       (__mmask8) __U);
10232}
10233
10234extern __inline __m256
10235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10236_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
10237{
10238  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10239						      (__v8sf) __B,
10240						      __imm,
10241						      (__v8sf)
10242						      _mm256_setzero_ps (),
10243						      (__mmask8) -1);
10244}
10245
10246extern __inline __m256
10247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10248_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10249			   __m256 __B, const int __imm)
10250{
10251  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10252						      (__v8sf) __B,
10253						      __imm,
10254						      (__v8sf) __W,
10255						      (__mmask8) __U);
10256}
10257
10258extern __inline __m256
10259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10260_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10261			    const int __imm)
10262{
10263  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10264						      (__v8sf) __B,
10265						      __imm,
10266						      (__v8sf)
10267						      _mm256_setzero_ps (),
10268						      (__mmask8) __U);
10269}
10270
10271extern __inline __m256d
10272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10273_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10274		    const int __imm)
10275{
10276  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10277						      (__v4df) __B,
10278						      (__v4di) __C,
10279						      __imm,
10280						      (__mmask8) -1);
10281}
10282
10283extern __inline __m256d
10284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10285_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10286			 __m256i __C, const int __imm)
10287{
10288  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10289						      (__v4df) __B,
10290						      (__v4di) __C,
10291						      __imm,
10292						      (__mmask8) __U);
10293}
10294
10295extern __inline __m256d
10296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10297_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10298			  __m256i __C, const int __imm)
10299{
10300  return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10301						       (__v4df) __B,
10302						       (__v4di) __C,
10303						       __imm,
10304						       (__mmask8) __U);
10305}
10306
10307extern __inline __m256
10308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10309_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10310		    const int __imm)
10311{
10312  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10313						     (__v8sf) __B,
10314						     (__v8si) __C,
10315						     __imm,
10316						     (__mmask8) -1);
10317}
10318
10319extern __inline __m256
10320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10321_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10322			 __m256i __C, const int __imm)
10323{
10324  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10325						     (__v8sf) __B,
10326						     (__v8si) __C,
10327						     __imm,
10328						     (__mmask8) __U);
10329}
10330
10331extern __inline __m256
10332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10333_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10334			  __m256i __C, const int __imm)
10335{
10336  return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10337						      (__v8sf) __B,
10338						      (__v8si) __C,
10339						      __imm,
10340						      (__mmask8) __U);
10341}
10342
10343extern __inline __m128d
10344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10345_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10346		 const int __imm)
10347{
10348  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10349						      (__v2df) __B,
10350						      (__v2di) __C,
10351						      __imm,
10352						      (__mmask8) -1);
10353}
10354
10355extern __inline __m128d
10356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10357_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10358		      __m128i __C, const int __imm)
10359{
10360  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10361						      (__v2df) __B,
10362						      (__v2di) __C,
10363						      __imm,
10364						      (__mmask8) __U);
10365}
10366
10367extern __inline __m128d
10368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10370		       __m128i __C, const int __imm)
10371{
10372  return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10373						       (__v2df) __B,
10374						       (__v2di) __C,
10375						       __imm,
10376						       (__mmask8) __U);
10377}
10378
10379extern __inline __m128
10380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10381_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
10382{
10383  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10384						     (__v4sf) __B,
10385						     (__v4si) __C,
10386						     __imm,
10387						     (__mmask8) -1);
10388}
10389
10390extern __inline __m128
10391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10393		      __m128i __C, const int __imm)
10394{
10395  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10396						     (__v4sf) __B,
10397						     (__v4si) __C,
10398						     __imm,
10399						     (__mmask8) __U);
10400}
10401
10402extern __inline __m128
10403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10404_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10405		       __m128i __C, const int __imm)
10406{
10407  return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10408						      (__v4sf) __B,
10409						      (__v4si) __C,
10410						      __imm,
10411						      (__mmask8) __U);
10412}
10413
10414extern __inline __m256i
10415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10416_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10417			const int __imm)
10418{
10419  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10420						  (__v8si) __W,
10421						  (__mmask8) __U);
10422}
10423
10424extern __inline __m256i
10425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10426_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
10427{
10428  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10429						  (__v8si)
10430						  _mm256_setzero_si256 (),
10431						  (__mmask8) __U);
10432}
10433
10434extern __inline __m128i
10435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10436_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10437		     const int __imm)
10438{
10439  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10440						  (__v4si) __W,
10441						  (__mmask8) __U);
10442}
10443
10444extern __inline __m128i
10445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10446_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
10447{
10448  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10449						  (__v4si)
10450						  _mm_setzero_si128 (),
10451						  (__mmask8) __U);
10452}
10453
10454extern __inline __m256i
10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10456_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10457			const int __imm)
10458{
10459  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10460						  (__v4di) __W,
10461						  (__mmask8) __U);
10462}
10463
10464extern __inline __m256i
10465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10466_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
10467{
10468  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10469						  (__v4di)
10470						  _mm256_setzero_si256 (),
10471						  (__mmask8) __U);
10472}
10473
10474extern __inline __m128i
10475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10476_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10477		     const int __imm)
10478{
10479  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10480						  (__v2di) __W,
10481						  (__mmask8) __U);
10482}
10483
10484extern __inline __m128i
10485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10486_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
10487{
10488  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10489						  (__v2di)
10490						  _mm_setzero_si128 (),
10491						  (__mmask8) __U);
10492}
10493
10494extern __inline __m256i
10495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10496_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10497			   const int __imm)
10498{
10499  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10500						     (__v4di) __B,
10501						     (__v4di) __C, __imm,
10502						     (__mmask8) -1);
10503}
10504
10505extern __inline __m256i
10506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10507_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10508				__m256i __B, __m256i __C,
10509				const int __imm)
10510{
10511  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10512						     (__v4di) __B,
10513						     (__v4di) __C, __imm,
10514						     (__mmask8) __U);
10515}
10516
10517extern __inline __m256i
10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10520				 __m256i __B, __m256i __C,
10521				 const int __imm)
10522{
10523  return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10524						      (__v4di) __B,
10525						      (__v4di) __C,
10526						      __imm,
10527						      (__mmask8) __U);
10528}
10529
10530extern __inline __m256i
10531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10532_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10533			   const int __imm)
10534{
10535  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10536						     (__v8si) __B,
10537						     (__v8si) __C, __imm,
10538						     (__mmask8) -1);
10539}
10540
10541extern __inline __m256i
10542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10543_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10544				__m256i __B, __m256i __C,
10545				const int __imm)
10546{
10547  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10548						     (__v8si) __B,
10549						     (__v8si) __C, __imm,
10550						     (__mmask8) __U);
10551}
10552
10553extern __inline __m256i
10554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10555_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10556				 __m256i __B, __m256i __C,
10557				 const int __imm)
10558{
10559  return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10560						      (__v8si) __B,
10561						      (__v8si) __C,
10562						      __imm,
10563						      (__mmask8) __U);
10564}
10565
10566extern __inline __m128i
10567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10568_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10569			const int __imm)
10570{
10571  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10572						     (__v2di) __B,
10573						     (__v2di) __C, __imm,
10574						     (__mmask8) -1);
10575}
10576
10577extern __inline __m128i
10578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10579_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10580			     __m128i __B, __m128i __C, const int __imm)
10581{
10582  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10583						     (__v2di) __B,
10584						     (__v2di) __C, __imm,
10585						     (__mmask8) __U);
10586}
10587
10588extern __inline __m128i
10589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10590_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10591			      __m128i __B, __m128i __C, const int __imm)
10592{
10593  return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10594						      (__v2di) __B,
10595						      (__v2di) __C,
10596						      __imm,
10597						      (__mmask8) __U);
10598}
10599
10600extern __inline __m128i
10601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10603			const int __imm)
10604{
10605  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10606						     (__v4si) __B,
10607						     (__v4si) __C, __imm,
10608						     (__mmask8) -1);
10609}
10610
10611extern __inline __m128i
10612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10613_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10614			     __m128i __B, __m128i __C, const int __imm)
10615{
10616  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10617						     (__v4si) __B,
10618						     (__v4si) __C, __imm,
10619						     (__mmask8) __U);
10620}
10621
10622extern __inline __m128i
10623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10624_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10625			      __m128i __B, __m128i __C, const int __imm)
10626{
10627  return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10628						      (__v4si) __B,
10629						      (__v4si) __C,
10630						      __imm,
10631						      (__mmask8) __U);
10632}
10633
10634extern __inline __m256
10635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636_mm256_roundscale_ps (__m256 __A, const int __imm)
10637{
10638  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10639						      __imm,
10640						      (__v8sf)
10641						      _mm256_setzero_ps (),
10642						      (__mmask8) -1);
10643}
10644
10645extern __inline __m256
10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10648			   const int __imm)
10649{
10650  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10651						      __imm,
10652						      (__v8sf) __W,
10653						      (__mmask8) __U);
10654}
10655
10656extern __inline __m256
10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
10659{
10660  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10661						      __imm,
10662						      (__v8sf)
10663						      _mm256_setzero_ps (),
10664						      (__mmask8) __U);
10665}
10666
10667extern __inline __m256d
10668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669_mm256_roundscale_pd (__m256d __A, const int __imm)
10670{
10671  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10672						       __imm,
10673						       (__v4df)
10674						       _mm256_setzero_pd (),
10675						       (__mmask8) -1);
10676}
10677
10678extern __inline __m256d
10679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10680_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10681			   const int __imm)
10682{
10683  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10684						       __imm,
10685						       (__v4df) __W,
10686						       (__mmask8) __U);
10687}
10688
10689extern __inline __m256d
10690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10691_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10692{
10693  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10694						       __imm,
10695						       (__v4df)
10696						       _mm256_setzero_pd (),
10697						       (__mmask8) __U);
10698}
10699
10700extern __inline __m128
10701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10702_mm_roundscale_ps (__m128 __A, const int __imm)
10703{
10704  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10705						      __imm,
10706						      (__v4sf)
10707						      _mm_setzero_ps (),
10708						      (__mmask8) -1);
10709}
10710
10711extern __inline __m128
10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10714			const int __imm)
10715{
10716  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10717						      __imm,
10718						      (__v4sf) __W,
10719						      (__mmask8) __U);
10720}
10721
10722extern __inline __m128
10723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10725{
10726  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10727						      __imm,
10728						      (__v4sf)
10729						      _mm_setzero_ps (),
10730						      (__mmask8) __U);
10731}
10732
10733extern __inline __m128d
10734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10735_mm_roundscale_pd (__m128d __A, const int __imm)
10736{
10737  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10738						       __imm,
10739						       (__v2df)
10740						       _mm_setzero_pd (),
10741						       (__mmask8) -1);
10742}
10743
10744extern __inline __m128d
10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10747			const int __imm)
10748{
10749  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10750						       __imm,
10751						       (__v2df) __W,
10752						       (__mmask8) __U);
10753}
10754
10755extern __inline __m128d
10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10758{
10759  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10760						       __imm,
10761						       (__v2df)
10762						       _mm_setzero_pd (),
10763						       (__mmask8) __U);
10764}
10765
10766extern __inline __m256
10767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10768_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10769		   _MM_MANTISSA_SIGN_ENUM __C)
10770{
10771  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10772						    (__C << 2) | __B,
10773						    (__v8sf)
10774						    _mm256_setzero_ps (),
10775						    (__mmask8) -1);
10776}
10777
10778extern __inline __m256
10779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10780_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10781			_MM_MANTISSA_NORM_ENUM __B,
10782			_MM_MANTISSA_SIGN_ENUM __C)
10783{
10784  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10785						    (__C << 2) | __B,
10786						    (__v8sf) __W,
10787						    (__mmask8) __U);
10788}
10789
10790extern __inline __m256
10791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10792_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10793			 _MM_MANTISSA_NORM_ENUM __B,
10794			 _MM_MANTISSA_SIGN_ENUM __C)
10795{
10796  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10797						    (__C << 2) | __B,
10798						    (__v8sf)
10799						    _mm256_setzero_ps (),
10800						    (__mmask8) __U);
10801}
10802
10803extern __inline __m128
10804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10805_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10806		_MM_MANTISSA_SIGN_ENUM __C)
10807{
10808  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10809						    (__C << 2) | __B,
10810						    (__v4sf)
10811						    _mm_setzero_ps (),
10812						    (__mmask8) -1);
10813}
10814
10815extern __inline __m128
10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10818		     _MM_MANTISSA_NORM_ENUM __B,
10819		     _MM_MANTISSA_SIGN_ENUM __C)
10820{
10821  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10822						    (__C << 2) | __B,
10823						    (__v4sf) __W,
10824						    (__mmask8) __U);
10825}
10826
10827extern __inline __m128
10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10830		      _MM_MANTISSA_NORM_ENUM __B,
10831		      _MM_MANTISSA_SIGN_ENUM __C)
10832{
10833  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10834						    (__C << 2) | __B,
10835						    (__v4sf)
10836						    _mm_setzero_ps (),
10837						    (__mmask8) __U);
10838}
10839
10840extern __inline __m256d
10841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10842_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10843		   _MM_MANTISSA_SIGN_ENUM __C)
10844{
10845  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10846						     (__C << 2) | __B,
10847						     (__v4df)
10848						     _mm256_setzero_pd (),
10849						     (__mmask8) -1);
10850}
10851
10852extern __inline __m256d
10853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10854_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10855			_MM_MANTISSA_NORM_ENUM __B,
10856			_MM_MANTISSA_SIGN_ENUM __C)
10857{
10858  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10859						     (__C << 2) | __B,
10860						     (__v4df) __W,
10861						     (__mmask8) __U);
10862}
10863
10864extern __inline __m256d
10865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10867			 _MM_MANTISSA_NORM_ENUM __B,
10868			 _MM_MANTISSA_SIGN_ENUM __C)
10869{
10870  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10871						     (__C << 2) | __B,
10872						     (__v4df)
10873						     _mm256_setzero_pd (),
10874						     (__mmask8) __U);
10875}
10876
10877extern __inline __m128d
10878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10879_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10880		_MM_MANTISSA_SIGN_ENUM __C)
10881{
10882  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10883						     (__C << 2) | __B,
10884						     (__v2df)
10885						     _mm_setzero_pd (),
10886						     (__mmask8) -1);
10887}
10888
10889extern __inline __m128d
10890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10892		     _MM_MANTISSA_NORM_ENUM __B,
10893		     _MM_MANTISSA_SIGN_ENUM __C)
10894{
10895  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10896						     (__C << 2) | __B,
10897						     (__v2df) __W,
10898						     (__mmask8) __U);
10899}
10900
10901extern __inline __m128d
10902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10904		      _MM_MANTISSA_NORM_ENUM __B,
10905		      _MM_MANTISSA_SIGN_ENUM __C)
10906{
10907  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10908						     (__C << 2) | __B,
10909						     (__v2df)
10910						     _mm_setzero_pd (),
10911						     (__mmask8) __U);
10912}
10913
10914extern __inline __m256
10915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10916_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10917			   __m256i __index, void const *__addr,
10918			   int __scale)
10919{
10920  return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10921						__addr,
10922						(__v8si) __index,
10923						__mask, __scale);
10924}
10925
10926extern __inline __m128
10927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10928_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10929			__m128i __index, void const *__addr,
10930			int __scale)
10931{
10932  return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10933						__addr,
10934						(__v4si) __index,
10935						__mask, __scale);
10936}
10937
10938extern __inline __m256d
10939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10940_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10941			   __m128i __index, void const *__addr,
10942			   int __scale)
10943{
10944  return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10945						 __addr,
10946						 (__v4si) __index,
10947						 __mask, __scale);
10948}
10949
10950extern __inline __m128d
10951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10952_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10953			__m128i __index, void const *__addr,
10954			int __scale)
10955{
10956  return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10957						 __addr,
10958						 (__v4si) __index,
10959						 __mask, __scale);
10960}
10961
10962extern __inline __m128
10963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10965			   __m256i __index, void const *__addr,
10966			   int __scale)
10967{
10968  return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10969						__addr,
10970						(__v4di) __index,
10971						__mask, __scale);
10972}
10973
10974extern __inline __m128
10975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10976_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10977			__m128i __index, void const *__addr,
10978			int __scale)
10979{
10980  return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10981						__addr,
10982						(__v2di) __index,
10983						__mask, __scale);
10984}
10985
10986extern __inline __m256d
10987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10989			   __m256i __index, void const *__addr,
10990			   int __scale)
10991{
10992  return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10993						 __addr,
10994						 (__v4di) __index,
10995						 __mask, __scale);
10996}
10997
10998extern __inline __m128d
10999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
11001			__m128i __index, void const *__addr,
11002			int __scale)
11003{
11004  return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
11005						 __addr,
11006						 (__v2di) __index,
11007						 __mask, __scale);
11008}
11009
11010extern __inline __m256i
11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
11013			      __m256i __index, void const *__addr,
11014			      int __scale)
11015{
11016  return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
11017						 __addr,
11018						 (__v8si) __index,
11019						 __mask, __scale);
11020}
11021
11022extern __inline __m128i
11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11025			   __m128i __index, void const *__addr,
11026			   int __scale)
11027{
11028  return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11029						 __addr,
11030						 (__v4si) __index,
11031						 __mask, __scale);
11032}
11033
11034extern __inline __m256i
11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11037			      __m128i __index, void const *__addr,
11038			      int __scale)
11039{
11040  return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11041						 __addr,
11042						 (__v4si) __index,
11043						 __mask, __scale);
11044}
11045
11046extern __inline __m128i
11047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11048_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11049			   __m128i __index, void const *__addr,
11050			   int __scale)
11051{
11052  return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11053						 __addr,
11054						 (__v4si) __index,
11055						 __mask, __scale);
11056}
11057
11058extern __inline __m128i
11059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11060_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11061			      __m256i __index, void const *__addr,
11062			      int __scale)
11063{
11064  return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11065						 __addr,
11066						 (__v4di) __index,
11067						 __mask, __scale);
11068}
11069
11070extern __inline __m128i
11071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11072_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11073			   __m128i __index, void const *__addr,
11074			   int __scale)
11075{
11076  return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11077						 __addr,
11078						 (__v2di) __index,
11079						 __mask, __scale);
11080}
11081
11082extern __inline __m256i
11083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11084_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11085			      __m256i __index, void const *__addr,
11086			      int __scale)
11087{
11088  return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11089						 __addr,
11090						 (__v4di) __index,
11091						 __mask, __scale);
11092}
11093
11094extern __inline __m128i
11095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11096_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11097			   __m128i __index, void const *__addr,
11098			   int __scale)
11099{
11100  return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11101						 __addr,
11102						 (__v2di) __index,
11103						 __mask, __scale);
11104}
11105
11106extern __inline void
11107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11108_mm256_i32scatter_ps (void *__addr, __m256i __index,
11109		      __m256 __v1, const int __scale)
11110{
11111  __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11112				(__v8si) __index, (__v8sf) __v1,
11113				__scale);
11114}
11115
11116extern __inline void
11117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11118_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11119			   __m256i __index, __m256 __v1,
11120			   const int __scale)
11121{
11122  __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11123				(__v8sf) __v1, __scale);
11124}
11125
11126extern __inline void
11127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11129		   const int __scale)
11130{
11131  __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11132				(__v4si) __index, (__v4sf) __v1,
11133				__scale);
11134}
11135
11136extern __inline void
11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11139			__m128i __index, __m128 __v1,
11140			const int __scale)
11141{
11142  __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11143				(__v4sf) __v1, __scale);
11144}
11145
11146extern __inline void
11147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148_mm256_i32scatter_pd (void *__addr, __m128i __index,
11149		      __m256d __v1, const int __scale)
11150{
11151  __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11152				(__v4si) __index, (__v4df) __v1,
11153				__scale);
11154}
11155
11156extern __inline void
11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11159			   __m128i __index, __m256d __v1,
11160			   const int __scale)
11161{
11162  __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11163				(__v4df) __v1, __scale);
11164}
11165
11166extern __inline void
11167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11168_mm_i32scatter_pd (void *__addr, __m128i __index,
11169		   __m128d __v1, const int __scale)
11170{
11171  __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11172				(__v4si) __index, (__v2df) __v1,
11173				__scale);
11174}
11175
11176extern __inline void
11177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11178_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11179			__m128i __index, __m128d __v1,
11180			const int __scale)
11181{
11182  __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11183				(__v2df) __v1, __scale);
11184}
11185
11186extern __inline void
11187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11188_mm256_i64scatter_ps (void *__addr, __m256i __index,
11189		      __m128 __v1, const int __scale)
11190{
11191  __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11192				(__v4di) __index, (__v4sf) __v1,
11193				__scale);
11194}
11195
11196extern __inline void
11197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11198_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11199			   __m256i __index, __m128 __v1,
11200			   const int __scale)
11201{
11202  __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11203				(__v4sf) __v1, __scale);
11204}
11205
11206extern __inline void
11207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11208_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11209		   const int __scale)
11210{
11211  __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11212				(__v2di) __index, (__v4sf) __v1,
11213				__scale);
11214}
11215
11216extern __inline void
11217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11218_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11219			__m128i __index, __m128 __v1,
11220			const int __scale)
11221{
11222  __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11223				(__v4sf) __v1, __scale);
11224}
11225
11226extern __inline void
11227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228_mm256_i64scatter_pd (void *__addr, __m256i __index,
11229		      __m256d __v1, const int __scale)
11230{
11231  __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11232				(__v4di) __index, (__v4df) __v1,
11233				__scale);
11234}
11235
11236extern __inline void
11237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11238_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11239			   __m256i __index, __m256d __v1,
11240			   const int __scale)
11241{
11242  __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11243				(__v4df) __v1, __scale);
11244}
11245
11246extern __inline void
11247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11248_mm_i64scatter_pd (void *__addr, __m128i __index,
11249		   __m128d __v1, const int __scale)
11250{
11251  __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11252				(__v2di) __index, (__v2df) __v1,
11253				__scale);
11254}
11255
11256extern __inline void
11257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11259			__m128i __index, __m128d __v1,
11260			const int __scale)
11261{
11262  __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11263				(__v2df) __v1, __scale);
11264}
11265
11266extern __inline void
11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11269			 __m256i __v1, const int __scale)
11270{
11271  __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11272				(__v8si) __index, (__v8si) __v1,
11273				__scale);
11274}
11275
11276extern __inline void
11277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11278_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11279			      __m256i __index, __m256i __v1,
11280			      const int __scale)
11281{
11282  __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11283				(__v8si) __v1, __scale);
11284}
11285
11286extern __inline void
11287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11288_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11289		      __m128i __v1, const int __scale)
11290{
11291  __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11292				(__v4si) __index, (__v4si) __v1,
11293				__scale);
11294}
11295
11296extern __inline void
11297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11298_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11299			   __m128i __index, __m128i __v1,
11300			   const int __scale)
11301{
11302  __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11303				(__v4si) __v1, __scale);
11304}
11305
11306extern __inline void
11307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11308_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11309			 __m256i __v1, const int __scale)
11310{
11311  __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11312				(__v4si) __index, (__v4di) __v1,
11313				__scale);
11314}
11315
11316extern __inline void
11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11319			      __m128i __index, __m256i __v1,
11320			      const int __scale)
11321{
11322  __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11323				(__v4di) __v1, __scale);
11324}
11325
11326extern __inline void
11327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11328_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11329		      __m128i __v1, const int __scale)
11330{
11331  __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11332				(__v4si) __index, (__v2di) __v1,
11333				__scale);
11334}
11335
11336extern __inline void
11337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11338_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11339			   __m128i __index, __m128i __v1,
11340			   const int __scale)
11341{
11342  __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11343				(__v2di) __v1, __scale);
11344}
11345
11346extern __inline void
11347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11348_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11349			 __m128i __v1, const int __scale)
11350{
11351  __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11352				(__v4di) __index, (__v4si) __v1,
11353				__scale);
11354}
11355
11356extern __inline void
11357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11359			      __m256i __index, __m128i __v1,
11360			      const int __scale)
11361{
11362  __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11363				(__v4si) __v1, __scale);
11364}
11365
11366extern __inline void
11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11369		      __m128i __v1, const int __scale)
11370{
11371  __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11372				(__v2di) __index, (__v4si) __v1,
11373				__scale);
11374}
11375
11376extern __inline void
11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11379			   __m128i __index, __m128i __v1,
11380			   const int __scale)
11381{
11382  __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11383				(__v4si) __v1, __scale);
11384}
11385
11386extern __inline void
11387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11388_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11389			 __m256i __v1, const int __scale)
11390{
11391  __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11392				(__v4di) __index, (__v4di) __v1,
11393				__scale);
11394}
11395
11396extern __inline void
11397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11398_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11399			      __m256i __index, __m256i __v1,
11400			      const int __scale)
11401{
11402  __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11403				(__v4di) __v1, __scale);
11404}
11405
11406extern __inline void
11407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11408_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11409		      __m128i __v1, const int __scale)
11410{
11411  __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11412				(__v2di) __index, (__v2di) __v1,
11413				__scale);
11414}
11415
11416extern __inline void
11417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11418_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11419			   __m128i __index, __m128i __v1,
11420			   const int __scale)
11421{
11422  __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11423				(__v2di) __v1, __scale);
11424}
11425
11426extern __inline __m256i
11427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11428_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11429			   _MM_PERM_ENUM __mask)
11430{
11431  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11432						  (__v8si) __W,
11433						  (__mmask8) __U);
11434}
11435
11436extern __inline __m256i
11437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11438_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11439			    _MM_PERM_ENUM __mask)
11440{
11441  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11442						  (__v8si)
11443						  _mm256_setzero_si256 (),
11444						  (__mmask8) __U);
11445}
11446
11447extern __inline __m128i
11448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11449_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11450			_MM_PERM_ENUM __mask)
11451{
11452  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11453						  (__v4si) __W,
11454						  (__mmask8) __U);
11455}
11456
11457extern __inline __m128i
11458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11459_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11460			 _MM_PERM_ENUM __mask)
11461{
11462  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11463						  (__v4si)
11464						  _mm_setzero_si128 (),
11465						  (__mmask8) __U);
11466}
11467
11468extern __inline __m256i
11469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11470_mm256_rol_epi32 (__m256i __A, const int __B)
11471{
11472  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11473						 (__v8si)
11474						 _mm256_setzero_si256 (),
11475						 (__mmask8) -1);
11476}
11477
11478extern __inline __m256i
11479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11480_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11481		       const int __B)
11482{
11483  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11484						 (__v8si) __W,
11485						 (__mmask8) __U);
11486}
11487
11488extern __inline __m256i
11489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11490_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
11491{
11492  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11493						 (__v8si)
11494						 _mm256_setzero_si256 (),
11495						 (__mmask8) __U);
11496}
11497
11498extern __inline __m128i
11499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11500_mm_rol_epi32 (__m128i __A, const int __B)
11501{
11502  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11503						 (__v4si)
11504						 _mm_setzero_si128 (),
11505						 (__mmask8) -1);
11506}
11507
11508extern __inline __m128i
11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11511		    const int __B)
11512{
11513  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11514						 (__v4si) __W,
11515						 (__mmask8) __U);
11516}
11517
11518extern __inline __m128i
11519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11520_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
11521{
11522  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11523						 (__v4si)
11524						 _mm_setzero_si128 (),
11525						 (__mmask8) __U);
11526}
11527
11528extern __inline __m256i
11529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11530_mm256_ror_epi32 (__m256i __A, const int __B)
11531{
11532  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11533						 (__v8si)
11534						 _mm256_setzero_si256 (),
11535						 (__mmask8) -1);
11536}
11537
11538extern __inline __m256i
11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11541		       const int __B)
11542{
11543  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11544						 (__v8si) __W,
11545						 (__mmask8) __U);
11546}
11547
11548extern __inline __m256i
11549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11550_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
11551{
11552  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11553						 (__v8si)
11554						 _mm256_setzero_si256 (),
11555						 (__mmask8) __U);
11556}
11557
11558extern __inline __m128i
11559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11560_mm_ror_epi32 (__m128i __A, const int __B)
11561{
11562  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11563						 (__v4si)
11564						 _mm_setzero_si128 (),
11565						 (__mmask8) -1);
11566}
11567
11568extern __inline __m128i
11569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11570_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11571		    const int __B)
11572{
11573  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11574						 (__v4si) __W,
11575						 (__mmask8) __U);
11576}
11577
11578extern __inline __m128i
11579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11580_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
11581{
11582  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11583						 (__v4si)
11584						 _mm_setzero_si128 (),
11585						 (__mmask8) __U);
11586}
11587
11588extern __inline __m256i
11589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11590_mm256_rol_epi64 (__m256i __A, const int __B)
11591{
11592  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11593						 (__v4di)
11594						 _mm256_setzero_si256 (),
11595						 (__mmask8) -1);
11596}
11597
11598extern __inline __m256i
11599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11601		       const int __B)
11602{
11603  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11604						 (__v4di) __W,
11605						 (__mmask8) __U);
11606}
11607
11608extern __inline __m256i
11609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11611{
11612  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11613						 (__v4di)
11614						 _mm256_setzero_si256 (),
11615						 (__mmask8) __U);
11616}
11617
11618extern __inline __m128i
11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620_mm_rol_epi64 (__m128i __A, const int __B)
11621{
11622  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11623						 (__v2di)
11624						 _mm_setzero_si128 (),
11625						 (__mmask8) -1);
11626}
11627
11628extern __inline __m128i
11629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11630_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11631		    const int __B)
11632{
11633  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11634						 (__v2di) __W,
11635						 (__mmask8) __U);
11636}
11637
11638extern __inline __m128i
11639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11640_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
11641{
11642  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11643						 (__v2di)
11644						 _mm_setzero_si128 (),
11645						 (__mmask8) __U);
11646}
11647
11648extern __inline __m256i
11649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11650_mm256_ror_epi64 (__m256i __A, const int __B)
11651{
11652  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11653						 (__v4di)
11654						 _mm256_setzero_si256 (),
11655						 (__mmask8) -1);
11656}
11657
11658extern __inline __m256i
11659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11660_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11661		       const int __B)
11662{
11663  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11664						 (__v4di) __W,
11665						 (__mmask8) __U);
11666}
11667
11668extern __inline __m256i
11669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11670_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
11671{
11672  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11673						 (__v4di)
11674						 _mm256_setzero_si256 (),
11675						 (__mmask8) __U);
11676}
11677
11678extern __inline __m128i
11679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11680_mm_ror_epi64 (__m128i __A, const int __B)
11681{
11682  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11683						 (__v2di)
11684						 _mm_setzero_si128 (),
11685						 (__mmask8) -1);
11686}
11687
11688extern __inline __m128i
11689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11690_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11691		    const int __B)
11692{
11693  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11694						 (__v2di) __W,
11695						 (__mmask8) __U);
11696}
11697
11698extern __inline __m128i
11699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11700_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11701{
11702  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11703						 (__v2di)
11704						 _mm_setzero_si128 (),
11705						 (__mmask8) __U);
11706}
11707
11708extern __inline __m128i
11709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11711{
11712  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11713						  (__v4si) __B, __imm,
11714						  (__v4si)
11715						  _mm_setzero_si128 (),
11716						  (__mmask8) -1);
11717}
11718
11719extern __inline __m128i
11720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11722		       __m128i __B, const int __imm)
11723{
11724  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11725						  (__v4si) __B, __imm,
11726						  (__v4si) __W,
11727						  (__mmask8) __U);
11728}
11729
11730extern __inline __m128i
11731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11733			const int __imm)
11734{
11735  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11736						  (__v4si) __B, __imm,
11737						  (__v4si)
11738						  _mm_setzero_si128 (),
11739						  (__mmask8) __U);
11740}
11741
11742extern __inline __m128i
11743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11744_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11745{
11746  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11747						  (__v2di) __B, __imm,
11748						  (__v2di)
11749						  _mm_setzero_si128 (),
11750						  (__mmask8) -1);
11751}
11752
11753extern __inline __m128i
11754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11755_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11756		       __m128i __B, const int __imm)
11757{
11758  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11759						  (__v2di) __B, __imm,
11760						  (__v2di) __W,
11761						  (__mmask8) __U);
11762}
11763
11764extern __inline __m128i
11765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11766_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11767			const int __imm)
11768{
11769  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11770						  (__v2di) __B, __imm,
11771						  (__v2di)
11772						  _mm_setzero_si128 (),
11773						  (__mmask8) __U);
11774}
11775
11776extern __inline __m256i
11777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11778_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11779{
11780  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11781						  (__v8si) __B, __imm,
11782						  (__v8si)
11783						  _mm256_setzero_si256 (),
11784						  (__mmask8) -1);
11785}
11786
11787extern __inline __m256i
11788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11789_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11790			  __m256i __B, const int __imm)
11791{
11792  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11793						  (__v8si) __B, __imm,
11794						  (__v8si) __W,
11795						  (__mmask8) __U);
11796}
11797
11798extern __inline __m256i
11799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11801			   const int __imm)
11802{
11803  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11804						  (__v8si) __B, __imm,
11805						  (__v8si)
11806						  _mm256_setzero_si256 (),
11807						  (__mmask8) __U);
11808}
11809
11810extern __inline __m256i
11811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11813{
11814  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11815						  (__v4di) __B, __imm,
11816						  (__v4di)
11817						  _mm256_setzero_si256 (),
11818						  (__mmask8) -1);
11819}
11820
11821extern __inline __m256i
11822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11824			  __m256i __B, const int __imm)
11825{
11826  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11827						  (__v4di) __B, __imm,
11828						  (__v4di) __W,
11829						  (__mmask8) __U);
11830}
11831
11832extern __inline __m256i
11833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11834_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11835			   const int __imm)
11836{
11837  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11838						  (__v4di) __B, __imm,
11839						  (__v4di)
11840						  _mm256_setzero_si256 (),
11841						  (__mmask8) __U);
11842}
11843
11844extern __inline __m128i
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11847		   const int __I)
11848{
11849  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11850						  (__v8hi) __W,
11851						  (__mmask8) __U);
11852}
11853
11854extern __inline __m128i
11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11857{
11858  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11859						  (__v8hi)
11860						  _mm_setzero_si128 (),
11861						  (__mmask8) __U);
11862}
11863
11864extern __inline __m128i
11865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11867		      const int __I)
11868{
11869  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11870						     (__v8hi) __W,
11871						     (__mmask8) __U);
11872}
11873
11874extern __inline __m128i
11875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11876_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11877{
11878  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11879						     (__v8hi)
11880						     _mm_setzero_si128 (),
11881						     (__mmask8) __U);
11882}
11883
11884extern __inline __m256i
11885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11887			const int __imm)
11888{
11889  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11890						  (__v8si) __W,
11891						  (__mmask8) __U);
11892}
11893
11894extern __inline __m256i
11895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11897{
11898  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11899						  (__v8si)
11900						  _mm256_setzero_si256 (),
11901						  (__mmask8) __U);
11902}
11903
11904extern __inline __m128i
11905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11906_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11907		     const int __imm)
11908{
11909  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11910						  (__v4si) __W,
11911						  (__mmask8) __U);
11912}
11913
11914extern __inline __m128i
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11917{
11918  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11919						  (__v4si)
11920						  _mm_setzero_si128 (),
11921						  (__mmask8) __U);
11922}
11923
11924extern __inline __m256i
11925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926_mm256_srai_epi64 (__m256i __A, const int __imm)
11927{
11928  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11929						  (__v4di)
11930						  _mm256_setzero_si256 (),
11931						  (__mmask8) -1);
11932}
11933
11934extern __inline __m256i
11935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11936_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11937			const int __imm)
11938{
11939  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11940						  (__v4di) __W,
11941						  (__mmask8) __U);
11942}
11943
11944extern __inline __m256i
11945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11947{
11948  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11949						  (__v4di)
11950						  _mm256_setzero_si256 (),
11951						  (__mmask8) __U);
11952}
11953
11954extern __inline __m128i
11955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956_mm_srai_epi64 (__m128i __A, const int __imm)
11957{
11958  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11959						  (__v2di)
11960						  _mm_setzero_si128 (),
11961						  (__mmask8) -1);
11962}
11963
11964extern __inline __m128i
11965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11966_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11967		     const int __imm)
11968{
11969  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11970						  (__v2di) __W,
11971						  (__mmask8) __U);
11972}
11973
11974extern __inline __m128i
11975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11977{
11978  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11979						  (__v2di)
11980						  _mm_setzero_si128 (),
11981						  (__mmask8) __U);
11982}
11983
11984extern __inline __m128i
11985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11987{
11988  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11989						  (__v4si) __W,
11990						  (__mmask8) __U);
11991}
11992
11993extern __inline __m128i
11994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11995_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11996{
11997  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11998						  (__v4si)
11999						  _mm_setzero_si128 (),
12000						  (__mmask8) __U);
12001}
12002
12003extern __inline __m128i
12004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12005_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
12006{
12007  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12008						  (__v2di) __W,
12009						  (__mmask8) __U);
12010}
12011
12012extern __inline __m128i
12013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
12015{
12016  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12017						  (__v2di)
12018						  _mm_setzero_si128 (),
12019						  (__mmask8) __U);
12020}
12021
12022extern __inline __m256i
12023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12024_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
12025			int __B)
12026{
12027  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12028						  (__v8si) __W,
12029						  (__mmask8) __U);
12030}
12031
12032extern __inline __m256i
12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12035{
12036  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12037						  (__v8si)
12038						  _mm256_setzero_si256 (),
12039						  (__mmask8) __U);
12040}
12041
12042extern __inline __m256i
12043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12045			int __B)
12046{
12047  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12048						  (__v4di) __W,
12049						  (__mmask8) __U);
12050}
12051
12052extern __inline __m256i
12053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
12055{
12056  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12057						  (__v4di)
12058						  _mm256_setzero_si256 (),
12059						  (__mmask8) __U);
12060}
12061
12062extern __inline __m256d
12063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12065			 const int __imm)
12066{
12067  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12068						  (__v4df) __W,
12069						  (__mmask8) __U);
12070}
12071
12072extern __inline __m256d
12073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12074_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
12075{
12076  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12077						  (__v4df)
12078						  _mm256_setzero_pd (),
12079						  (__mmask8) __U);
12080}
12081
12082extern __inline __m256d
12083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12085			const int __C)
12086{
12087  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12088						     (__v4df) __W,
12089						     (__mmask8) __U);
12090}
12091
12092extern __inline __m256d
12093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12094_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
12095{
12096  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12097						     (__v4df)
12098						     _mm256_setzero_pd (),
12099						     (__mmask8) __U);
12100}
12101
12102extern __inline __m128d
12103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12104_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12105		     const int __C)
12106{
12107  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12108						  (__v2df) __W,
12109						  (__mmask8) __U);
12110}
12111
12112extern __inline __m128d
12113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12114_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
12115{
12116  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12117						  (__v2df)
12118						  _mm_setzero_pd (),
12119						  (__mmask8) __U);
12120}
12121
12122extern __inline __m256
12123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12125			const int __C)
12126{
12127  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12128						    (__v8sf) __W,
12129						    (__mmask8) __U);
12130}
12131
12132extern __inline __m256
12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
12135{
12136  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12137						    (__v8sf)
12138						    _mm256_setzero_ps (),
12139						    (__mmask8) __U);
12140}
12141
12142extern __inline __m128
12143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12144_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12145		     const int __C)
12146{
12147  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12148						 (__v4sf) __W,
12149						 (__mmask8) __U);
12150}
12151
12152extern __inline __m128
12153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
12155{
12156  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12157						 (__v4sf)
12158						 _mm_setzero_ps (),
12159						 (__mmask8) __U);
12160}
12161
12162extern __inline __m256d
12163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
12165{
12166  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12167						     (__v4df) __W,
12168						     (__mmask8) __U);
12169}
12170
12171extern __inline __m256
12172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12173_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
12174{
12175  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12176						    (__v8sf) __W,
12177						    (__mmask8) __U);
12178}
12179
12180extern __inline __m256i
12181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12182_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
12183{
12184  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12185						    (__v4di) __W,
12186						    (__mmask8) __U);
12187}
12188
12189extern __inline __m256i
12190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12191_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
12192{
12193  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12194						    (__v8si) __W,
12195						    (__mmask8) __U);
12196}
12197
12198extern __inline __m128d
12199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12200_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
12201{
12202  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12203						     (__v2df) __W,
12204						     (__mmask8) __U);
12205}
12206
12207extern __inline __m128
12208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12209_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
12210{
12211  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12212						    (__v4sf) __W,
12213						    (__mmask8) __U);
12214}
12215
12216extern __inline __m128i
12217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12218_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
12219{
12220  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12221						    (__v2di) __W,
12222						    (__mmask8) __U);
12223}
12224
12225extern __inline __m128i
12226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12227_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
12228{
12229  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12230						    (__v4si) __W,
12231						    (__mmask8) __U);
12232}
12233
12234extern __inline __mmask8
12235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12236_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
12237{
12238  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12239						 (__v4di) __Y, __P,
12240						 (__mmask8) -1);
12241}
12242
12243extern __inline __mmask8
12244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12245_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
12246{
12247  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12248						 (__v8si) __Y, __P,
12249						 (__mmask8) -1);
12250}
12251
12252extern __inline __mmask8
12253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12254_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
12255{
12256  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12257						  (__v4di) __Y, __P,
12258						  (__mmask8) -1);
12259}
12260
12261extern __inline __mmask8
12262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12263_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
12264{
12265  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12266						  (__v8si) __Y, __P,
12267						  (__mmask8) -1);
12268}
12269
12270extern __inline __mmask8
12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
12273{
12274  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12275						  (__v4df) __Y, __P,
12276						  (__mmask8) -1);
12277}
12278
12279extern __inline __mmask8
12280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12281_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
12282{
12283  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12284						  (__v8sf) __Y, __P,
12285						  (__mmask8) -1);
12286}
12287
12288extern __inline __mmask8
12289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12290_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12291			    const int __P)
12292{
12293  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12294						 (__v4di) __Y, __P,
12295						 (__mmask8) __U);
12296}
12297
12298extern __inline __mmask8
12299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12301			    const int __P)
12302{
12303  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12304						 (__v8si) __Y, __P,
12305						 (__mmask8) __U);
12306}
12307
12308extern __inline __mmask8
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12311			    const int __P)
12312{
12313  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12314						  (__v4di) __Y, __P,
12315						  (__mmask8) __U);
12316}
12317
12318extern __inline __mmask8
12319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12320_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12321			    const int __P)
12322{
12323  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12324						  (__v8si) __Y, __P,
12325						  (__mmask8) __U);
12326}
12327
12328extern __inline __mmask8
12329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12330_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12331			 const int __P)
12332{
12333  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12334						  (__v4df) __Y, __P,
12335						  (__mmask8) __U);
12336}
12337
12338extern __inline __mmask8
12339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12341			 const int __P)
12342{
12343  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12344						  (__v8sf) __Y, __P,
12345						  (__mmask8) __U);
12346}
12347
12348extern __inline __mmask8
12349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12350_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
12351{
12352  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12353						 (__v2di) __Y, __P,
12354						 (__mmask8) -1);
12355}
12356
12357extern __inline __mmask8
12358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12359_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
12360{
12361  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12362						 (__v4si) __Y, __P,
12363						 (__mmask8) -1);
12364}
12365
12366extern __inline __mmask8
12367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
12369{
12370  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12371						  (__v2di) __Y, __P,
12372						  (__mmask8) -1);
12373}
12374
12375extern __inline __mmask8
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
12378{
12379  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12380						  (__v4si) __Y, __P,
12381						  (__mmask8) -1);
12382}
12383
12384extern __inline __mmask8
12385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
12387{
12388  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12389						  (__v2df) __Y, __P,
12390						  (__mmask8) -1);
12391}
12392
12393extern __inline __mmask8
12394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12395_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
12396{
12397  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12398						  (__v4sf) __Y, __P,
12399						  (__mmask8) -1);
12400}
12401
12402extern __inline __mmask8
12403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12404_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12405			 const int __P)
12406{
12407  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12408						 (__v2di) __Y, __P,
12409						 (__mmask8) __U);
12410}
12411
12412extern __inline __mmask8
12413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12414_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12415			 const int __P)
12416{
12417  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12418						 (__v4si) __Y, __P,
12419						 (__mmask8) __U);
12420}
12421
12422extern __inline __mmask8
12423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12424_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12425			 const int __P)
12426{
12427  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12428						  (__v2di) __Y, __P,
12429						  (__mmask8) __U);
12430}
12431
12432extern __inline __mmask8
12433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12435			 const int __P)
12436{
12437  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12438						  (__v4si) __Y, __P,
12439						  (__mmask8) __U);
12440}
12441
12442extern __inline __mmask8
12443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12444_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12445		      const int __P)
12446{
12447  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12448						  (__v2df) __Y, __P,
12449						  (__mmask8) __U);
12450}
12451
12452extern __inline __mmask8
12453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12454_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12455		      const int __P)
12456{
12457  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12458						  (__v4sf) __Y, __P,
12459						  (__mmask8) __U);
12460}
12461
12462extern __inline __m256d
12463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12464_mm256_permutex_pd (__m256d __X, const int __M)
12465{
12466  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12467						  (__v4df)
12468						  _mm256_undefined_pd (),
12469						  (__mmask8) -1);
12470}
12471
12472#else
/* Macro form of _mm256_permutex_pd: forwards X and the immediate M to
   __builtin_ia32_permdf256_mask with the result of
   _mm256_undefined_pd () as pass-through operand and an all-ones mask.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
    (__v4df)(__m256d) (X), (int) (M), \
    (__v4df)(__m256d) _mm256_undefined_pd (), (__mmask8) -1))
12478
/* 64-bit integer permutex macros, all expanding to
   __builtin_ia32_permdi256_mask with source X and immediate I.  The plain
   form supplies a zero vector and an all-ones mask, the mask form
   supplies W and M, and the maskz form supplies a zero vector and M.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
    (__v4di)(__m256i) (X), (int) (I), \
    (__v4di)(__m256i) (_mm256_setzero_si256 ()), (__mmask8) -1))

#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
    (__v4di)(__m256i) (X), (int) (I), \
    (__v4di)(__m256i) (W), (__mmask8) (M)))

#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
    (__v4di)(__m256i) (X), (int) (I), \
    (__v4di)(__m256i) (_mm256_setzero_si256 ()), (__mmask8) (M)))
12498
/* insertf32x4 macros, all expanding to
   __builtin_ia32_insertf32x4_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
    (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
12516
/* inserti32x4 macros, all expanding to
   __builtin_ia32_inserti32x4_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) -1))

#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
    (__v8si)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
12534
/* extractf32x4 macros, all expanding to
   __builtin_ia32_extractf32x4_256_mask with source X and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (int) (C), \
    (__v4sf)(__m128) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
12552
/* extracti32x4 macros, all expanding to
   __builtin_ia32_extracti32x4_256_mask with source X and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) -1))

#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (int) (C), \
    (__v4si)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
12564
/* shuffle_i64x2 macros, all expanding to
   __builtin_ia32_shuf_i64x2_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), (__v4di)(__m256i) (Y), (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) -1))

#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), (__v4di)(__m256i) (Y), (int) (C), \
    (__v4di)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), (__v4di)(__m256i) (Y), (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
12582
/* shuffle_i32x4 macros, all expanding to
   __builtin_ia32_shuf_i32x4_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v8si)(__m256i) (Y), (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) -1))

#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v8si)(__m256i) (Y), (int) (C), \
    (__v8si)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), (__v8si)(__m256i) (Y), (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
12602
/* shuffle_f64x2 macros, all expanding to
   __builtin_ia32_shuf_f64x2_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) -1))

#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), (int) (C), \
    (__v4df)(__m256d) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
12620
/* shuffle_f32x4 macros, all expanding to
   __builtin_ia32_shuf_f32x4_256_mask with operands X, Y and immediate C.
   The plain form supplies a zero vector and an all-ones mask, the mask
   form supplies W and U, and the maskz form supplies a zero vector and
   U.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), (int) (C), \
    (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
12638
/* Masked shuffle_pd macros.  The 256-bit forms expand to
   __builtin_ia32_shufpd256_mask and the 128-bit forms to
   __builtin_ia32_shufpd128_mask, with operands A, B and immediate C.
   The mask forms supply W and U; the maskz forms supply a zero vector
   and U.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), (__v4df)(__m256d) (B), (int) (C), \
    (__v4df)(__m256d) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), (__v4df)(__m256d) (B), (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))

#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), (__v2df)(__m128d) (B), (int) (C), \
    (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), (__v2df)(__m128d) (B), (int) (C), \
    (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
12663
/* Masked shuffle_ps macros.  The 256-bit forms expand to
   __builtin_ia32_shufps256_mask and the 128-bit forms to
   __builtin_ia32_shufps128_mask, with operands A, B and immediate C.
   The mask forms supply W and U; the maskz forms supply a zero vector
   and U.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), (__v8sf)(__m256) (B), (int) (C), \
    (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), (__v8sf)(__m256) (B), (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), (__v4sf)(__m128) (B), (int) (C), \
    (__v4sf)(__m128) (W), (__mmask8) (U)))

#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), (__v4sf)(__m128) (B), (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
12687
/* 256-bit fixupimm_pd macros.  The plain and mask forms expand to
   __builtin_ia32_fixupimmpd256_mask (all-ones mask and U respectively);
   the maskz form expands to __builtin_ia32_fixupimmpd256_maskz with U.
   Operands are X, Y, the integer vector Z and the immediate C.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), (int) (C), (__mmask8) (-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), (int) (C), (__mmask8) (U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
    (__v4df)(__m256d) (X), (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), (int) (C), (__mmask8) (U)))
12705
/* 256-bit fixupimm_ps macros.  The plain and mask forms expand to
   __builtin_ia32_fixupimmps256_mask (all-ones mask and U respectively);
   the maskz form expands to __builtin_ia32_fixupimmps256_maskz with U.
   Operands are X, Y, the integer vector Z and the immediate C.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), (int) (C), (__mmask8) (-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), (int) (C), (__mmask8) (U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
    (__v8sf)(__m256) (X), (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), (int) (C), (__mmask8) (U)))
12724
/* 128-bit fixupimm_pd macros.  The plain and mask forms expand to
   __builtin_ia32_fixupimmpd128_mask (all-ones mask and U respectively);
   the maskz form expands to __builtin_ia32_fixupimmpd128_maskz with U.
   Operands are X, Y, the integer vector Z and the immediate C.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), (int) (C), (__mmask8) (-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), (int) (C), (__mmask8) (U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
    (__v2df)(__m128d) (X), (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), (int) (C), (__mmask8) (U)))
12743
/* 128-bit fixupimm_ps macros.  The plain and mask forms expand to
   __builtin_ia32_fixupimmps128_mask (all-ones mask and U respectively);
   the maskz form expands to __builtin_ia32_fixupimmps128_maskz with U.
   Operands are X, Y, the integer vector Z and the immediate C.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), (int) (C), (__mmask8) (-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), (int) (C), (__mmask8) (U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
    (__v4sf)(__m128) (X), (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), (int) (C), (__mmask8) (U)))
12761
/* Masked srli_epi32 macros.  The 256-bit forms expand to
   __builtin_ia32_psrldi256_mask and the 128-bit forms to
   __builtin_ia32_psrldi128_mask, with source A and immediate B.  The
   mask forms supply W and U; the maskz forms supply a zero vector and
   U.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), (int) (B), \
    (__v8si)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), (int) (B), \
    (__v8si) _mm256_setzero_si256 (), (__mmask8) (U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), (int) (B), \
    (__v4si)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), (int) (B), \
    (__v4si) _mm_setzero_si128 (), (__mmask8) (U)))
12777
/* Masked srli_epi64 macros.  The 256-bit forms expand to
   __builtin_ia32_psrlqi256_mask and the 128-bit forms to
   __builtin_ia32_psrlqi128_mask, with source A and immediate B.  The
   mask forms supply W and U; the maskz forms supply a zero vector and
   U.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), (int) (B), \
    (__v4di)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), (int) (B), \
    (__v4di) _mm256_setzero_si256 (), (__mmask8) (U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), (int) (B), \
    (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), (int) (B), \
    (__v2di) _mm_setzero_si128 (), (__mmask8) (U)))
12793
/* 256-bit masked slli macros.  The epi32 forms expand to
   __builtin_ia32_pslldi256_mask and the epi64 forms to
   __builtin_ia32_psllqi256_mask, with source X and immediate C.  The
   mask forms supply W and U; the maskz forms supply a zero vector and
   U.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), (int) (C), \
    (__v8si)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), (int) (C), \
    (__v4di)(__m256i) (W), (__mmask8) (U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
12813
/* 128-bit masked slli macros.  The epi32 forms expand to
   __builtin_ia32_pslldi128_mask and the epi64 forms to
   __builtin_ia32_psllqi128_mask, with source X and immediate C.  The
   mask forms supply W and U; the maskz forms supply a zero vector and
   U.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), (int) (C), \
    (__v4si)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), (int) (C), \
    (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), (int) (C), \
    (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
12833
/* 256-bit ternarylogic_epi64 macros.  The plain and mask forms expand
   to __builtin_ia32_pternlogq256_mask (all-ones mask and U
   respectively); the maskz form expands to
   __builtin_ia32_pternlogq256_maskz with U.  Operands are A, B, C and
   the immediate I.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), (__v4di)(__m256i) (B), (__v4di)(__m256i) (C), \
    (int) (I), (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), (__v4di)(__m256i) (B), (__v4di)(__m256i) (C), \
    (int) (I), (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
    (__v4di)(__m256i) (A), (__v4di)(__m256i) (B), (__v4di)(__m256i) (C), \
    (int) (I), (__mmask8) (U)))
12845
/* 256-bit ternarylogic_epi32 macros.  The plain and mask forms expand
   to __builtin_ia32_pternlogd256_mask (all-ones mask and U
   respectively); the maskz form expands to
   __builtin_ia32_pternlogd256_maskz with U.  Operands are A, B, C and
   the immediate I.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), (__v8si)(__m256i) (B), (__v8si)(__m256i) (C), \
    (int) (I), (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), (__v8si)(__m256i) (B), (__v8si)(__m256i) (C), \
    (int) (I), (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
    (__v8si)(__m256i) (A), (__v8si)(__m256i) (B), (__v8si)(__m256i) (C), \
    (int) (I), (__mmask8) (U)))
12857
/* 128-bit ternarylogic_epi64 macros.  The plain and mask forms expand
   to __builtin_ia32_pternlogq128_mask (all-ones mask and U
   respectively); the maskz form expands to
   __builtin_ia32_pternlogq128_maskz with U.  Operands are A, B, C and
   the immediate I.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), (__v2di)(__m128i) (B), (__v2di)(__m128i) (C), \
    (int) (I), (__mmask8) -1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), (__v2di)(__m128i) (B), (__v2di)(__m128i) (C), \
    (int) (I), (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
    (__v2di)(__m128i) (A), (__v2di)(__m128i) (B), (__v2di)(__m128i) (C), \
    (int) (I), (__mmask8) (U)))
12869
/* 128-bit ternarylogic_epi32 macros.  The plain and mask forms expand
   to __builtin_ia32_pternlogd128_mask (all-ones mask and U
   respectively); the maskz form expands to
   __builtin_ia32_pternlogd128_maskz with U.  Operands are A, B, C and
   the immediate I.  */
#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), (__v4si)(__m128i) (B), (__v4si)(__m128i) (C), \
    (int) (I), (__mmask8) -1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), (__v4si)(__m128i) (B), (__v4si)(__m128i) (C), \
    (int) (I), (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
    (__v4si)(__m128i) (A), (__v4si)(__m128i) (B), (__v4si)(__m128i) (C), \
    (int) (I), (__mmask8) (U)))
12881
/* 256-bit roundscale_ps macros, all expanding to
   __builtin_ia32_rndscaleps_256_mask with source A and immediate B.  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), (int) (B), \
    (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
12893
/* 256-bit roundscale_pd macros, all expanding to
   __builtin_ia32_rndscalepd_256_mask with source A and immediate B.  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) -1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), (int) (B), \
    (__v4df)(__m256d) (W), (__mmask8) (U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
12905
/* 128-bit roundscale_ps macros, all expanding to
   __builtin_ia32_rndscaleps_128_mask with source A and immediate B.  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) -1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), (int) (B), \
    (__v4sf)(__m128) (W), (__mmask8) (U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
12917
/* 128-bit roundscale_pd macros, all expanding to
   __builtin_ia32_rndscalepd_128_mask with source A and immediate B.  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) -1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), (int) (B), \
    (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
12929
/* 256-bit getmant_ps macros, all expanding to
   __builtin_ia32_getmantps256_mask with source X.  The builtin's
   immediate is built from the two selectors as ((C) << 2) | (B).  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
12947
/* 128-bit getmant_ps macros, all expanding to
   __builtin_ia32_getmantps128_mask with source X.  The builtin's
   immediate is built from the two selectors as ((C) << 2) | (B).  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) -1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) (W), (__mmask8) (U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
12965
/* 256-bit getmant_pd macros, all expanding to
   __builtin_ia32_getmantpd256_mask with source X.  The builtin's
   immediate is built from the two selectors as ((C) << 2) | (B).  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) -1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) (W), (__mmask8) (U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
12983
/* 128-bit getmant_pd macros, all expanding to
   __builtin_ia32_getmantpd128_mask with source X.  The builtin's
   immediate is built from the two selectors as ((C) << 2) | (B).  The
   plain form supplies a zero vector and an all-ones mask, the mask form
   supplies W and U, and the maskz form supplies a zero vector and U.  */
#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) -1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
13001
/* Masked gathers of float/double elements addressed by 32-bit indices.
   Each macro forwards V1OLD, ADDR, INDEX, MASK and SCALE to the matching
   __builtin_ia32_gather3siv* builtin.  The whole expansion is wrapped in
   parentheses so that the leading cast cannot combine with operators
   written next to the macro call (for example a trailing subscript or a
   preceding sizeof).  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
    (__v8sf)(__m256) (V1OLD), (void const *) (ADDR), \
    (__v8si)(__m256i) (INDEX), (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
    (__v4sf)(__m128) (V1OLD), (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
    (__v4df)(__m256d) (V1OLD), (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
    (__v2df)(__m128d) (V1OLD), (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), (__mmask8) (MASK), (int) (SCALE)))
13029
/* Masked gathers using __v4di / __v2di index vectors; both forms produce
   and accept a __m128 value.  Operands are forwarded to the builtin in the
   order (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole expansion is
   parenthesized so the result cast applies to the builtin call even when
   the macro is followed by a postfix operator or used as a sizeof
   operand.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v4di)(__m256i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)		\
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v2di)(__m128i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
13043
/* Masked gathers into __m256d / __m128d results using __v4di / __v2di
   index vectors.  Operands are forwarded to the builtin in the order
   (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole expansion is parenthesized
   so the result cast applies to the builtin call even when the macro is
   followed by a postfix operator or used as a sizeof operand.  */
#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4di)(__m256i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)		\
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v2di)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
13057
/* Masked gathers into __m256i / __m128i results, with value and index
   vectors both viewed as __v8si / __v4si.  Operands are forwarded to the
   builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole
   expansion is parenthesized so the result cast applies to the builtin
   call even when the macro is followed by a postfix operator or used as a
   sizeof operand.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v8si)(__m256i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4si)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
13071
/* Masked gathers into __m256i / __m128i results viewed as __v4di / __v2di;
   both forms take a __v4si index vector.  Operands are forwarded to the
   builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole
   expansion is parenthesized so the result cast applies to the builtin
   call even when the macro is followed by a postfix operator or used as a
   sizeof operand.  */
#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4si)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4si)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
13085
/* Masked gathers using __v4di / __v2di index vectors; both forms produce
   and accept a __m128i value viewed as __v4si.  Operands are forwarded to
   the builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole
   expansion is parenthesized so the result cast applies to the builtin
   call even when the macro is followed by a postfix operator or used as a
   sizeof operand.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4di)(__m256i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v2di)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
13099
/* Masked gathers into __m256i / __m128i results, with value and index
   vectors both viewed as __v4di / __v2di.  Operands are forwarded to the
   builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).  The whole
   expansion is parenthesized so the result cast applies to the builtin
   call even when the macro is followed by a postfix operator or used as a
   sizeof operand.  */
#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v4di)(__m256i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v2di)(__m128i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
13113
/* Scatter macros for __m256 / __m128 values with __v8si / __v4si index
   vectors.  Each is a bare call to a __builtin_ia32_scattersiv*sf builtin
   with operands in the order (address, mask, index, value, scale); the
   forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i32scatter_ps(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si) (__m256i) (IDX), \
                                (__v8sf) (__m256) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (K), \
                                (__v8si) (__m256i) (IDX), \
                                (__v8sf) (__m256) (SRC), \
                                (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))
13133
/* Scatter macros for __m256d / __m128d values; all four take a __v4si
   index vector.  Each is a bare call to a __builtin_ia32_scattersiv*df
   builtin with operands in the order (address, mask, index, value, scale);
   the forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i32scatter_pd(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v4df) (__m256d) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v4df) (__m256d) (SRC), \
                                (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v2df) (__m128d) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v2df) (__m128d) (SRC), \
                                (int) (SCALE))
13153
/* Scatter macros for __m128 values with __v4di / __v2di index vectors.
   Each is a bare call to a __builtin_ia32_scatterdiv*sf builtin with
   operands in the order (address, mask, index, value, scale); the forms
   without a mask parameter pass the constant 0xFF.  */
#define _mm256_i64scatter_ps(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (K), \
                                (__v4di) (__m256i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (K), \
                                (__v2di) (__m128i) (IDX), \
                                (__v4sf) (__m128) (SRC), \
                                (int) (SCALE))
13173
/* Scatter macros for __m256d / __m128d values with __v4di / __v2di index
   vectors.  Each is a bare call to a __builtin_ia32_scatterdiv*df builtin
   with operands in the order (address, mask, index, value, scale); the
   forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i64scatter_pd(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (IDX), \
                                (__v4df) (__m256d) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (K), \
                                (__v4di) (__m256i) (IDX), \
                                (__v4df) (__m256d) (SRC), \
                                (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (IDX), \
                                (__v2df) (__m128d) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (K), \
                                (__v2di) (__m128i) (IDX), \
                                (__v2df) (__m128d) (SRC), \
                                (int) (SCALE))
13193
/* Scatter macros whose value and index vectors are both viewed as
   __v8si / __v4si.  Each is a bare call to a __builtin_ia32_scattersiv*si
   builtin with operands in the order (address, mask, index, value, scale);
   the forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i32scatter_epi32(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si) (__m256i) (IDX), \
                                (__v8si) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (K), \
                                (__v8si) (__m256i) (IDX), \
                                (__v8si) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm_i32scatter_epi32(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))
13213
/* Scatter macros for values viewed as __v4di / __v2di; all four take a
   __v4si index vector.  Each is a bare call to a
   __builtin_ia32_scattersiv*di builtin with operands in the order
   (address, mask, index, value, scale); the forms without a mask parameter
   pass the constant 0xFF.  */
#define _mm256_i32scatter_epi64(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v4di) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v4di) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm_i32scatter_epi64(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si) (__m128i) (IDX), \
                                (__v2di) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (K), \
                                (__v4si) (__m128i) (IDX), \
                                (__v2di) (__m128i) (SRC), \
                                (int) (SCALE))
13233
/* Scatter macros for __m128i values viewed as __v4si, with __v4di / __v2di
   index vectors.  Each is a bare call to a __builtin_ia32_scatterdiv*si
   builtin with operands in the order (address, mask, index, value, scale);
   the forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i64scatter_epi32(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (K), \
                                (__v4di) (__m256i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (K), \
                                (__v2di) (__m128i) (IDX), \
                                (__v4si) (__m128i) (SRC), \
                                (int) (SCALE))
13253
/* Scatter macros whose value and index vectors are both viewed as
   __v4di / __v2di.  Each is a bare call to a __builtin_ia32_scatterdiv*di
   builtin with operands in the order (address, mask, index, value, scale);
   the forms without a mask parameter pass the constant 0xFF.  */
#define _mm256_i64scatter_epi64(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (IDX), \
                                (__v4di) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (K), \
                                (__v4di) (__m256i) (IDX), \
                                (__v4di) (__m256i) (SRC), \
                                (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (IDX), \
                                (__v2di) (__m128i) (SRC), \
                                (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, K, IDX, SRC, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (K), \
                                (__v2di) (__m128i) (IDX), \
                                (__v2di) (__m128i) (SRC), \
                                (int) (SCALE))
13273
/* 256-bit masked shuffle_epi32 macros.  Both expand to
   __builtin_ia32_pshufd256_mask (value, control, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (I), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (I), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13284
/* 128-bit masked shuffle_epi32 macros.  Both expand to
   __builtin_ia32_pshufd128_mask (value, control, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (I), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (I), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13294
/* 256-bit rol_epi64 macros, all built on __builtin_ia32_prolq256_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm256_rol_epi64(V, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_rol_epi64(W, U, V, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_rol_epi64(U, V, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13309
/* 128-bit rol_epi64 macros, all built on __builtin_ia32_prolq128_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm_rol_epi64(V, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_rol_epi64(W, U, V, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_rol_epi64(U, V, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13324
/* 256-bit ror_epi64 macros, all built on __builtin_ia32_prorq256_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm256_ror_epi64(V, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_ror_epi64(W, U, V, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_ror_epi64(U, V, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13339
/* 128-bit ror_epi64 macros, all built on __builtin_ia32_prorq128_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm_ror_epi64(V, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_ror_epi64(W, U, V, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_ror_epi64(U, V, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13354
/* 256-bit rol_epi32 macros, all built on __builtin_ia32_prold256_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm256_rol_epi32(V, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_rol_epi32(W, U, V, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_rol_epi32(U, V, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13369
/* 128-bit rol_epi32 macros, all built on __builtin_ia32_prold128_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm_rol_epi32(V, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_rol_epi32(W, U, V, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_rol_epi32(U, V, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13384
/* 256-bit ror_epi32 macros, all built on __builtin_ia32_prord256_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm256_ror_epi32(V, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_ror_epi32(W, U, V, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_ror_epi32(U, V, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13400
/* 128-bit ror_epi32 macros, all built on __builtin_ia32_prord128_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm_ror_epi32(V, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_ror_epi32(W, U, V, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_ror_epi32(U, V, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13415
/* Unmasked 256-bit alignr_epi32: calls __builtin_ia32_alignd256_mask with
   an all-ones mask.  The fourth operand is a zero vector rather than a
   second expansion of X, so an argument with side effects is evaluated
   only once; with every mask bit set that operand should not contribute
   to the result.  This matches the other unmasked macros in this file.  */
#define _mm256_alignr_epi32(X, Y, C)                                        \
    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X),          \
        (__v8si)(__m256i)(Y), (int)(C),                                     \
        (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
13419
/* Masked 256-bit alignr_epi32 macros.  Both expand to
   __builtin_ia32_alignd256_mask (X, Y, C, fourth operand, mask): the mask
   form forwards W, the maskz form supplies a zero vector.  */
#define _mm256_mask_alignr_epi32(W, U, A, B, I) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
     (__v8si) (__m256i) (A), \
     (__v8si) (__m256i) (B), \
     (int) (I), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_alignr_epi32(U, A, B, I) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
     (__v8si) (__m256i) (A), \
     (__v8si) (__m256i) (B), \
     (int) (I), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13428
/* Unmasked 256-bit alignr_epi64: calls __builtin_ia32_alignq256_mask with
   an all-ones mask.  The fourth operand is a zero vector rather than a
   second expansion of X, so an argument with side effects is evaluated
   only once; with every mask bit set that operand should not contribute
   to the result.  This matches the other unmasked macros in this file.  */
#define _mm256_alignr_epi64(X, Y, C)                                        \
    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X),          \
        (__v4di)(__m256i)(Y), (int)(C),                                     \
        (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
13432
/* Masked 256-bit alignr_epi64 macros.  Both expand to
   __builtin_ia32_alignq256_mask (X, Y, C, fourth operand, mask): the mask
   form forwards W, the maskz form supplies a zero vector.  */
#define _mm256_mask_alignr_epi64(W, U, A, B, I) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
     (__v4di) (__m256i) (A), \
     (__v4di) (__m256i) (B), \
     (int) (I), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_alignr_epi64(U, A, B, I) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
     (__v4di) (__m256i) (A), \
     (__v4di) (__m256i) (B), \
     (int) (I), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13441
/* Unmasked 128-bit alignr_epi32: calls __builtin_ia32_alignd128_mask with
   an all-ones mask.  The fourth operand is a zero vector rather than a
   second expansion of X, so an argument with side effects is evaluated
   only once; with every mask bit set that operand should not contribute
   to the result.  This matches the other unmasked macros in this file.  */
#define _mm_alignr_epi32(X, Y, C)                                           \
    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
        (__v4si)(__m128i)(Y), (int)(C),                                     \
        (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
13445
/* Masked 128-bit alignr_epi32 macros.  Both expand to
   __builtin_ia32_alignd128_mask (X, Y, C, fourth operand, mask): the mask
   form forwards W, the maskz form supplies a zero vector.  */
#define _mm_mask_alignr_epi32(W, U, A, B, I) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
     (__v4si) (__m128i) (A), \
     (__v4si) (__m128i) (B), \
     (int) (I), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_alignr_epi32(U, A, B, I) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
     (__v4si) (__m128i) (A), \
     (__v4si) (__m128i) (B), \
     (int) (I), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13454
/* Unmasked 128-bit alignr_epi64: calls __builtin_ia32_alignq128_mask with
   an all-ones mask.  The fourth operand is a zero vector rather than a
   second expansion of X, so an argument with side effects is evaluated
   only once; with every mask bit set that operand should not contribute
   to the result.  This matches the other unmasked macros in this file.  */
#define _mm_alignr_epi64(X, Y, C)                                           \
    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
        (__v2di)(__m128i)(Y), (int)(C),                                     \
        (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
13458
/* Masked 128-bit alignr_epi64: calls __builtin_ia32_alignq128_mask with W
   as the fourth operand and U as the mask, in line with the other
   *_mask_alignr_* macros.  The previous body passed X and an all-ones
   mask, so the W and U arguments were silently ignored.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C)                                \
    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
13462
/* Zero-masked 128-bit alignr_epi64: __builtin_ia32_alignq128_mask with a
   zero vector as the fourth operand and U as the mask.  */
#define _mm_maskz_alignr_epi64(U, A, B, I) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
     (__v2di) (__m128i) (A), \
     (__v2di) (__m128i) (B), \
     (int) (I), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13467
/* Masked cvtps_ph macros taking a __m128 input.  Both expand to
   __builtin_ia32_vcvtps2ph_mask (value, int operand, __v8hi operand,
   mask): the mask form forwards W, the maskz form supplies a zero
   vector.  */
#define _mm_mask_cvtps_ph(W, K, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
     (__v4sf) (__m128) (A), \
     (int) (I), \
     (__v8hi) (__m128i) (W), \
     (__mmask8) (K)))

#define _mm_maskz_cvtps_ph(K, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
     (__v4sf) (__m128) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (K)))
13475
/* Masked cvtps_ph macros taking a __m256 input and yielding __m128i.
   Both expand to __builtin_ia32_vcvtps2ph256_mask (value, int operand,
   __v8hi operand, mask): the mask form forwards W, the maskz form supplies
   a zero vector.  */
#define _mm256_mask_cvtps_ph(W, K, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
     (__v8sf) (__m256) (A), \
     (int) (I), \
     (__v8hi) (__m128i) (W), \
     (__mmask8) (K)))

#define _mm256_maskz_cvtps_ph(K, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
     (__v8sf) (__m256) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (K)))
13483
/* Masked 256-bit srai_epi32 macros.  Both expand to
   __builtin_ia32_psradi256_mask (value, int operand, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm256_mask_srai_epi32(W, U, V, N) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_srai_epi32(U, V, N) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si) (__m256i) (V), \
     (int) (N), \
     (__v8si) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13491
/* Masked 128-bit srai_epi32 macros.  Both expand to
   __builtin_ia32_psradi128_mask (value, int operand, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm_mask_srai_epi32(W, U, V, N) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_srai_epi32(U, V, N) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si) (__m128i) (V), \
     (int) (N), \
     (__v4si) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13499
/* 256-bit srai_epi64 macros, all built on __builtin_ia32_psraqi256_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm256_srai_epi64(V, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_srai_epi64(W, U, V, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_srai_epi64(U, V, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (V), \
     (int) (N), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13511
/* 128-bit srai_epi64 macros, all built on __builtin_ia32_psraqi128_mask
   (value, int operand, third operand, mask).  Unmasked: zero vector and
   all-ones mask.  Mask: W and U.  Maskz: zero vector and U.  */
#define _mm_srai_epi64(V, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_srai_epi64(W, U, V, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_srai_epi64(U, V, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (V), \
     (int) (N), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13523
/* Masked 256-bit permutex_pd macros.  Both expand to
   __builtin_ia32_permdf256_mask (value, int operand, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm256_mask_permutex_pd(W, U, V, I) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (I), \
     (__v4df) (__m256d) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permutex_pd(U, V, I) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (I), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
13531
/* Masked 256-bit permute_pd macros.  Both expand to
   __builtin_ia32_vpermilpd256_mask (value, int operand, third operand,
   mask): the mask form forwards W, the maskz form supplies a zero
   vector.  */
#define _mm256_mask_permute_pd(W, U, V, I) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (I), \
     (__v4df) (__m256d) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permute_pd(U, V, I) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (I), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
13541
/* Masked 256-bit permute_ps macros.  Both expand to
   __builtin_ia32_vpermilps256_mask (value, int operand, third operand,
   mask): the mask form forwards W, the maskz form supplies a zero
   vector.  */
#define _mm256_mask_permute_ps(W, U, V, I) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (I), \
     (__v8sf) (__m256) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permute_ps(U, V, I) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (I), \
     (__v8sf) (__m256) _mm256_setzero_ps (), \
     (__mmask8) (U)))
13550
/* Masked 128-bit permute_pd macros.  Both expand to
   __builtin_ia32_vpermilpd_mask (value, int operand, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm_mask_permute_pd(W, U, V, I) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (I), \
     (__v2df) (__m128d) (W), \
     (__mmask8) (U)))

#define _mm_maskz_permute_pd(U, V, I) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (I), \
     (__v2df) (__m128d) _mm_setzero_pd (), \
     (__mmask8) (U)))
13559
/* Masked 128-bit permute_ps macros.  Both expand to
   __builtin_ia32_vpermilps_mask (value, int operand, third operand, mask):
   the mask form forwards W, the maskz form supplies a zero vector.  */
#define _mm_mask_permute_ps(W, U, V, I) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (I), \
     (__v4sf) (__m128) (W), \
     (__mmask8) (U)))

#define _mm_maskz_permute_ps(U, V, I) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (I), \
     (__v4sf) (__m128) _mm_setzero_ps (), \
     (__mmask8) (U)))
13568
/* 256-bit mask_blend macros.  Each one forwards its two vector arguments,
   cast to the element view named by the builtin, followed by the mask, to
   the matching __builtin_ia32_blendm*_256_mask builtin.  */
#define _mm256_mask_blend_pd(U, A, W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (A), \
                                               (__v4df) (W), \
                                               (__mmask8) (U)))

#define _mm256_mask_blend_ps(U, A, W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (A), \
                                              (__v8sf) (W), \
                                              (__mmask8) (U)))

#define _mm256_mask_blend_epi64(U, A, W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (A), \
                                              (__v4di) (W), \
                                              (__mmask8) (U)))

#define _mm256_mask_blend_epi32(U, A, W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (A), \
                                              (__v8si) (W), \
                                              (__mmask8) (U)))
13588
/* 128-bit mask_blend macros.  Each one forwards its two vector arguments,
   cast to the element view named by the builtin, followed by the mask, to
   the matching __builtin_ia32_blendm*_128_mask builtin.  */
#define _mm_mask_blend_pd(U, A, W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (A), \
                                               (__v2df) (W), \
                                               (__mmask8) (U)))

#define _mm_mask_blend_ps(U, A, W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (A), \
                                              (__v4sf) (W), \
                                              (__mmask8) (U)))

#define _mm_mask_blend_epi64(U, A, W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (A), \
                                              (__v2di) (W), \
                                              (__mmask8) (U)))

#define _mm_mask_blend_epi32(U, A, W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (A), \
                                              (__v4si) (W), \
                                              (__mmask8) (U)))
13608
/* Unmasked 256-bit integer compare macros yielding an __mmask8.  Each
   passes (A, B, P) plus an all-ones mask to the cmp/ucmp builtin for the
   element width in its name.  */
#define _mm256_cmp_epu32_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) (__m256i) (A), \
                                            (__v8si) (__m256i) (B), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm256_cmp_epi64_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) (__m256i) (A), \
                                           (__v4di) (__m256i) (B), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm256_cmp_epi32_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) (__m256i) (A), \
                                           (__v8si) (__m256i) (B), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm256_cmp_epu64_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) (__m256i) (A), \
                                            (__v4di) (__m256i) (B), \
                                            (int) (P), \
                                            (__mmask8) -1))
13628
/* Unmasked 256-bit floating-point compare macros yielding an __mmask8.
   Each passes (A, B, P) plus an all-ones mask to
   __builtin_ia32_cmppd256_mask / __builtin_ia32_cmpps256_mask.  */
#define _mm256_cmp_pd_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) (__m256d) (A), \
                                            (__v4df) (__m256d) (B), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm256_cmp_ps_mask(A, B, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) (__m256) (A), \
                                            (__v8sf) (__m256) (B), \
                                            (int) (P), \
                                            (__mmask8) -1))
13638
13639#define _mm256_mask_cmp_epi64_mask(M, X, Y, P)				\
13640  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X),	\
13641					   (__v4di)(__m256i)(Y), (int)(P),\
13642					   (__mmask8)(M)))
13643
13644#define _mm256_mask_cmp_epi32_mask(M, X, Y, P)				\
13645  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X),	\
13646					   (__v8si)(__m256i)(Y), (int)(P),\
13647					   (__mmask8)(M)))
13648
13649#define _mm256_mask_cmp_epu64_mask(M, X, Y, P)				\
13650  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X),	\
13651					    (__v4di)(__m256i)(Y), (int)(P),\
13652					    (__mmask8)(M)))
13653
/* Expand to __builtin_ia32_ucmpd256_mask applied to A and B (each cast
   to __m256i, then to __v8si), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm256_mask_cmp_epu32_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_ucmpd256_mask ((__v8si) (__m256i) (A), \
                                 (__v8si) (__m256i) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13658
/* Expand to __builtin_ia32_cmppd256_mask applied to A and B (each cast
   to __m256d, then to __v4df), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm256_mask_cmp_pd_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmppd256_mask ((__v4df) (__m256d) (A), \
                                 (__v4df) (__m256d) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13663
/* Expand to __builtin_ia32_cmpps256_mask applied to A and B (each cast
   to __m256, then to __v8sf), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm256_mask_cmp_ps_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpps256_mask ((__v8sf) (__m256) (A), \
                                 (__v8sf) (__m256) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13668
/* Expand to __builtin_ia32_cmpq128_mask applied to A and B (each cast
   to __m128i, then to __v2di), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_epi64_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpq128_mask ((__v2di) (__m128i) (A), \
                                (__v2di) (__m128i) (B), \
                                (int) (PRED), \
                                (__mmask8) -1))
13673
/* Expand to __builtin_ia32_cmpd128_mask applied to A and B (each cast
   to __m128i, then to __v4si), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_epi32_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpd128_mask ((__v4si) (__m128i) (A), \
                                (__v4si) (__m128i) (B), \
                                (int) (PRED), \
                                (__mmask8) -1))
13678
/* Expand to __builtin_ia32_ucmpq128_mask applied to A and B (each cast
   to __m128i, then to __v2di), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_epu64_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_ucmpq128_mask ((__v2di) (__m128i) (A), \
                                 (__v2di) (__m128i) (B), \
                                 (int) (PRED), \
                                 (__mmask8) -1))
13683
/* Expand to __builtin_ia32_ucmpd128_mask applied to A and B (each cast
   to __m128i, then to __v4si), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_epu32_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_ucmpd128_mask ((__v4si) (__m128i) (A), \
                                 (__v4si) (__m128i) (B), \
                                 (int) (PRED), \
                                 (__mmask8) -1))
13688
/* Expand to __builtin_ia32_cmppd128_mask applied to A and B (each cast
   to __m128d, then to __v2df), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_pd_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmppd128_mask ((__v2df) (__m128d) (A), \
                                 (__v2df) (__m128d) (B), \
                                 (int) (PRED), \
                                 (__mmask8) -1))
13693
/* Expand to __builtin_ia32_cmpps128_mask applied to A and B (each cast
   to __m128, then to __v4sf), PRED (cast to int) and the constant
   (__mmask8) -1 as the mask operand; the result is cast to __mmask8.
   Every macro argument is expanded exactly once.  */
#define _mm_cmp_ps_mask(A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpps128_mask ((__v4sf) (__m128) (A), \
                                 (__v4sf) (__m128) (B), \
                                 (int) (PRED), \
                                 (__mmask8) -1))
13698
/* Expand to __builtin_ia32_cmpq128_mask applied to A and B (each cast
   to __m128i, then to __v2di), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_epi64_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpq128_mask ((__v2di) (__m128i) (A), \
                                (__v2di) (__m128i) (B), \
                                (int) (PRED), \
                                (__mmask8) (K)))
13703
/* Expand to __builtin_ia32_cmpd128_mask applied to A and B (each cast
   to __m128i, then to __v4si), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_epi32_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpd128_mask ((__v4si) (__m128i) (A), \
                                (__v4si) (__m128i) (B), \
                                (int) (PRED), \
                                (__mmask8) (K)))
13708
/* Expand to __builtin_ia32_ucmpq128_mask applied to A and B (each cast
   to __m128i, then to __v2di), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_epu64_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_ucmpq128_mask ((__v2di) (__m128i) (A), \
                                 (__v2di) (__m128i) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13713
/* Expand to __builtin_ia32_ucmpd128_mask applied to A and B (each cast
   to __m128i, then to __v4si), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_epu32_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_ucmpd128_mask ((__v4si) (__m128i) (A), \
                                 (__v4si) (__m128i) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13718
/* Expand to __builtin_ia32_cmppd128_mask applied to A and B (each cast
   to __m128d, then to __v2df), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_pd_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmppd128_mask ((__v2df) (__m128d) (A), \
                                 (__v2df) (__m128d) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13723
/* Expand to __builtin_ia32_cmpps128_mask applied to A and B (each cast
   to __m128, then to __v4sf), PRED (cast to int) and K (cast to
   __mmask8) as the mask operand; the result is cast to __mmask8.
   The macro takes the mask first, the builtin takes it last.  Every
   macro argument is expanded exactly once.  */
#define _mm_mask_cmp_ps_mask(K, A, B, PRED) \
  ((__mmask8) \
   __builtin_ia32_cmpps128_mask ((__v4sf) (__m128) (A), \
                                 (__v4sf) (__m128) (B), \
                                 (int) (PRED), \
                                 (__mmask8) (K)))
13728
13729#endif
13730
/* Forward to _mm256_permutevar8x32_ps with the two arguments swapped:
   the second macro argument becomes the first call argument and vice
   versa.  Each argument is parenthesized and expanded exactly once.  */
#define _mm256_permutexvar_ps(X, Y) \
  _mm256_permutevar8x32_ps ((Y), (X))
13732
/* __DISABLE_AVX512VL__ is defined at the top of this header only when
   the header itself had to push the target options and switch on
   "avx512vl".  In that case drop the marker and pop the options so
   code following the include sees the caller's original target.  */
#if defined (__DISABLE_AVX512VL__)
# undef __DISABLE_AVX512VL__
# pragma GCC pop_options
#endif /* defined (__DISABLE_AVX512VL__) */
13737
13738#endif /* _AVX512VLINTRIN_H_INCLUDED */
13739