1/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLDQINTRIN_H_INCLUDED
29#define _AVX512VLDQINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
32#pragma GCC push_options
33#pragma GCC target("avx512vl,avx512dq")
34#define __DISABLE_AVX512VLDQ__
35#endif /* __AVX512VLDQ__ */
36
37extern __inline __m256i
38__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39_mm256_cvttpd_epi64 (__m256d __A)
40{
41  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
42						     (__v4di)
43						     _mm256_setzero_si256 (),
44						     (__mmask8) -1);
45}
46
47extern __inline __m256i
48__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
50{
51  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
52						     (__v4di) __W,
53						     (__mmask8) __U);
54}
55
56extern __inline __m256i
57__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
59{
60  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
61						     (__v4di)
62						     _mm256_setzero_si256 (),
63						     (__mmask8) __U);
64}
65
66extern __inline __m128i
67__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68_mm_cvttpd_epi64 (__m128d __A)
69{
70  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
71						     (__v2di)
72						     _mm_setzero_di (),
73						     (__mmask8) -1);
74}
75
76extern __inline __m128i
77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
79{
80  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
81						     (__v2di) __W,
82						     (__mmask8) __U);
83}
84
85extern __inline __m128i
86__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
88{
89  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
90						     (__v2di)
91						     _mm_setzero_si128 (),
92						     (__mmask8) __U);
93}
94
95extern __inline __m256i
96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97_mm256_cvttpd_epu64 (__m256d __A)
98{
99  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
100						      (__v4di)
101						      _mm256_setzero_si256 (),
102						      (__mmask8) -1);
103}
104
105extern __inline __m256i
106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
108{
109  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
110						      (__v4di) __W,
111						      (__mmask8) __U);
112}
113
114extern __inline __m256i
115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
117{
118  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
119						      (__v4di)
120						      _mm256_setzero_si256 (),
121						      (__mmask8) __U);
122}
123
124extern __inline __m128i
125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126_mm_cvttpd_epu64 (__m128d __A)
127{
128  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
129						      (__v2di)
130						      _mm_setzero_di (),
131						      (__mmask8) -1);
132}
133
134extern __inline __m128i
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
137{
138  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
139						      (__v2di) __W,
140						      (__mmask8) __U);
141}
142
143extern __inline __m128i
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
146{
147  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
148						      (__v2di)
149						      _mm_setzero_si128 (),
150						      (__mmask8) __U);
151}
152
153extern __inline __m256i
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm256_cvtpd_epi64 (__m256d __A)
156{
157  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
158						    (__v4di)
159						    _mm256_setzero_si256 (),
160						    (__mmask8) -1);
161}
162
163extern __inline __m256i
164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
166{
167  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
168						    (__v4di) __W,
169						    (__mmask8) __U);
170}
171
172extern __inline __m256i
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
175{
176  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
177						    (__v4di)
178						    _mm256_setzero_si256 (),
179						    (__mmask8) __U);
180}
181
182extern __inline __m128i
183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184_mm_cvtpd_epi64 (__m128d __A)
185{
186  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
187						    (__v2di)
188						    _mm_setzero_di (),
189						    (__mmask8) -1);
190}
191
192extern __inline __m128i
193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
195{
196  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
197						    (__v2di) __W,
198						    (__mmask8) __U);
199}
200
201extern __inline __m128i
202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
204{
205  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
206						    (__v2di)
207						    _mm_setzero_si128 (),
208						    (__mmask8) __U);
209}
210
211extern __inline __m256i
212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213_mm256_cvtpd_epu64 (__m256d __A)
214{
215  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
216						     (__v4di)
217						     _mm256_setzero_si256 (),
218						     (__mmask8) -1);
219}
220
221extern __inline __m256i
222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
224{
225  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
226						     (__v4di) __W,
227						     (__mmask8) __U);
228}
229
230extern __inline __m256i
231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
233{
234  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
235						     (__v4di)
236						     _mm256_setzero_si256 (),
237						     (__mmask8) __U);
238}
239
240extern __inline __m128i
241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242_mm_cvtpd_epu64 (__m128d __A)
243{
244  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
245						     (__v2di)
246						     _mm_setzero_di (),
247						     (__mmask8) -1);
248}
249
250extern __inline __m128i
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
253{
254  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
255						     (__v2di) __W,
256						     (__mmask8) __U);
257}
258
259extern __inline __m128i
260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
262{
263  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
264						     (__v2di)
265						     _mm_setzero_si128 (),
266						     (__mmask8) __U);
267}
268
269extern __inline __m256i
270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271_mm256_cvttps_epi64 (__m128 __A)
272{
273  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
274						     (__v4di)
275						     _mm256_setzero_si256 (),
276						     (__mmask8) -1);
277}
278
279extern __inline __m256i
280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
282{
283  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
284						     (__v4di) __W,
285						     (__mmask8) __U);
286}
287
288extern __inline __m256i
289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
291{
292  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
293						     (__v4di)
294						     _mm256_setzero_si256 (),
295						     (__mmask8) __U);
296}
297
298extern __inline __m128i
299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300_mm_cvttps_epi64 (__m128 __A)
301{
302  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
303						     (__v2di)
304						     _mm_setzero_di (),
305						     (__mmask8) -1);
306}
307
308extern __inline __m128i
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
311{
312  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
313						     (__v2di) __W,
314						     (__mmask8) __U);
315}
316
317extern __inline __m128i
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
320{
321  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
322						     (__v2di)
323						     _mm_setzero_di (),
324						     (__mmask8) __U);
325}
326
327extern __inline __m256i
328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329_mm256_cvttps_epu64 (__m128 __A)
330{
331  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
332						      (__v4di)
333						      _mm256_setzero_si256 (),
334						      (__mmask8) -1);
335}
336
337extern __inline __m256i
338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
340{
341  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
342						      (__v4di) __W,
343						      (__mmask8) __U);
344}
345
346extern __inline __m256i
347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
349{
350  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
351						      (__v4di)
352						      _mm256_setzero_si256 (),
353						      (__mmask8) __U);
354}
355
356extern __inline __m128i
357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358_mm_cvttps_epu64 (__m128 __A)
359{
360  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
361						      (__v2di)
362						      _mm_setzero_di (),
363						      (__mmask8) -1);
364}
365
366extern __inline __m128i
367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
369{
370  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
371						      (__v2di) __W,
372						      (__mmask8) __U);
373}
374
375extern __inline __m128i
376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
378{
379  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
380						      (__v2di)
381						      _mm_setzero_di (),
382						      (__mmask8) __U);
383}
384
385extern __inline __m256d
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm256_broadcast_f64x2 (__m128d __A)
388{
389  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
390							   __A,
391						           (__v4df)_mm256_undefined_pd(),
392							   (__mmask8) -
393							   1);
394}
395
396extern __inline __m256d
397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
398_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
399{
400  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
401							   __A,
402							   (__v4df)
403							   __O, __M);
404}
405
406extern __inline __m256d
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
409{
410  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
411							   __A,
412							   (__v4df)
413							   _mm256_setzero_ps (),
414							   __M);
415}
416
417extern __inline __m256i
418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419_mm256_broadcast_i64x2 (__m128i __A)
420{
421  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
422							   __A,
423						           (__v4di)_mm256_undefined_si256(),
424							   (__mmask8) -
425							   1);
426}
427
428extern __inline __m256i
429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
430_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
431{
432  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
433							   __A,
434							   (__v4di)
435							   __O, __M);
436}
437
438extern __inline __m256i
439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
441{
442  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
443							   __A,
444							   (__v4di)
445							   _mm256_setzero_si256 (),
446							   __M);
447}
448
449extern __inline __m256
450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451_mm256_broadcast_f32x2 (__m128 __A)
452{
453  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
454						          (__v8sf)_mm256_undefined_ps(),
455							  (__mmask8) -
456							  1);
457}
458
459extern __inline __m256
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
462{
463  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
464							  (__v8sf) __O,
465							  __M);
466}
467
468extern __inline __m256
469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
471{
472  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
473							  (__v8sf)
474							  _mm256_setzero_ps (),
475							  __M);
476}
477
478extern __inline __m256i
479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480_mm256_broadcast_i32x2 (__m128i __A)
481{
482  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
483							   __A,
484						          (__v8si)_mm256_undefined_si256(),
485							   (__mmask8) -
486							   1);
487}
488
489extern __inline __m256i
490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
491_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
492{
493  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
494							   __A,
495							   (__v8si)
496							   __O, __M);
497}
498
499extern __inline __m256i
500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
502{
503  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
504							   __A,
505							   (__v8si)
506							   _mm256_setzero_si256 (),
507							   __M);
508}
509
510extern __inline __m128i
511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512_mm_broadcast_i32x2 (__m128i __A)
513{
514  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
515							   __A,
516						          (__v4si)_mm_undefined_si128(),
517							   (__mmask8) -
518							   1);
519}
520
521extern __inline __m128i
522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
523_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
524{
525  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
526							   __A,
527							   (__v4si)
528							   __O, __M);
529}
530
531extern __inline __m128i
532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
534{
535  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
536							   __A,
537							   (__v4si)
538							   _mm_setzero_si128 (),
539							   __M);
540}
541
542extern __inline __m256i
543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
544_mm256_mullo_epi64 (__m256i __A, __m256i __B)
545{
546  return (__m256i) ((__v4du) __A * (__v4du) __B);
547}
548
549extern __inline __m256i
550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
551_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
552			 __m256i __B)
553{
554  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
555						  (__v4di) __B,
556						  (__v4di) __W,
557						  (__mmask8) __U);
558}
559
560extern __inline __m256i
561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
562_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
563{
564  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
565						  (__v4di) __B,
566						  (__v4di)
567						  _mm256_setzero_si256 (),
568						  (__mmask8) __U);
569}
570
571extern __inline __m128i
572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573_mm_mullo_epi64 (__m128i __A, __m128i __B)
574{
575  return (__m128i) ((__v2du) __A * (__v2du) __B);
576}
577
578extern __inline __m128i
579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
581		      __m128i __B)
582{
583  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
584						  (__v2di) __B,
585						  (__v2di) __W,
586						  (__mmask8) __U);
587}
588
589extern __inline __m128i
590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
592{
593  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
594						  (__v2di) __B,
595						  (__v2di)
596						  _mm_setzero_di (),
597						  (__mmask8) __U);
598}
599
600extern __inline __m256d
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
603		       __m256d __B)
604{
605  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
606						  (__v4df) __B,
607						  (__v4df) __W,
608						  (__mmask8) __U);
609}
610
611extern __inline __m256d
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
614{
615  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
616						  (__v4df) __B,
617						  (__v4df)
618						  _mm256_setzero_pd (),
619						  (__mmask8) __U);
620}
621
622extern __inline __m128d
623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
625		    __m128d __B)
626{
627  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
628						  (__v2df) __B,
629						  (__v2df) __W,
630						  (__mmask8) __U);
631}
632
633extern __inline __m128d
634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
636{
637  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
638						  (__v2df) __B,
639						  (__v2df)
640						  _mm_setzero_pd (),
641						  (__mmask8) __U);
642}
643
644extern __inline __m256
645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
647		       __m256 __B)
648{
649  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
650						 (__v8sf) __B,
651						 (__v8sf) __W,
652						 (__mmask8) __U);
653}
654
655extern __inline __m256
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
658{
659  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
660						 (__v8sf) __B,
661						 (__v8sf)
662						 _mm256_setzero_ps (),
663						 (__mmask8) __U);
664}
665
666extern __inline __m128
667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
669{
670  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
671						 (__v4sf) __B,
672						 (__v4sf) __W,
673						 (__mmask8) __U);
674}
675
676extern __inline __m128
677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
679{
680  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
681						 (__v4sf) __B,
682						 (__v4sf)
683						 _mm_setzero_ps (),
684						 (__mmask8) __U);
685}
686
687extern __inline __m256i
688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689_mm256_cvtps_epi64 (__m128 __A)
690{
691  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
692						    (__v4di)
693						    _mm256_setzero_si256 (),
694						    (__mmask8) -1);
695}
696
697extern __inline __m256i
698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
700{
701  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
702						    (__v4di) __W,
703						    (__mmask8) __U);
704}
705
706extern __inline __m256i
707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
708_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
709{
710  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
711						    (__v4di)
712						    _mm256_setzero_si256 (),
713						    (__mmask8) __U);
714}
715
716extern __inline __m128i
717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718_mm_cvtps_epi64 (__m128 __A)
719{
720  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
721						    (__v2di)
722						    _mm_setzero_di (),
723						    (__mmask8) -1);
724}
725
726extern __inline __m128i
727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
729{
730  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
731						    (__v2di) __W,
732						    (__mmask8) __U);
733}
734
735extern __inline __m128i
736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
738{
739  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
740						    (__v2di)
741						    _mm_setzero_di (),
742						    (__mmask8) __U);
743}
744
745extern __inline __m256i
746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
747_mm256_cvtps_epu64 (__m128 __A)
748{
749  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
750						     (__v4di)
751						     _mm256_setzero_si256 (),
752						     (__mmask8) -1);
753}
754
755extern __inline __m256i
756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
757_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
758{
759  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
760						     (__v4di) __W,
761						     (__mmask8) __U);
762}
763
764extern __inline __m256i
765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
767{
768  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
769						     (__v4di)
770						     _mm256_setzero_si256 (),
771						     (__mmask8) __U);
772}
773
774extern __inline __m128i
775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776_mm_cvtps_epu64 (__m128 __A)
777{
778  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
779						     (__v2di)
780						     _mm_setzero_di (),
781						     (__mmask8) -1);
782}
783
784extern __inline __m128i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
787{
788  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
789						     (__v2di) __W,
790						     (__mmask8) __U);
791}
792
793extern __inline __m128i
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
796{
797  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
798						     (__v2di)
799						     _mm_setzero_di (),
800						     (__mmask8) __U);
801}
802
803extern __inline __m128
804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805_mm256_cvtepi64_ps (__m256i __A)
806{
807  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
808						   (__v4sf)
809						   _mm_setzero_ps (),
810						   (__mmask8) -1);
811}
812
813extern __inline __m128
814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
816{
817  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
818						   (__v4sf) __W,
819						   (__mmask8) __U);
820}
821
822extern __inline __m128
823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
825{
826  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
827						   (__v4sf)
828						   _mm_setzero_ps (),
829						   (__mmask8) __U);
830}
831
832extern __inline __m128
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm_cvtepi64_ps (__m128i __A)
835{
836  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
837						   (__v4sf)
838						   _mm_setzero_ps (),
839						   (__mmask8) -1);
840}
841
842extern __inline __m128
843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
845{
846  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
847						   (__v4sf) __W,
848						   (__mmask8) __U);
849}
850
851extern __inline __m128
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
854{
855  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
856						   (__v4sf)
857						   _mm_setzero_ps (),
858						   (__mmask8) __U);
859}
860
861extern __inline __m128
862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863_mm256_cvtepu64_ps (__m256i __A)
864{
865  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
866						    (__v4sf)
867						    _mm_setzero_ps (),
868						    (__mmask8) -1);
869}
870
871extern __inline __m128
872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
874{
875  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
876						    (__v4sf) __W,
877						    (__mmask8) __U);
878}
879
880extern __inline __m128
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
883{
884  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
885						    (__v4sf)
886						    _mm_setzero_ps (),
887						    (__mmask8) __U);
888}
889
890extern __inline __m128
891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892_mm_cvtepu64_ps (__m128i __A)
893{
894  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
895						    (__v4sf)
896						    _mm_setzero_ps (),
897						    (__mmask8) -1);
898}
899
900extern __inline __m128
901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
903{
904  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
905						    (__v4sf) __W,
906						    (__mmask8) __U);
907}
908
909extern __inline __m128
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
912{
913  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
914						    (__v4sf)
915						    _mm_setzero_ps (),
916						    (__mmask8) __U);
917}
918
919extern __inline __m256d
920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921_mm256_cvtepi64_pd (__m256i __A)
922{
923  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
924						    (__v4df)
925						    _mm256_setzero_pd (),
926						    (__mmask8) -1);
927}
928
929extern __inline __m256d
930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
932{
933  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
934						    (__v4df) __W,
935						    (__mmask8) __U);
936}
937
938extern __inline __m256d
939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
941{
942  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
943						    (__v4df)
944						    _mm256_setzero_pd (),
945						    (__mmask8) __U);
946}
947
948extern __inline __m128d
949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950_mm_cvtepi64_pd (__m128i __A)
951{
952  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
953						    (__v2df)
954						    _mm_setzero_pd (),
955						    (__mmask8) -1);
956}
957
958extern __inline __m128d
959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
961{
962  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
963						    (__v2df) __W,
964						    (__mmask8) __U);
965}
966
967extern __inline __m128d
968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
970{
971  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
972						    (__v2df)
973						    _mm_setzero_pd (),
974						    (__mmask8) __U);
975}
976
977extern __inline __m256d
978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979_mm256_cvtepu64_pd (__m256i __A)
980{
981  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
982						     (__v4df)
983						     _mm256_setzero_pd (),
984						     (__mmask8) -1);
985}
986
987extern __inline __m256d
988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
990{
991  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
992						     (__v4df) __W,
993						     (__mmask8) __U);
994}
995
996extern __inline __m256d
997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
998_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
999{
1000  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1001						     (__v4df)
1002						     _mm256_setzero_pd (),
1003						     (__mmask8) __U);
1004}
1005
1006extern __inline __m256d
1007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1008_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1009		    __m256d __B)
1010{
1011  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1012						 (__v4df) __B,
1013						 (__v4df) __W,
1014						 (__mmask8) __U);
1015}
1016
1017extern __inline __m256d
1018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1019_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1020{
1021  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1022						 (__v4df) __B,
1023						 (__v4df)
1024						 _mm256_setzero_pd (),
1025						 (__mmask8) __U);
1026}
1027
1028extern __inline __m128d
1029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1030_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1031{
1032  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1033						 (__v2df) __B,
1034						 (__v2df) __W,
1035						 (__mmask8) __U);
1036}
1037
1038extern __inline __m128d
1039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1041{
1042  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1043						 (__v2df) __B,
1044						 (__v2df)
1045						 _mm_setzero_pd (),
1046						 (__mmask8) __U);
1047}
1048
1049extern __inline __m256
1050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1052{
1053  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1054						(__v8sf) __B,
1055						(__v8sf) __W,
1056						(__mmask8) __U);
1057}
1058
1059extern __inline __m256
1060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1062{
1063  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1064						(__v8sf) __B,
1065						(__v8sf)
1066						_mm256_setzero_ps (),
1067						(__mmask8) __U);
1068}
1069
1070extern __inline __m128
1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1073{
1074  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1075						(__v4sf) __B,
1076						(__v4sf) __W,
1077						(__mmask8) __U);
1078}
1079
1080extern __inline __m128
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1083{
1084  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1085						(__v4sf) __B,
1086						(__v4sf)
1087						_mm_setzero_ps (),
1088						(__mmask8) __U);
1089}
1090
1091extern __inline __m128d
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm_cvtepu64_pd (__m128i __A)
1094{
1095  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1096						     (__v2df)
1097						     _mm_setzero_pd (),
1098						     (__mmask8) -1);
1099}
1100
1101extern __inline __m128d
1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1104{
1105  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1106						     (__v2df) __W,
1107						     (__mmask8) __U);
1108}
1109
1110extern __inline __m128d
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1113{
1114  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1115						     (__v2df)
1116						     _mm_setzero_pd (),
1117						     (__mmask8) __U);
1118}
1119
1120extern __inline __m256d
1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1123		    __m256d __B)
1124{
1125  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1126						 (__v4df) __B,
1127						 (__v4df) __W,
1128						 (__mmask8) __U);
1129}
1130
1131extern __inline __m256d
1132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1134{
1135  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1136						 (__v4df) __B,
1137						 (__v4df)
1138						 _mm256_setzero_pd (),
1139						 (__mmask8) __U);
1140}
1141
1142extern __inline __m128d
1143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1145{
1146  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1147						 (__v2df) __B,
1148						 (__v2df) __W,
1149						 (__mmask8) __U);
1150}
1151
1152extern __inline __m128d
1153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1155{
1156  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1157						 (__v2df) __B,
1158						 (__v2df)
1159						 _mm_setzero_pd (),
1160						 (__mmask8) __U);
1161}
1162
1163extern __inline __m256
1164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1165_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1166{
1167  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1168						(__v8sf) __B,
1169						(__v8sf) __W,
1170						(__mmask8) __U);
1171}
1172
1173extern __inline __m256
1174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1176{
1177  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1178						(__v8sf) __B,
1179						(__v8sf)
1180						_mm256_setzero_ps (),
1181						(__mmask8) __U);
1182}
1183
1184extern __inline __m128
1185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1187{
1188  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1189						(__v4sf) __B,
1190						(__v4sf) __W,
1191						(__mmask8) __U);
1192}
1193
1194extern __inline __m128
1195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1197{
1198  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1199						(__v4sf) __B,
1200						(__v4sf)
1201						_mm_setzero_ps (),
1202						(__mmask8) __U);
1203}
1204
1205extern __inline __m256d
1206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1208{
1209  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1210						(__v4df) __B,
1211						(__v4df) __W,
1212						(__mmask8) __U);
1213}
1214
1215extern __inline __m256d
1216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1218{
1219  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1220						(__v4df) __B,
1221						(__v4df)
1222						_mm256_setzero_pd (),
1223						(__mmask8) __U);
1224}
1225
1226extern __inline __m128d
1227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1229{
1230  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1231						(__v2df) __B,
1232						(__v2df) __W,
1233						(__mmask8) __U);
1234}
1235
1236extern __inline __m128d
1237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1239{
1240  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1241						(__v2df) __B,
1242						(__v2df)
1243						_mm_setzero_pd (),
1244						(__mmask8) __U);
1245}
1246
1247extern __inline __m256
1248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1250{
1251  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1252					       (__v8sf) __B,
1253					       (__v8sf) __W,
1254					       (__mmask8) __U);
1255}
1256
1257extern __inline __m256
1258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1260{
1261  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1262					       (__v8sf) __B,
1263					       (__v8sf)
1264					       _mm256_setzero_ps (),
1265					       (__mmask8) __U);
1266}
1267
1268extern __inline __m128
1269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1270_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1271{
1272  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1273					       (__v4sf) __B,
1274					       (__v4sf) __W,
1275					       (__mmask8) __U);
1276}
1277
1278extern __inline __m128
1279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1280_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1281{
1282  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1283					       (__v4sf) __B,
1284					       (__v4sf)
1285					       _mm_setzero_ps (),
1286					       (__mmask8) __U);
1287}
1288
1289extern __inline __m128i
1290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291_mm_movm_epi32 (__mmask8 __A)
1292{
1293  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1294}
1295
1296extern __inline __m256i
1297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298_mm256_movm_epi32 (__mmask8 __A)
1299{
1300  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1301}
1302
1303extern __inline __m128i
1304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1305_mm_movm_epi64 (__mmask8 __A)
1306{
1307  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1308}
1309
1310extern __inline __m256i
1311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312_mm256_movm_epi64 (__mmask8 __A)
1313{
1314  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1315}
1316
1317extern __inline __mmask8
1318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319_mm_movepi32_mask (__m128i __A)
1320{
1321  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1322}
1323
1324extern __inline __mmask8
1325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326_mm256_movepi32_mask (__m256i __A)
1327{
1328  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1329}
1330
1331extern __inline __mmask8
1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333_mm_movepi64_mask (__m128i __A)
1334{
1335  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1336}
1337
1338extern __inline __mmask8
1339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340_mm256_movepi64_mask (__m256i __A)
1341{
1342  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1343}
1344
1345#ifdef __OPTIMIZE__
1346extern __inline __m128d
1347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1348_mm256_extractf64x2_pd (__m256d __A, const int __imm)
1349{
1350  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1351							 __imm,
1352							 (__v2df)
1353							 _mm_setzero_pd (),
1354							 (__mmask8) -
1355							 1);
1356}
1357
1358extern __inline __m128d
1359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1361			     const int __imm)
1362{
1363  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1364							 __imm,
1365							 (__v2df) __W,
1366							 (__mmask8)
1367							 __U);
1368}
1369
1370extern __inline __m128d
1371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1373			      const int __imm)
1374{
1375  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1376							 __imm,
1377							 (__v2df)
1378							 _mm_setzero_pd (),
1379							 (__mmask8)
1380							 __U);
1381}
1382
1383extern __inline __m128i
1384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1385_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1386{
1387  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1388							 __imm,
1389							 (__v2di)
1390							 _mm_setzero_di (),
1391							 (__mmask8) -
1392							 1);
1393}
1394
1395extern __inline __m128i
1396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1398				const int __imm)
1399{
1400  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1401							 __imm,
1402							 (__v2di) __W,
1403							 (__mmask8)
1404							 __U);
1405}
1406
1407extern __inline __m128i
1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1410				 const int __imm)
1411{
1412  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1413							 __imm,
1414							 (__v2di)
1415							 _mm_setzero_di (),
1416							 (__mmask8)
1417							 __U);
1418}
1419
1420extern __inline __m256d
1421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422_mm256_reduce_pd (__m256d __A, int __B)
1423{
1424  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1425						    (__v4df)
1426						    _mm256_setzero_pd (),
1427						    (__mmask8) -1);
1428}
1429
1430extern __inline __m256d
1431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1433{
1434  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1435						    (__v4df) __W,
1436						    (__mmask8) __U);
1437}
1438
1439extern __inline __m256d
1440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1442{
1443  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1444						    (__v4df)
1445						    _mm256_setzero_pd (),
1446						    (__mmask8) __U);
1447}
1448
1449extern __inline __m128d
1450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451_mm_reduce_pd (__m128d __A, int __B)
1452{
1453  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1454						    (__v2df)
1455						    _mm_setzero_pd (),
1456						    (__mmask8) -1);
1457}
1458
1459extern __inline __m128d
1460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1462{
1463  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1464						    (__v2df) __W,
1465						    (__mmask8) __U);
1466}
1467
1468extern __inline __m128d
1469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1471{
1472  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1473						    (__v2df)
1474						    _mm_setzero_pd (),
1475						    (__mmask8) __U);
1476}
1477
1478extern __inline __m256
1479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480_mm256_reduce_ps (__m256 __A, int __B)
1481{
1482  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1483						   (__v8sf)
1484						   _mm256_setzero_ps (),
1485						   (__mmask8) -1);
1486}
1487
1488extern __inline __m256
1489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1491{
1492  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1493						   (__v8sf) __W,
1494						   (__mmask8) __U);
1495}
1496
1497extern __inline __m256
1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1500{
1501  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1502						   (__v8sf)
1503						   _mm256_setzero_ps (),
1504						   (__mmask8) __U);
1505}
1506
1507extern __inline __m128
1508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509_mm_reduce_ps (__m128 __A, int __B)
1510{
1511  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1512						   (__v4sf)
1513						   _mm_setzero_ps (),
1514						   (__mmask8) -1);
1515}
1516
1517extern __inline __m128
1518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1519_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1520{
1521  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1522						   (__v4sf) __W,
1523						   (__mmask8) __U);
1524}
1525
1526extern __inline __m128
1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1529{
1530  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1531						   (__v4sf)
1532						   _mm_setzero_ps (),
1533						   (__mmask8) __U);
1534}
1535
1536extern __inline __m256d
1537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538_mm256_range_pd (__m256d __A, __m256d __B, int __C)
1539{
1540  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1541						   (__v4df) __B, __C,
1542						   (__v4df)
1543						   _mm256_setzero_pd (),
1544						   (__mmask8) -1);
1545}
1546
1547extern __inline __m256d
1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1550		      __m256d __A, __m256d __B, int __C)
1551{
1552  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1553						   (__v4df) __B, __C,
1554						   (__v4df) __W,
1555						   (__mmask8) __U);
1556}
1557
1558extern __inline __m256d
1559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1560_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1561{
1562  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1563						   (__v4df) __B, __C,
1564						   (__v4df)
1565						   _mm256_setzero_pd (),
1566						   (__mmask8) __U);
1567}
1568
1569extern __inline __m128d
1570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571_mm_range_pd (__m128d __A, __m128d __B, int __C)
1572{
1573  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1574						   (__v2df) __B, __C,
1575						   (__v2df)
1576						   _mm_setzero_pd (),
1577						   (__mmask8) -1);
1578}
1579
1580extern __inline __m128d
1581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1582_mm_mask_range_pd (__m128d __W, __mmask8 __U,
1583		   __m128d __A, __m128d __B, int __C)
1584{
1585  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1586						   (__v2df) __B, __C,
1587						   (__v2df) __W,
1588						   (__mmask8) __U);
1589}
1590
1591extern __inline __m128d
1592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1594{
1595  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1596						   (__v2df) __B, __C,
1597						   (__v2df)
1598						   _mm_setzero_pd (),
1599						   (__mmask8) __U);
1600}
1601
1602extern __inline __m256
1603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604_mm256_range_ps (__m256 __A, __m256 __B, int __C)
1605{
1606  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1607						  (__v8sf) __B, __C,
1608						  (__v8sf)
1609						  _mm256_setzero_ps (),
1610						  (__mmask8) -1);
1611}
1612
1613extern __inline __m256
1614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1615_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1616		      int __C)
1617{
1618  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1619						  (__v8sf) __B, __C,
1620						  (__v8sf) __W,
1621						  (__mmask8) __U);
1622}
1623
1624extern __inline __m256
1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1627{
1628  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1629						  (__v8sf) __B, __C,
1630						  (__v8sf)
1631						  _mm256_setzero_ps (),
1632						  (__mmask8) __U);
1633}
1634
1635extern __inline __m128
1636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637_mm_range_ps (__m128 __A, __m128 __B, int __C)
1638{
1639  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1640						  (__v4sf) __B, __C,
1641						  (__v4sf)
1642						  _mm_setzero_ps (),
1643						  (__mmask8) -1);
1644}
1645
1646extern __inline __m128
1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648_mm_mask_range_ps (__m128 __W, __mmask8 __U,
1649		   __m128 __A, __m128 __B, int __C)
1650{
1651  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1652						  (__v4sf) __B, __C,
1653						  (__v4sf) __W,
1654						  (__mmask8) __U);
1655}
1656
1657extern __inline __m128
1658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1660{
1661  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1662						  (__v4sf) __B, __C,
1663						  (__v4sf)
1664						  _mm_setzero_ps (),
1665						  (__mmask8) __U);
1666}
1667
1668extern __inline __mmask8
1669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1671			     const int __imm)
1672{
1673  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1674						      __imm, __U);
1675}
1676
1677extern __inline __mmask8
1678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1679_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1680{
1681  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1682						      __imm,
1683						      (__mmask8) -1);
1684}
1685
1686extern __inline __mmask8
1687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1689{
1690  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1691						      __imm, __U);
1692}
1693
1694extern __inline __mmask8
1695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1697{
1698  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1699						      __imm,
1700						      (__mmask8) -1);
1701}
1702
1703extern __inline __mmask8
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1706{
1707  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1708						      __imm, __U);
1709}
1710
1711extern __inline __mmask8
1712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713_mm_fpclass_pd_mask (__m128d __A, const int __imm)
1714{
1715  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1716						      __imm,
1717						      (__mmask8) -1);
1718}
1719
1720extern __inline __mmask8
1721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1723{
1724  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1725						      __imm, __U);
1726}
1727
1728extern __inline __mmask8
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm_fpclass_ps_mask (__m128 __A, const int __imm)
1731{
1732  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1733						      __imm,
1734						      (__mmask8) -1);
1735}
1736
1737extern __inline __m256i
1738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1740{
1741  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1742							(__v2di) __B,
1743							__imm,
1744							(__v4di)
1745							_mm256_setzero_si256 (),
1746							(__mmask8) -
1747							1);
1748}
1749
1750extern __inline __m256i
1751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1753			 __m128i __B, const int __imm)
1754{
1755  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1756							(__v2di) __B,
1757							__imm,
1758							(__v4di) __W,
1759							(__mmask8)
1760							__U);
1761}
1762
1763extern __inline __m256i
1764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1765_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1766			  const int __imm)
1767{
1768  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1769							(__v2di) __B,
1770							__imm,
1771							(__v4di)
1772							_mm256_setzero_si256 (),
1773							(__mmask8)
1774							__U);
1775}
1776
1777extern __inline __m256d
1778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1780{
1781  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1782							(__v2df) __B,
1783							__imm,
1784							(__v4df)
1785							_mm256_setzero_pd (),
1786							(__mmask8) -
1787							1);
1788}
1789
1790extern __inline __m256d
1791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1793			 __m128d __B, const int __imm)
1794{
1795  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1796							(__v2df) __B,
1797							__imm,
1798							(__v4df) __W,
1799							(__mmask8)
1800							__U);
1801}
1802
1803extern __inline __m256d
1804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1806			  const int __imm)
1807{
1808  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1809							(__v2df) __B,
1810							__imm,
1811							(__v4df)
1812							_mm256_setzero_pd (),
1813							(__mmask8)
1814							__U);
1815}
1816
1817#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_insertf64x2_256_mask on X, Y and C with a zero
   vector and an all-ones mask.  */
#define _mm256_insertf64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (C), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))
1823
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_insertf64x2_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_insertf64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (C), \
    (__v4df) (__m256d) (W), \
    (__mmask8) (U)))
1829
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_insertf64x2_256_mask on X, Y and C with a zero
   vector and U as the mask.  */
#define _mm256_maskz_insertf64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (C), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
1835
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_inserti64x2_256_mask on X, Y and C with a zero
   vector and an all-ones mask.  */
#define _mm256_inserti64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (X), \
    (__v2di) (__m128i) (Y), \
    (int) (C), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))
1841
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_inserti64x2_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_inserti64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (X), \
    (__v2di) (__m128i) (Y), \
    (int) (C), \
    (__v4di) (__m256i) (W), \
    (__mmask8) (U)))
1847
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_inserti64x2_256_mask on X, Y and C with a zero
   vector and U as the mask.  */
#define _mm256_maskz_inserti64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (X), \
    (__v2di) (__m128i) (Y), \
    (int) (C), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
1853
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x2_256_mask on X and C with a zero vector
   and an all-ones mask.  */
#define _mm256_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (X), \
    (int) (C), \
    (__v2df) (__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))
1857
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x2_256_mask on X and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (X), \
    (int) (C), \
    (__v2df) (__m128d) (W), \
    (__mmask8) (U)))
1861
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x2_256_mask on X and C with a zero vector
   and U as the mask.  */
#define _mm256_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (X), \
    (int) (C), \
    (__v2df) (__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
1865
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x2_256_mask on X and C with the value of
   _mm_setzero_di () and an all-ones mask.  */
#define _mm256_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (X), \
    (int) (C), \
    (__v2di) (__m128i) _mm_setzero_di (), \
    (__mmask8) -1))
1869
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x2_256_mask on X and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (X), \
    (int) (C), \
    (__v2di) (__m128i) (W), \
    (__mmask8) (U)))
1873
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x2_256_mask on X and C with the value of
   _mm_setzero_di () and U as the mask.  */
#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (X), \
    (int) (C), \
    (__v2di) (__m128i) _mm_setzero_di (), \
    (__mmask8) (U)))
1877
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd256_mask on A and B with a zero vector and
   an all-ones mask.  */
#define _mm256_reduce_pd(A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (A), \
    (int) (B), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) -1))
1881
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd256_mask on A and B with W as the trailing
   vector operand and U as the mask.  */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (A), \
    (int) (B), \
    (__v4df) (__m256d) (W), \
    (__mmask8) (U)))
1885
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd256_mask on A and B with a zero vector and
   U as the mask.  */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (A), \
    (int) (B), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) (U)))
1889
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd128_mask on A and B with a zero vector and
   an all-ones mask.  */
#define _mm_reduce_pd(A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (A), \
    (int) (B), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) -1))
1893
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd128_mask on A and B with W as the trailing
   vector operand and U as the mask.  */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (A), \
    (int) (B), \
    (__v2df) (__m128d) (W), \
    (__mmask8) (U)))
1897
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reducepd128_mask on A and B with a zero vector and
   U as the mask.  */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (A), \
    (int) (B), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) (U)))
1901
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps256_mask on A and B with a zero vector and
   an all-ones mask.  */
#define _mm256_reduce_ps(A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (A), \
    (int) (B), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) -1))
1905
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps256_mask on A and B with W as the trailing
   vector operand and U as the mask.  */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (A), \
    (int) (B), \
    (__v8sf) (__m256) (W), \
    (__mmask8) (U)))
1909
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps256_mask on A and B with a zero vector and
   U as the mask.  */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (A), \
    (int) (B), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) (U)))
1913
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps128_mask on A and B with a zero vector and
   an all-ones mask.  */
#define _mm_reduce_ps(A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (A), \
    (int) (B), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) -1))
1917
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps128_mask on A and B with W as the trailing
   vector operand and U as the mask.  */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (A), \
    (int) (B), \
    (__v4sf) (__m128) (W), \
    (__mmask8) (U)))
1921
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_reduceps128_mask on A and B with a zero vector and
   U as the mask.  */
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (A), \
    (int) (B), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) (U)))
1925
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_rangepd256_mask on A, B and C with a zero vector and
   an all-ones mask.  */
#define _mm256_range_pd(A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (A), \
    (__v4df) (__m256d) (B), \
    (int) (C), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) -1))
1930
/* _mm256_maskz_range_pd (U, A, B, C): wraps
   __builtin_ia32_rangepd256_mask.  A and B are passed as __v4df, C as an
   int, the fourth builtin argument is the vector returned by
   _mm256_setzero_pd () and U is passed as the __mmask8 mask.  Result is
   cast to __m256d.  */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d) \
   __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (A), \
                                   (__v4df) (__m256d) (B), \
                                   (int) (C), \
                                   (__v4df) _mm256_setzero_pd (), \
                                   (__mmask8) (U)))
1935
/* _mm_range_pd (A, B, C): wraps __builtin_ia32_rangepd128_mask.
   A and B are passed as __v2df, C as an int, the fourth builtin argument
   is the vector returned by _mm_setzero_pd () and the mask is the
   constant (__mmask8) -1 (presumably "all elements"; confirm against
   the builtin's contract).  Result is cast to __m128d.  */
#define _mm_range_pd(A, B, C) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (A), \
                                   (__v2df) (__m128d) (B), \
                                   (int) (C), \
                                   (__v2df) _mm_setzero_pd (), \
                                   (__mmask8) -1))
1940
/* _mm256_range_ps (A, B, C): wraps __builtin_ia32_rangeps256_mask.
   A and B are passed as __v8sf, C as an int, the fourth builtin argument
   is the vector returned by _mm256_setzero_ps () and the mask is the
   constant (__mmask8) -1 (presumably "all elements"; confirm against
   the builtin's contract).  Result is cast to __m256.  */
#define _mm256_range_ps(A, B, C) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (A), \
                                   (__v8sf) (__m256) (B), \
                                   (int) (C), \
                                   (__v8sf) _mm256_setzero_ps (), \
                                   (__mmask8) -1))
1945
/* _mm256_mask_range_ps (W, U, A, B, C): wraps
   __builtin_ia32_rangeps256_mask.  A and B are passed as __v8sf, C as an
   int, W (as __v8sf) is the fourth builtin argument and U is passed as
   the __mmask8 mask.  Result is cast to __m256.  */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (A), \
                                   (__v8sf) (__m256) (B), \
                                   (int) (C), \
                                   (__v8sf) (__m256) (W), \
                                   (__mmask8) (U)))
1950
/* _mm256_maskz_range_ps (U, A, B, C): wraps
   __builtin_ia32_rangeps256_mask.  A and B are passed as __v8sf, C as an
   int, the fourth builtin argument is the vector returned by
   _mm256_setzero_ps () and U is passed as the __mmask8 mask.  Result is
   cast to __m256.  */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (A), \
                                   (__v8sf) (__m256) (B), \
                                   (int) (C), \
                                   (__v8sf) _mm256_setzero_ps (), \
                                   (__mmask8) (U)))
1955
/* _mm_range_ps (A, B, C): wraps __builtin_ia32_rangeps128_mask.
   A and B are passed as __v4sf, C as an int, the fourth builtin argument
   is the vector returned by _mm_setzero_ps () and the mask is the
   constant (__mmask8) -1 (presumably "all elements"; confirm against
   the builtin's contract).  Result is cast to __m128.  */
#define _mm_range_ps(A, B, C) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (A), \
                                   (__v4sf) (__m128) (B), \
                                   (int) (C), \
                                   (__v4sf) _mm_setzero_ps (), \
                                   (__mmask8) -1))
1960
/* _mm_mask_range_ps (W, U, A, B, C): wraps
   __builtin_ia32_rangeps128_mask.  A and B are passed as __v4sf, C as an
   int, W (as __v4sf) is the fourth builtin argument and U is passed as
   the __mmask8 mask.  Result is cast to __m128.  */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (A), \
                                   (__v4sf) (__m128) (B), \
                                   (int) (C), \
                                   (__v4sf) (__m128) (W), \
                                   (__mmask8) (U)))
1965
/* _mm_maskz_range_ps (U, A, B, C): wraps __builtin_ia32_rangeps128_mask.
   A and B are passed as __v4sf, C as an int, the fourth builtin argument
   is the vector returned by _mm_setzero_ps () and U is passed as the
   __mmask8 mask.  Result is cast to __m128.  */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (A), \
                                   (__v4sf) (__m128) (B), \
                                   (int) (C), \
                                   (__v4sf) _mm_setzero_ps (), \
                                   (__mmask8) (U)))
1970
/* _mm256_mask_range_pd (W, U, A, B, C): wraps
   __builtin_ia32_rangepd256_mask.  A and B are passed as __v4df, C as an
   int, W (as __v4df) is the fourth builtin argument and U is passed as
   the __mmask8 mask.  Result is cast to __m256d.  */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d) \
   __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (A), \
                                   (__v4df) (__m256d) (B), \
                                   (int) (C), \
                                   (__v4df) (__m256d) (W), \
                                   (__mmask8) (U)))
1975
/* _mm_mask_range_pd (W, U, A, B, C): wraps
   __builtin_ia32_rangepd128_mask.  A and B are passed as __v2df, C as an
   int, W (as __v2df) is the fourth builtin argument and U is passed as
   the __mmask8 mask.  Result is cast to __m128d.  */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (A), \
                                   (__v2df) (__m128d) (B), \
                                   (int) (C), \
                                   (__v2df) (__m128d) (W), \
                                   (__mmask8) (U)))
1980
/* _mm_maskz_range_pd (U, A, B, C): wraps __builtin_ia32_rangepd128_mask.
   A and B are passed as __v2df, C as an int, the fourth builtin argument
   is the vector returned by _mm_setzero_pd () and U is passed as the
   __mmask8 mask.  Result is cast to __m128d.  */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (A), \
                                   (__v2df) (__m128d) (B), \
                                   (int) (C), \
                                   (__v2df) _mm_setzero_pd (), \
                                   (__mmask8) (U)))
1985
/* _mm256_mask_fpclass_pd_mask (u, X, C): wraps
   __builtin_ia32_fpclasspd256_mask.  X is passed as __v4df, C as an int
   and u as the __mmask8 mask; the builtin's result is cast to
   __mmask8.  */
#define _mm256_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
                                     (int) (C), \
                                     (__mmask8) (u)))
1989
/* _mm256_mask_fpclass_ps_mask (u, X, C): wraps
   __builtin_ia32_fpclassps256_mask.  X is passed as __v8sf, C as an int
   and u as the __mmask8 mask; the builtin's result is cast to
   __mmask8.  */
#define _mm256_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
                                     (int) (C), \
                                     (__mmask8) (u)))
1993
/* _mm_mask_fpclass_pd_mask (u, X, C): wraps
   __builtin_ia32_fpclasspd128_mask.  X is passed as __v2df, C as an int
   and u as the __mmask8 mask; the builtin's result is cast to
   __mmask8.  */
#define _mm_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
                                     (int) (C), \
                                     (__mmask8) (u)))
1997
/* _mm_mask_fpclass_ps_mask (u, X, C): wraps
   __builtin_ia32_fpclassps128_mask.  X is passed as __v4sf, C as an int
   and u as the __mmask8 mask; the builtin's result is cast to
   __mmask8.  */
#define _mm_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
                                     (int) (C), \
                                     (__mmask8) (u)))
2001
/* _mm256_fpclass_pd_mask (X, C): wraps __builtin_ia32_fpclasspd256_mask.
   X is passed as __v4df, C as an int and the mask is the constant
   (__mmask8) -1 (presumably "all elements"; confirm against the
   builtin's contract).  The builtin's result is cast to __mmask8.  */
#define _mm256_fpclass_pd_mask(X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
                                     (int) (C), \
                                     (__mmask8) -1))
2005
/* _mm256_fpclass_ps_mask (X, C): wraps __builtin_ia32_fpclassps256_mask.
   X is passed as __v8sf, C as an int and the mask is the constant
   (__mmask8) -1 (presumably "all elements"; confirm against the
   builtin's contract).  The builtin's result is cast to __mmask8.  */
#define _mm256_fpclass_ps_mask(X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
                                     (int) (C), \
                                     (__mmask8) -1))
2009
/* _mm_fpclass_pd_mask (X, C): wraps __builtin_ia32_fpclasspd128_mask.
   X is passed as __v2df, C as an int and the mask is the constant
   (__mmask8) -1 (presumably "all elements"; confirm against the
   builtin's contract).  The builtin's result is cast to __mmask8.  */
#define _mm_fpclass_pd_mask(X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
                                     (int) (C), \
                                     (__mmask8) -1))
2013
/* _mm_fpclass_ps_mask (X, C): wraps __builtin_ia32_fpclassps128_mask.
   X is passed as __v4sf, C as an int and the mask is the constant
   (__mmask8) -1 (presumably "all elements"; confirm against the
   builtin's contract).  The builtin's result is cast to __mmask8.  */
#define _mm_fpclass_ps_mask(X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
                                     (int) (C), \
                                     (__mmask8) -1))
2017
2018#endif
2019
/* __DISABLE_AVX512VLDQ__ is defined at the top of this header only when
   it had to push the target options and switch to "avx512vl,avx512dq"
   itself.  In that case drop the marker and pop the saved options so
   code after this header sees the caller's original target.  */
#if defined (__DISABLE_AVX512VLDQ__)
#undef __DISABLE_AVX512VLDQ__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLDQ__ */
2024
2025#endif /* _AVX512VLDQINTRIN_H_INCLUDED */
2026