1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLDQINTRIN_H
15#define __AVX512VLDQINTRIN_H
16
/* Attribute bundles attached to every intrinsic defined in this header. The
 * two variants are identical except for the __min_vector_width__ argument
 * (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"),                              \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"),                              \
                 __min_vector_width__(256)))
20
21static __inline__ __m256i __DEFAULT_FN_ATTRS256
22_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
23  return (__m256i) ((__v4du) __A * (__v4du) __B);
24}
25
26static __inline__ __m256i __DEFAULT_FN_ATTRS256
27_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
28  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
29                                             (__v4di)_mm256_mullo_epi64(__A, __B),
30                                             (__v4di)__W);
31}
32
33static __inline__ __m256i __DEFAULT_FN_ATTRS256
34_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
35  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
36                                             (__v4di)_mm256_mullo_epi64(__A, __B),
37                                             (__v4di)_mm256_setzero_si256());
38}
39
40static __inline__ __m128i __DEFAULT_FN_ATTRS128
41_mm_mullo_epi64 (__m128i __A, __m128i __B) {
42  return (__m128i) ((__v2du) __A * (__v2du) __B);
43}
44
45static __inline__ __m128i __DEFAULT_FN_ATTRS128
46_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
47  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
48                                             (__v2di)_mm_mullo_epi64(__A, __B),
49                                             (__v2di)__W);
50}
51
52static __inline__ __m128i __DEFAULT_FN_ATTRS128
53_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
54  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
55                                             (__v2di)_mm_mullo_epi64(__A, __B),
56                                             (__v2di)_mm_setzero_si128());
57}
58
59static __inline__ __m256d __DEFAULT_FN_ATTRS256
60_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
61  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
62                                              (__v4df)_mm256_andnot_pd(__A, __B),
63                                              (__v4df)__W);
64}
65
66static __inline__ __m256d __DEFAULT_FN_ATTRS256
67_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
68  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
69                                              (__v4df)_mm256_andnot_pd(__A, __B),
70                                              (__v4df)_mm256_setzero_pd());
71}
72
73static __inline__ __m128d __DEFAULT_FN_ATTRS128
74_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
75  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
76                                              (__v2df)_mm_andnot_pd(__A, __B),
77                                              (__v2df)__W);
78}
79
80static __inline__ __m128d __DEFAULT_FN_ATTRS128
81_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
82  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
83                                              (__v2df)_mm_andnot_pd(__A, __B),
84                                              (__v2df)_mm_setzero_pd());
85}
86
87static __inline__ __m256 __DEFAULT_FN_ATTRS256
88_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
89  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
90                                             (__v8sf)_mm256_andnot_ps(__A, __B),
91                                             (__v8sf)__W);
92}
93
94static __inline__ __m256 __DEFAULT_FN_ATTRS256
95_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
96  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
97                                             (__v8sf)_mm256_andnot_ps(__A, __B),
98                                             (__v8sf)_mm256_setzero_ps());
99}
100
101static __inline__ __m128 __DEFAULT_FN_ATTRS128
102_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
103  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
104                                             (__v4sf)_mm_andnot_ps(__A, __B),
105                                             (__v4sf)__W);
106}
107
108static __inline__ __m128 __DEFAULT_FN_ATTRS128
109_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
110  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
111                                             (__v4sf)_mm_andnot_ps(__A, __B),
112                                             (__v4sf)_mm_setzero_ps());
113}
114
115static __inline__ __m256d __DEFAULT_FN_ATTRS256
116_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
117  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
118                                              (__v4df)_mm256_and_pd(__A, __B),
119                                              (__v4df)__W);
120}
121
122static __inline__ __m256d __DEFAULT_FN_ATTRS256
123_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
124  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
125                                              (__v4df)_mm256_and_pd(__A, __B),
126                                              (__v4df)_mm256_setzero_pd());
127}
128
129static __inline__ __m128d __DEFAULT_FN_ATTRS128
130_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
131  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
132                                              (__v2df)_mm_and_pd(__A, __B),
133                                              (__v2df)__W);
134}
135
136static __inline__ __m128d __DEFAULT_FN_ATTRS128
137_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
138  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
139                                              (__v2df)_mm_and_pd(__A, __B),
140                                              (__v2df)_mm_setzero_pd());
141}
142
143static __inline__ __m256 __DEFAULT_FN_ATTRS256
144_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
145  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
146                                             (__v8sf)_mm256_and_ps(__A, __B),
147                                             (__v8sf)__W);
148}
149
150static __inline__ __m256 __DEFAULT_FN_ATTRS256
151_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
152  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
153                                             (__v8sf)_mm256_and_ps(__A, __B),
154                                             (__v8sf)_mm256_setzero_ps());
155}
156
157static __inline__ __m128 __DEFAULT_FN_ATTRS128
158_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
159  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
160                                             (__v4sf)_mm_and_ps(__A, __B),
161                                             (__v4sf)__W);
162}
163
164static __inline__ __m128 __DEFAULT_FN_ATTRS128
165_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
166  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
167                                             (__v4sf)_mm_and_ps(__A, __B),
168                                             (__v4sf)_mm_setzero_ps());
169}
170
171static __inline__ __m256d __DEFAULT_FN_ATTRS256
172_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
173  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
174                                              (__v4df)_mm256_xor_pd(__A, __B),
175                                              (__v4df)__W);
176}
177
178static __inline__ __m256d __DEFAULT_FN_ATTRS256
179_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
180  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
181                                              (__v4df)_mm256_xor_pd(__A, __B),
182                                              (__v4df)_mm256_setzero_pd());
183}
184
185static __inline__ __m128d __DEFAULT_FN_ATTRS128
186_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
187  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
188                                              (__v2df)_mm_xor_pd(__A, __B),
189                                              (__v2df)__W);
190}
191
192static __inline__ __m128d __DEFAULT_FN_ATTRS128
193_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
194  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
195                                              (__v2df)_mm_xor_pd(__A, __B),
196                                              (__v2df)_mm_setzero_pd());
197}
198
199static __inline__ __m256 __DEFAULT_FN_ATTRS256
200_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
201  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
202                                             (__v8sf)_mm256_xor_ps(__A, __B),
203                                             (__v8sf)__W);
204}
205
206static __inline__ __m256 __DEFAULT_FN_ATTRS256
207_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
208  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
209                                             (__v8sf)_mm256_xor_ps(__A, __B),
210                                             (__v8sf)_mm256_setzero_ps());
211}
212
213static __inline__ __m128 __DEFAULT_FN_ATTRS128
214_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
215  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
216                                             (__v4sf)_mm_xor_ps(__A, __B),
217                                             (__v4sf)__W);
218}
219
220static __inline__ __m128 __DEFAULT_FN_ATTRS128
221_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
222  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
223                                             (__v4sf)_mm_xor_ps(__A, __B),
224                                             (__v4sf)_mm_setzero_ps());
225}
226
227static __inline__ __m256d __DEFAULT_FN_ATTRS256
228_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
229  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
230                                              (__v4df)_mm256_or_pd(__A, __B),
231                                              (__v4df)__W);
232}
233
234static __inline__ __m256d __DEFAULT_FN_ATTRS256
235_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
236  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
237                                              (__v4df)_mm256_or_pd(__A, __B),
238                                              (__v4df)_mm256_setzero_pd());
239}
240
241static __inline__ __m128d __DEFAULT_FN_ATTRS128
242_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
243  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
244                                              (__v2df)_mm_or_pd(__A, __B),
245                                              (__v2df)__W);
246}
247
248static __inline__ __m128d __DEFAULT_FN_ATTRS128
249_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
250  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
251                                              (__v2df)_mm_or_pd(__A, __B),
252                                              (__v2df)_mm_setzero_pd());
253}
254
255static __inline__ __m256 __DEFAULT_FN_ATTRS256
256_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
257  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
258                                             (__v8sf)_mm256_or_ps(__A, __B),
259                                             (__v8sf)__W);
260}
261
262static __inline__ __m256 __DEFAULT_FN_ATTRS256
263_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
264  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
265                                             (__v8sf)_mm256_or_ps(__A, __B),
266                                             (__v8sf)_mm256_setzero_ps());
267}
268
269static __inline__ __m128 __DEFAULT_FN_ATTRS128
270_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
271  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
272                                             (__v4sf)_mm_or_ps(__A, __B),
273                                             (__v4sf)__W);
274}
275
276static __inline__ __m128 __DEFAULT_FN_ATTRS128
277_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
278  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
279                                             (__v4sf)_mm_or_ps(__A, __B),
280                                             (__v4sf)_mm_setzero_ps());
281}
282
283static __inline__ __m128i __DEFAULT_FN_ATTRS128
284_mm_cvtpd_epi64 (__m128d __A) {
285  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
286                (__v2di) _mm_setzero_si128(),
287                (__mmask8) -1);
288}
289
290static __inline__ __m128i __DEFAULT_FN_ATTRS128
291_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
292  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
293                (__v2di) __W,
294                (__mmask8) __U);
295}
296
297static __inline__ __m128i __DEFAULT_FN_ATTRS128
298_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
299  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
300                (__v2di) _mm_setzero_si128(),
301                (__mmask8) __U);
302}
303
304static __inline__ __m256i __DEFAULT_FN_ATTRS256
305_mm256_cvtpd_epi64 (__m256d __A) {
306  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
307                (__v4di) _mm256_setzero_si256(),
308                (__mmask8) -1);
309}
310
311static __inline__ __m256i __DEFAULT_FN_ATTRS256
312_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
313  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
314                (__v4di) __W,
315                (__mmask8) __U);
316}
317
318static __inline__ __m256i __DEFAULT_FN_ATTRS256
319_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
320  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
321                (__v4di) _mm256_setzero_si256(),
322                (__mmask8) __U);
323}
324
325static __inline__ __m128i __DEFAULT_FN_ATTRS128
326_mm_cvtpd_epu64 (__m128d __A) {
327  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
328                (__v2di) _mm_setzero_si128(),
329                (__mmask8) -1);
330}
331
332static __inline__ __m128i __DEFAULT_FN_ATTRS128
333_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
334  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
335                (__v2di) __W,
336                (__mmask8) __U);
337}
338
339static __inline__ __m128i __DEFAULT_FN_ATTRS128
340_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
341  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
342                (__v2di) _mm_setzero_si128(),
343                (__mmask8) __U);
344}
345
346static __inline__ __m256i __DEFAULT_FN_ATTRS256
347_mm256_cvtpd_epu64 (__m256d __A) {
348  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
349                (__v4di) _mm256_setzero_si256(),
350                (__mmask8) -1);
351}
352
353static __inline__ __m256i __DEFAULT_FN_ATTRS256
354_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
355  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
356                (__v4di) __W,
357                (__mmask8) __U);
358}
359
360static __inline__ __m256i __DEFAULT_FN_ATTRS256
361_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
362  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
363                (__v4di) _mm256_setzero_si256(),
364                (__mmask8) __U);
365}
366
367static __inline__ __m128i __DEFAULT_FN_ATTRS128
368_mm_cvtps_epi64 (__m128 __A) {
369  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
370                (__v2di) _mm_setzero_si128(),
371                (__mmask8) -1);
372}
373
374static __inline__ __m128i __DEFAULT_FN_ATTRS128
375_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
376  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
377                (__v2di) __W,
378                (__mmask8) __U);
379}
380
381static __inline__ __m128i __DEFAULT_FN_ATTRS128
382_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
383  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
384                (__v2di) _mm_setzero_si128(),
385                (__mmask8) __U);
386}
387
388static __inline__ __m256i __DEFAULT_FN_ATTRS256
389_mm256_cvtps_epi64 (__m128 __A) {
390  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
391                (__v4di) _mm256_setzero_si256(),
392                (__mmask8) -1);
393}
394
395static __inline__ __m256i __DEFAULT_FN_ATTRS256
396_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
397  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
398                (__v4di) __W,
399                (__mmask8) __U);
400}
401
402static __inline__ __m256i __DEFAULT_FN_ATTRS256
403_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
404  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
405                (__v4di) _mm256_setzero_si256(),
406                (__mmask8) __U);
407}
408
409static __inline__ __m128i __DEFAULT_FN_ATTRS128
410_mm_cvtps_epu64 (__m128 __A) {
411  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
412                (__v2di) _mm_setzero_si128(),
413                (__mmask8) -1);
414}
415
416static __inline__ __m128i __DEFAULT_FN_ATTRS128
417_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
418  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
419                (__v2di) __W,
420                (__mmask8) __U);
421}
422
423static __inline__ __m128i __DEFAULT_FN_ATTRS128
424_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
425  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
426                (__v2di) _mm_setzero_si128(),
427                (__mmask8) __U);
428}
429
430static __inline__ __m256i __DEFAULT_FN_ATTRS256
431_mm256_cvtps_epu64 (__m128 __A) {
432  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
433                (__v4di) _mm256_setzero_si256(),
434                (__mmask8) -1);
435}
436
437static __inline__ __m256i __DEFAULT_FN_ATTRS256
438_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
439  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
440                (__v4di) __W,
441                (__mmask8) __U);
442}
443
444static __inline__ __m256i __DEFAULT_FN_ATTRS256
445_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
446  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
447                (__v4di) _mm256_setzero_si256(),
448                (__mmask8) __U);
449}
450
451static __inline__ __m128d __DEFAULT_FN_ATTRS128
452_mm_cvtepi64_pd (__m128i __A) {
453  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
454}
455
456static __inline__ __m128d __DEFAULT_FN_ATTRS128
457_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
458  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
459                                              (__v2df)_mm_cvtepi64_pd(__A),
460                                              (__v2df)__W);
461}
462
463static __inline__ __m128d __DEFAULT_FN_ATTRS128
464_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
465  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
466                                              (__v2df)_mm_cvtepi64_pd(__A),
467                                              (__v2df)_mm_setzero_pd());
468}
469
470static __inline__ __m256d __DEFAULT_FN_ATTRS256
471_mm256_cvtepi64_pd (__m256i __A) {
472  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
473}
474
475static __inline__ __m256d __DEFAULT_FN_ATTRS256
476_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
477  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
478                                              (__v4df)_mm256_cvtepi64_pd(__A),
479                                              (__v4df)__W);
480}
481
482static __inline__ __m256d __DEFAULT_FN_ATTRS256
483_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
484  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
485                                              (__v4df)_mm256_cvtepi64_pd(__A),
486                                              (__v4df)_mm256_setzero_pd());
487}
488
489static __inline__ __m128 __DEFAULT_FN_ATTRS128
490_mm_cvtepi64_ps (__m128i __A) {
491  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
492                (__v4sf) _mm_setzero_ps(),
493                (__mmask8) -1);
494}
495
496static __inline__ __m128 __DEFAULT_FN_ATTRS128
497_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
498  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
499                (__v4sf) __W,
500                (__mmask8) __U);
501}
502
503static __inline__ __m128 __DEFAULT_FN_ATTRS128
504_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
505  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
506                (__v4sf) _mm_setzero_ps(),
507                (__mmask8) __U);
508}
509
510static __inline__ __m128 __DEFAULT_FN_ATTRS256
511_mm256_cvtepi64_ps (__m256i __A) {
512  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
513}
514
515static __inline__ __m128 __DEFAULT_FN_ATTRS256
516_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
517  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
518                                             (__v4sf)_mm256_cvtepi64_ps(__A),
519                                             (__v4sf)__W);
520}
521
522static __inline__ __m128 __DEFAULT_FN_ATTRS256
523_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
524  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
525                                             (__v4sf)_mm256_cvtepi64_ps(__A),
526                                             (__v4sf)_mm_setzero_ps());
527}
528
529static __inline__ __m128i __DEFAULT_FN_ATTRS128
530_mm_cvttpd_epi64 (__m128d __A) {
531  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
532                (__v2di) _mm_setzero_si128(),
533                (__mmask8) -1);
534}
535
536static __inline__ __m128i __DEFAULT_FN_ATTRS128
537_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
538  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
539                (__v2di) __W,
540                (__mmask8) __U);
541}
542
543static __inline__ __m128i __DEFAULT_FN_ATTRS128
544_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
545  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
546                (__v2di) _mm_setzero_si128(),
547                (__mmask8) __U);
548}
549
550static __inline__ __m256i __DEFAULT_FN_ATTRS256
551_mm256_cvttpd_epi64 (__m256d __A) {
552  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
553                (__v4di) _mm256_setzero_si256(),
554                (__mmask8) -1);
555}
556
557static __inline__ __m256i __DEFAULT_FN_ATTRS256
558_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
559  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
560                (__v4di) __W,
561                (__mmask8) __U);
562}
563
564static __inline__ __m256i __DEFAULT_FN_ATTRS256
565_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
566  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
567                (__v4di) _mm256_setzero_si256(),
568                (__mmask8) __U);
569}
570
571static __inline__ __m128i __DEFAULT_FN_ATTRS128
572_mm_cvttpd_epu64 (__m128d __A) {
573  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
574                (__v2di) _mm_setzero_si128(),
575                (__mmask8) -1);
576}
577
578static __inline__ __m128i __DEFAULT_FN_ATTRS128
579_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
580  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
581                (__v2di) __W,
582                (__mmask8) __U);
583}
584
585static __inline__ __m128i __DEFAULT_FN_ATTRS128
586_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
587  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
588                (__v2di) _mm_setzero_si128(),
589                (__mmask8) __U);
590}
591
592static __inline__ __m256i __DEFAULT_FN_ATTRS256
593_mm256_cvttpd_epu64 (__m256d __A) {
594  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
595                (__v4di) _mm256_setzero_si256(),
596                (__mmask8) -1);
597}
598
599static __inline__ __m256i __DEFAULT_FN_ATTRS256
600_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
601  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
602                (__v4di) __W,
603                (__mmask8) __U);
604}
605
606static __inline__ __m256i __DEFAULT_FN_ATTRS256
607_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
608  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
609                (__v4di) _mm256_setzero_si256(),
610                (__mmask8) __U);
611}
612
613static __inline__ __m128i __DEFAULT_FN_ATTRS128
614_mm_cvttps_epi64 (__m128 __A) {
615  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
616                (__v2di) _mm_setzero_si128(),
617                (__mmask8) -1);
618}
619
620static __inline__ __m128i __DEFAULT_FN_ATTRS128
621_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
622  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
623                (__v2di) __W,
624                (__mmask8) __U);
625}
626
627static __inline__ __m128i __DEFAULT_FN_ATTRS128
628_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
629  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
630                (__v2di) _mm_setzero_si128(),
631                (__mmask8) __U);
632}
633
634static __inline__ __m256i __DEFAULT_FN_ATTRS256
635_mm256_cvttps_epi64 (__m128 __A) {
636  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
637                (__v4di) _mm256_setzero_si256(),
638                (__mmask8) -1);
639}
640
641static __inline__ __m256i __DEFAULT_FN_ATTRS256
642_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
643  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
644                (__v4di) __W,
645                (__mmask8) __U);
646}
647
648static __inline__ __m256i __DEFAULT_FN_ATTRS256
649_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
650  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
651                (__v4di) _mm256_setzero_si256(),
652                (__mmask8) __U);
653}
654
655static __inline__ __m128i __DEFAULT_FN_ATTRS128
656_mm_cvttps_epu64 (__m128 __A) {
657  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
658                (__v2di) _mm_setzero_si128(),
659                (__mmask8) -1);
660}
661
662static __inline__ __m128i __DEFAULT_FN_ATTRS128
663_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
664  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
665                (__v2di) __W,
666                (__mmask8) __U);
667}
668
669static __inline__ __m128i __DEFAULT_FN_ATTRS128
670_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
671  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
672                (__v2di) _mm_setzero_si128(),
673                (__mmask8) __U);
674}
675
676static __inline__ __m256i __DEFAULT_FN_ATTRS256
677_mm256_cvttps_epu64 (__m128 __A) {
678  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
679                (__v4di) _mm256_setzero_si256(),
680                (__mmask8) -1);
681}
682
683static __inline__ __m256i __DEFAULT_FN_ATTRS256
684_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
685  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
686                (__v4di) __W,
687                (__mmask8) __U);
688}
689
690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
692  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
693                (__v4di) _mm256_setzero_si256(),
694                (__mmask8) __U);
695}
696
697static __inline__ __m128d __DEFAULT_FN_ATTRS128
698_mm_cvtepu64_pd (__m128i __A) {
699  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
700}
701
702static __inline__ __m128d __DEFAULT_FN_ATTRS128
703_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
704  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
705                                              (__v2df)_mm_cvtepu64_pd(__A),
706                                              (__v2df)__W);
707}
708
709static __inline__ __m128d __DEFAULT_FN_ATTRS128
710_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
711  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
712                                              (__v2df)_mm_cvtepu64_pd(__A),
713                                              (__v2df)_mm_setzero_pd());
714}
715
716static __inline__ __m256d __DEFAULT_FN_ATTRS256
717_mm256_cvtepu64_pd (__m256i __A) {
718  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
719}
720
721static __inline__ __m256d __DEFAULT_FN_ATTRS256
722_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
723  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
724                                              (__v4df)_mm256_cvtepu64_pd(__A),
725                                              (__v4df)__W);
726}
727
728static __inline__ __m256d __DEFAULT_FN_ATTRS256
729_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
730  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
731                                              (__v4df)_mm256_cvtepu64_pd(__A),
732                                              (__v4df)_mm256_setzero_pd());
733}
734
735static __inline__ __m128 __DEFAULT_FN_ATTRS128
736_mm_cvtepu64_ps (__m128i __A) {
737  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
738                (__v4sf) _mm_setzero_ps(),
739                (__mmask8) -1);
740}
741
742static __inline__ __m128 __DEFAULT_FN_ATTRS128
743_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
744  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
745                (__v4sf) __W,
746                (__mmask8) __U);
747}
748
749static __inline__ __m128 __DEFAULT_FN_ATTRS128
750_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
751  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
752                (__v4sf) _mm_setzero_ps(),
753                (__mmask8) __U);
754}
755
756static __inline__ __m128 __DEFAULT_FN_ATTRS256
757_mm256_cvtepu64_ps (__m256i __A) {
758  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
759}
760
761static __inline__ __m128 __DEFAULT_FN_ATTRS256
762_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
763  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
764                                             (__v4sf)_mm256_cvtepu64_ps(__A),
765                                             (__v4sf)__W);
766}
767
768static __inline__ __m128 __DEFAULT_FN_ATTRS256
769_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
770  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
771                                             (__v4sf)_mm256_cvtepu64_ps(__A),
772                                             (__v4sf)_mm_setzero_ps());
773}
774
/* Invokes __builtin_ia32_rangepd128_mask on A and B (viewed as __v2df) with
 * immediate C, passing _mm_setzero_pd() and the mask (__mmask8)-1; the result
 * is cast to __m128d.  The expansion is fully parenthesized so it binds as a
 * single operand in any surrounding expression. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))
780
/* Invokes __builtin_ia32_rangepd128_mask on A and B (viewed as __v2df) with
 * immediate C, passing W (as __v2df) and the mask U; the result is cast to
 * __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))
786
/* Invokes __builtin_ia32_rangepd128_mask on A and B (viewed as __v2df) with
 * immediate C, passing _mm_setzero_pd() and the mask U; the result is cast to
 * __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))
792
/* Invokes __builtin_ia32_rangepd256_mask on A and B (viewed as __v4df) with
 * immediate C, passing _mm256_setzero_pd() and the mask (__mmask8)-1; the
 * result is cast to __m256d.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))
798
/* Invokes __builtin_ia32_rangepd256_mask on A and B (viewed as __v4df) with
 * immediate C, passing W (as __v4df) and the mask U; the result is cast to
 * __m256d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))
804
/* Invokes __builtin_ia32_rangepd256_mask on A and B (viewed as __v4df) with
 * immediate C, passing _mm256_setzero_pd() and the mask U; the result is cast
 * to __m256d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))
810
/* Invokes __builtin_ia32_rangeps128_mask on A and B (viewed as __v4sf) with
 * immediate C, passing _mm_setzero_ps() and the mask (__mmask8)-1; the result
 * is cast to __m128.  The expansion is fully parenthesized so it binds as a
 * single operand in any surrounding expression. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))
816
/* Invokes __builtin_ia32_rangeps128_mask on A and B (viewed as __v4sf) with
 * immediate C, passing W (as __v4sf) and the mask U; the result is cast to
 * __m128.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))
821
/* Invokes __builtin_ia32_rangeps128_mask on A and B (viewed as __v4sf) with
 * immediate C, passing _mm_setzero_ps() and the mask U; the result is cast to
 * __m128.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))
827
/* Invokes __builtin_ia32_rangeps256_mask on A and B (viewed as __v8sf) with
 * immediate C, passing _mm256_setzero_ps() and the mask (__mmask8)-1; the
 * result is cast to __m256.  The expansion is fully parenthesized so it binds
 * as a single operand in any surrounding expression. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))
833
/* Invokes __builtin_ia32_rangeps256_mask on A and B (viewed as __v8sf) with
 * immediate C, passing W (as __v8sf) and the mask U; the result is cast to
 * __m256.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))
838
/* Invokes __builtin_ia32_rangeps256_mask on A and B (viewed as __v8sf) with
 * immediate C, passing _mm256_setzero_ps() and the mask U; the result is cast
 * to __m256.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
844
/* Invokes __builtin_ia32_reducepd128_mask on A (viewed as __v2df) with
 * immediate B, passing _mm_setzero_pd() and the mask (__mmask8)-1; the result
 * is cast to __m128d.  The expansion is fully parenthesized so it binds as a
 * single operand in any surrounding expression. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))
849
/* Invokes __builtin_ia32_reducepd128_mask on A (viewed as __v2df) with
 * immediate B, passing W (as __v2df) and the mask U; the result is cast to
 * __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))
854
/* Invokes __builtin_ia32_reducepd128_mask on A (viewed as __v2df) with
 * immediate B, passing _mm_setzero_pd() and the mask U; the result is cast to
 * __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))
859
/* Invokes __builtin_ia32_reducepd256_mask on A (viewed as __v4df) with
 * immediate B, passing _mm256_setzero_pd() and the mask (__mmask8)-1; the
 * result is cast to __m256d.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))
864
/* Invokes __builtin_ia32_reducepd256_mask on A (viewed as __v4df) with
 * immediate B, passing W (as __v4df) and the mask U; the result is cast to
 * __m256d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))
869
/* Invokes __builtin_ia32_reducepd256_mask on A (viewed as __v4df) with
 * immediate B, passing _mm256_setzero_pd() and the mask U; the result is cast
 * to __m256d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))
874
/* Invokes __builtin_ia32_reduceps128_mask on A (viewed as __v4sf) with
 * immediate B, passing _mm_setzero_ps() and the mask (__mmask8)-1; the result
 * is cast to __m128.  The expansion is fully parenthesized so it binds as a
 * single operand in any surrounding expression. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))
879
/* Invokes __builtin_ia32_reduceps128_mask on A (viewed as __v4sf) with
 * immediate B, passing W (as __v4sf) and the mask U; the result is cast to
 * __m128.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))
884
/* Invokes __builtin_ia32_reduceps128_mask on A (viewed as __v4sf) with
 * immediate B, passing _mm_setzero_ps() and the mask U; the result is cast to
 * __m128.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))
889
/* Invokes __builtin_ia32_reduceps256_mask on A (viewed as __v8sf) with
 * immediate B, passing _mm256_setzero_ps() and the mask (__mmask8)-1; the
 * result is cast to __m256.  The expansion is fully parenthesized so it binds
 * as a single operand in any surrounding expression. */
#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))
894
/* Invokes __builtin_ia32_reduceps256_mask on A (viewed as __v8sf) with
 * immediate B, passing W (as __v8sf) and the mask U; the result is cast to
 * __m256.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))
899
/* Invokes __builtin_ia32_reduceps256_mask on A (viewed as __v8sf) with
 * immediate B, passing _mm256_setzero_ps() and the mask U; the result is cast
 * to __m256.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
904
905static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
906_mm_movepi32_mask (__m128i __A)
907{
908  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
909}
910
911static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
912_mm256_movepi32_mask (__m256i __A)
913{
914  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
915}
916
917static __inline__ __m128i __DEFAULT_FN_ATTRS128
918_mm_movm_epi32 (__mmask8 __A)
919{
920  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
921}
922
923static __inline__ __m256i __DEFAULT_FN_ATTRS256
924_mm256_movm_epi32 (__mmask8 __A)
925{
926  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
927}
928
929static __inline__ __m128i __DEFAULT_FN_ATTRS128
930_mm_movm_epi64 (__mmask8 __A)
931{
932  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
933}
934
935static __inline__ __m256i __DEFAULT_FN_ATTRS256
936_mm256_movm_epi64 (__mmask8 __A)
937{
938  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
939}
940
941static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
942_mm_movepi64_mask (__m128i __A)
943{
944  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
945}
946
947static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
948_mm256_movepi64_mask (__m256i __A)
949{
950  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
951}
952
953static __inline__ __m256 __DEFAULT_FN_ATTRS256
954_mm256_broadcast_f32x2 (__m128 __A)
955{
956  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
957                                         0, 1, 0, 1, 0, 1, 0, 1);
958}
959
960static __inline__ __m256 __DEFAULT_FN_ATTRS256
961_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
962{
963  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
964                                             (__v8sf)_mm256_broadcast_f32x2(__A),
965                                             (__v8sf)__O);
966}
967
968static __inline__ __m256 __DEFAULT_FN_ATTRS256
969_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
970{
971  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
972                                             (__v8sf)_mm256_broadcast_f32x2(__A),
973                                             (__v8sf)_mm256_setzero_ps());
974}
975
976static __inline__ __m256d __DEFAULT_FN_ATTRS256
977_mm256_broadcast_f64x2(__m128d __A)
978{
979  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
980                                          0, 1, 0, 1);
981}
982
983static __inline__ __m256d __DEFAULT_FN_ATTRS256
984_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
985{
986  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
987                                            (__v4df)_mm256_broadcast_f64x2(__A),
988                                            (__v4df)__O);
989}
990
991static __inline__ __m256d __DEFAULT_FN_ATTRS256
992_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
993{
994  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
995                                            (__v4df)_mm256_broadcast_f64x2(__A),
996                                            (__v4df)_mm256_setzero_pd());
997}
998
999static __inline__ __m128i __DEFAULT_FN_ATTRS128
1000_mm_broadcast_i32x2 (__m128i __A)
1001{
1002  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1003                                          0, 1, 0, 1);
1004}
1005
1006static __inline__ __m128i __DEFAULT_FN_ATTRS128
1007_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1008{
1009  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1010                                             (__v4si)_mm_broadcast_i32x2(__A),
1011                                             (__v4si)__O);
1012}
1013
1014static __inline__ __m128i __DEFAULT_FN_ATTRS128
1015_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1016{
1017  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1018                                             (__v4si)_mm_broadcast_i32x2(__A),
1019                                             (__v4si)_mm_setzero_si128());
1020}
1021
1022static __inline__ __m256i __DEFAULT_FN_ATTRS256
1023_mm256_broadcast_i32x2 (__m128i __A)
1024{
1025  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1026                                          0, 1, 0, 1, 0, 1, 0, 1);
1027}
1028
1029static __inline__ __m256i __DEFAULT_FN_ATTRS256
1030_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1031{
1032  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1033                                             (__v8si)_mm256_broadcast_i32x2(__A),
1034                                             (__v8si)__O);
1035}
1036
1037static __inline__ __m256i __DEFAULT_FN_ATTRS256
1038_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1039{
1040  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1041                                             (__v8si)_mm256_broadcast_i32x2(__A),
1042                                             (__v8si)_mm256_setzero_si256());
1043}
1044
1045static __inline__ __m256i __DEFAULT_FN_ATTRS256
1046_mm256_broadcast_i64x2(__m128i __A)
1047{
1048  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1049                                          0, 1, 0, 1);
1050}
1051
1052static __inline__ __m256i __DEFAULT_FN_ATTRS256
1053_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1054{
1055  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1056                                            (__v4di)_mm256_broadcast_i64x2(__A),
1057                                            (__v4di)__O);
1058}
1059
1060static __inline__ __m256i __DEFAULT_FN_ATTRS256
1061_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1062{
1063  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1064                                            (__v4di)_mm256_broadcast_i64x2(__A),
1065                                            (__v4di)_mm256_setzero_si256());
1066}
1067
/* Invokes __builtin_ia32_extractf64x2_256_mask on A (viewed as __v4df) with
 * immediate imm, passing _mm_undefined_pd() and the mask (__mmask8)-1; the
 * result is cast to __m128d.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))
1073
/* Invokes __builtin_ia32_extractf64x2_256_mask on A (viewed as __v4df) with
 * immediate imm, passing W (as __v2df) and the mask U; the result is cast to
 * __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))
1079
/* Invokes __builtin_ia32_extractf64x2_256_mask on A (viewed as __v4df) with
 * immediate imm, passing _mm_setzero_pd() and the mask U; the result is cast
 * to __m128d.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
1085
/* Invokes __builtin_ia32_extracti64x2_256_mask on A (viewed as __v4di) with
 * immediate imm, passing _mm_undefined_si128() and the mask (__mmask8)-1; the
 * result is cast to __m128i.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
1091
/* Invokes __builtin_ia32_extracti64x2_256_mask on A (viewed as __v4di) with
 * immediate imm, passing W (as __v2di) and the mask U; the result is cast to
 * __m128i.  The expansion is fully parenthesized so it binds as a single
 * operand in any surrounding expression. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))
1097
/* Invokes __builtin_ia32_extracti64x2_256_mask on A (viewed as __v4di) with
 * immediate imm, passing _mm_setzero_si128() and the mask U; the result is
 * cast to __m128i.  The expansion is fully parenthesized so it binds as a
 * single operand in any surrounding expression. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
1103
/* Invokes __builtin_ia32_insertf64x2_256 with A (viewed as __v4df), B (viewed
 * as __v2df) and immediate imm; the result is cast to __m256d.  The expansion
 * is fully parenthesized so it binds as a single operand in any surrounding
 * expression. */
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))
1107
/* Passes the mask U, the result of _mm256_insertf64x2(A, B, imm) and W (both
 * as __v4df) to __builtin_ia32_selectpd_256; the result is cast to __m256d.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)(__m256d)(W)))
1112
/* Passes the mask U, the result of _mm256_insertf64x2(A, B, imm) and
 * _mm256_setzero_pd() (both as __v4df) to __builtin_ia32_selectpd_256; the
 * result is cast to __m256d.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)_mm256_setzero_pd()))
1117
/* Invokes __builtin_ia32_inserti64x2_256 with A (viewed as __v4di), B (viewed
 * as __v2di) and immediate imm; the result is cast to __m256i.  The expansion
 * is fully parenthesized so it binds as a single operand in any surrounding
 * expression. */
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))
1121
/* Passes the mask U, the result of _mm256_inserti64x2(A, B, imm) and W (both
 * as __v4di) to __builtin_ia32_selectq_256; the result is cast to __m256i.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                  (__v4di)(__m256i)(W)))
1126
/* Passes the mask U, the result of _mm256_inserti64x2(A, B, imm) and
 * _mm256_setzero_si256() (both as __v4di) to __builtin_ia32_selectq_256; the
 * result is cast to __m256i.  The expansion is fully parenthesized so it
 * binds as a single operand in any surrounding expression. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                  (__v4di)_mm256_setzero_si256()))
1131
/* Invokes __builtin_ia32_fpclasspd128_mask on A (viewed as __v2df) with
 * immediate imm and the mask U; the result is cast to __mmask8.  The
 * expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)(U)))
1135
/* Invokes __builtin_ia32_fpclasspd128_mask on A (viewed as __v2df) with
 * immediate imm and the mask (__mmask8)-1; the result is cast to __mmask8.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)-1))
1139
/* Invokes __builtin_ia32_fpclasspd256_mask on A (viewed as __v4df) with
 * immediate imm and the mask U; the result is cast to __mmask8.  The
 * expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)(U)))
1143
/* Invokes __builtin_ia32_fpclasspd256_mask on A (viewed as __v4df) with
 * immediate imm and the mask (__mmask8)-1; the result is cast to __mmask8.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)-1))
1147
/* Invokes __builtin_ia32_fpclassps128_mask on A (viewed as __v4sf) with
 * immediate imm and the mask U; the result is cast to __mmask8.  The
 * expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)(U)))
1151
/* Invokes __builtin_ia32_fpclassps128_mask on A (viewed as __v4sf) with
 * immediate imm and the mask (__mmask8)-1; the result is cast to __mmask8.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)-1))
1155
/* Invokes __builtin_ia32_fpclassps256_mask on A (viewed as __v8sf) with
 * immediate imm and the mask U; the result is cast to __mmask8.  The
 * expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)(U)))
1159
/* Invokes __builtin_ia32_fpclassps256_mask on A (viewed as __v8sf) with
 * immediate imm and the mask (__mmask8)-1; the result is cast to __mmask8.
 * The expansion is fully parenthesized so it binds as a single operand in any
 * surrounding expression. */
#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1))
1163
1164#undef __DEFAULT_FN_ATTRS128
1165#undef __DEFAULT_FN_ATTRS256
1166
1167#endif
1168