1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to every intrinsic defined in this header; the two
 * variants differ only in the __min_vector_width__ argument (128 vs 256). */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl"), __min_vector_width__(256)))

/* Narrow helper vector types. */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 4-byte vector of short */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4-byte vector of char */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2-byte vector of char */
23
24/* Integer compare */
25
/* Predicate-specific shorthands for _mm_cmp_epi32_mask and
 * _mm_mask_cmp_epi32_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
50
/* Predicate-specific shorthands for _mm256_cmp_epi32_mask and
 * _mm256_mask_cmp_epi32_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
75
/* Predicate-specific shorthands for _mm_cmp_epu32_mask and
 * _mm_mask_cmp_epu32_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
100
/* Predicate-specific shorthands for _mm256_cmp_epu32_mask and
 * _mm256_mask_cmp_epu32_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
125
/* Predicate-specific shorthands for _mm_cmp_epi64_mask and
 * _mm_mask_cmp_epi64_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
150
/* Predicate-specific shorthands for _mm256_cmp_epi64_mask and
 * _mm256_mask_cmp_epi64_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
175
/* Predicate-specific shorthands for _mm_cmp_epu64_mask and
 * _mm_mask_cmp_epu64_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
200
/* Predicate-specific shorthands for _mm256_cmp_epu64_mask and
 * _mm256_mask_cmp_epu64_mask: each one forwards its operands unchanged and
 * supplies the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
225
226static __inline__ __m256i __DEFAULT_FN_ATTRS256
227_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
228{
229  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
230                                             (__v8si)_mm256_add_epi32(__A, __B),
231                                             (__v8si)__W);
232}
233
234static __inline__ __m256i __DEFAULT_FN_ATTRS256
235_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
236{
237  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
238                                             (__v8si)_mm256_add_epi32(__A, __B),
239                                             (__v8si)_mm256_setzero_si256());
240}
241
242static __inline__ __m256i __DEFAULT_FN_ATTRS256
243_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
244{
245  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
246                                             (__v4di)_mm256_add_epi64(__A, __B),
247                                             (__v4di)__W);
248}
249
250static __inline__ __m256i __DEFAULT_FN_ATTRS256
251_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
252{
253  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
254                                             (__v4di)_mm256_add_epi64(__A, __B),
255                                             (__v4di)_mm256_setzero_si256());
256}
257
258static __inline__ __m256i __DEFAULT_FN_ATTRS256
259_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
260{
261  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
262                                             (__v8si)_mm256_sub_epi32(__A, __B),
263                                             (__v8si)__W);
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256
267_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
268{
269  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
270                                             (__v8si)_mm256_sub_epi32(__A, __B),
271                                             (__v8si)_mm256_setzero_si256());
272}
273
274static __inline__ __m256i __DEFAULT_FN_ATTRS256
275_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
276{
277  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
278                                             (__v4di)_mm256_sub_epi64(__A, __B),
279                                             (__v4di)__W);
280}
281
282static __inline__ __m256i __DEFAULT_FN_ATTRS256
283_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
284{
285  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
286                                             (__v4di)_mm256_sub_epi64(__A, __B),
287                                             (__v4di)_mm256_setzero_si256());
288}
289
290static __inline__ __m128i __DEFAULT_FN_ATTRS128
291_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
292{
293  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
294                                             (__v4si)_mm_add_epi32(__A, __B),
295                                             (__v4si)__W);
296}
297
298static __inline__ __m128i __DEFAULT_FN_ATTRS128
299_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
300{
301  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
302                                             (__v4si)_mm_add_epi32(__A, __B),
303                                             (__v4si)_mm_setzero_si128());
304}
305
306static __inline__ __m128i __DEFAULT_FN_ATTRS128
307_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
308{
309  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
310                                             (__v2di)_mm_add_epi64(__A, __B),
311                                             (__v2di)__W);
312}
313
314static __inline__ __m128i __DEFAULT_FN_ATTRS128
315_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
316{
317  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318                                             (__v2di)_mm_add_epi64(__A, __B),
319                                             (__v2di)_mm_setzero_si128());
320}
321
322static __inline__ __m128i __DEFAULT_FN_ATTRS128
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
324{
325  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
326                                             (__v4si)_mm_sub_epi32(__A, __B),
327                                             (__v4si)__W);
328}
329
330static __inline__ __m128i __DEFAULT_FN_ATTRS128
331_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
332{
333  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
334                                             (__v4si)_mm_sub_epi32(__A, __B),
335                                             (__v4si)_mm_setzero_si128());
336}
337
338static __inline__ __m128i __DEFAULT_FN_ATTRS128
339_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
340{
341  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
342                                             (__v2di)_mm_sub_epi64(__A, __B),
343                                             (__v2di)__W);
344}
345
346static __inline__ __m128i __DEFAULT_FN_ATTRS128
347_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
348{
349  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
350                                             (__v2di)_mm_sub_epi64(__A, __B),
351                                             (__v2di)_mm_setzero_si128());
352}
353
354static __inline__ __m256i __DEFAULT_FN_ATTRS256
355_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
356{
357  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
358                                             (__v4di)_mm256_mul_epi32(__X, __Y),
359                                             (__v4di)__W);
360}
361
362static __inline__ __m256i __DEFAULT_FN_ATTRS256
363_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
364{
365  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
366                                             (__v4di)_mm256_mul_epi32(__X, __Y),
367                                             (__v4di)_mm256_setzero_si256());
368}
369
370static __inline__ __m128i __DEFAULT_FN_ATTRS128
371_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
372{
373  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374                                             (__v2di)_mm_mul_epi32(__X, __Y),
375                                             (__v2di)__W);
376}
377
378static __inline__ __m128i __DEFAULT_FN_ATTRS128
379_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
380{
381  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
382                                             (__v2di)_mm_mul_epi32(__X, __Y),
383                                             (__v2di)_mm_setzero_si128());
384}
385
386static __inline__ __m256i __DEFAULT_FN_ATTRS256
387_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
388{
389  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
390                                             (__v4di)_mm256_mul_epu32(__X, __Y),
391                                             (__v4di)__W);
392}
393
394static __inline__ __m256i __DEFAULT_FN_ATTRS256
395_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
396{
397  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
398                                             (__v4di)_mm256_mul_epu32(__X, __Y),
399                                             (__v4di)_mm256_setzero_si256());
400}
401
402static __inline__ __m128i __DEFAULT_FN_ATTRS128
403_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
404{
405  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
406                                             (__v2di)_mm_mul_epu32(__X, __Y),
407                                             (__v2di)__W);
408}
409
410static __inline__ __m128i __DEFAULT_FN_ATTRS128
411_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
412{
413  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
414                                             (__v2di)_mm_mul_epu32(__X, __Y),
415                                             (__v2di)_mm_setzero_si128());
416}
417
418static __inline__ __m256i __DEFAULT_FN_ATTRS256
419_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
420{
421  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
422                                             (__v8si)_mm256_mullo_epi32(__A, __B),
423                                             (__v8si)_mm256_setzero_si256());
424}
425
426static __inline__ __m256i __DEFAULT_FN_ATTRS256
427_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
428{
429  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
430                                             (__v8si)_mm256_mullo_epi32(__A, __B),
431                                             (__v8si)__W);
432}
433
434static __inline__ __m128i __DEFAULT_FN_ATTRS128
435_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
436{
437  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
438                                             (__v4si)_mm_mullo_epi32(__A, __B),
439                                             (__v4si)_mm_setzero_si128());
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
444{
445  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
446                                             (__v4si)_mm_mullo_epi32(__A, __B),
447                                             (__v4si)__W);
448}
449
450static __inline__ __m256i __DEFAULT_FN_ATTRS256
451_mm256_and_epi32(__m256i __a, __m256i __b)
452{
453  return (__m256i)((__v8su)__a & (__v8su)__b);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
457_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
458{
459  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
460                                             (__v8si)_mm256_and_epi32(__A, __B),
461                                             (__v8si)__W);
462}
463
464static __inline__ __m256i __DEFAULT_FN_ATTRS256
465_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
466{
467  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
468}
469
470static __inline__ __m128i __DEFAULT_FN_ATTRS128
471_mm_and_epi32(__m128i __a, __m128i __b)
472{
473  return (__m128i)((__v4su)__a & (__v4su)__b);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
477_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
478{
479  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
480                                             (__v4si)_mm_and_epi32(__A, __B),
481                                             (__v4si)__W);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS128
485_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
486{
487  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
488}
489
490static __inline__ __m256i __DEFAULT_FN_ATTRS256
491_mm256_andnot_epi32(__m256i __A, __m256i __B)
492{
493  return (__m256i)(~(__v8su)__A & (__v8su)__B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
497_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
498{
499  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
500                                          (__v8si)_mm256_andnot_epi32(__A, __B),
501                                          (__v8si)__W);
502}
503
504static __inline__ __m256i __DEFAULT_FN_ATTRS256
505_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
506{
507  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
508                                           __U, __A, __B);
509}
510
511static __inline__ __m128i __DEFAULT_FN_ATTRS128
512_mm_andnot_epi32(__m128i __A, __m128i __B)
513{
514  return (__m128i)(~(__v4su)__A & (__v4su)__B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
518_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
519{
520  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
521                                             (__v4si)_mm_andnot_epi32(__A, __B),
522                                             (__v4si)__W);
523}
524
525static __inline__ __m128i __DEFAULT_FN_ATTRS128
526_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
527{
528  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
529}
530
531static __inline__ __m256i __DEFAULT_FN_ATTRS256
532_mm256_or_epi32(__m256i __a, __m256i __b)
533{
534  return (__m256i)((__v8su)__a | (__v8su)__b);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
539{
540  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
541                                             (__v8si)_mm256_or_epi32(__A, __B),
542                                             (__v8si)__W);
543}
544
545static __inline__ __m256i __DEFAULT_FN_ATTRS256
546_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
547{
548  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
549}
550
551static __inline__ __m128i __DEFAULT_FN_ATTRS128
552_mm_or_epi32(__m128i __a, __m128i __b)
553{
554  return (__m128i)((__v4su)__a | (__v4su)__b);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
559{
560  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
561                                             (__v4si)_mm_or_epi32(__A, __B),
562                                             (__v4si)__W);
563}
564
565static __inline__ __m128i __DEFAULT_FN_ATTRS128
566_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
567{
568  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
569}
570
571static __inline__ __m256i __DEFAULT_FN_ATTRS256
572_mm256_xor_epi32(__m256i __a, __m256i __b)
573{
574  return (__m256i)((__v8su)__a ^ (__v8su)__b);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
578_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
579{
580  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
581                                             (__v8si)_mm256_xor_epi32(__A, __B),
582                                             (__v8si)__W);
583}
584
585static __inline__ __m256i __DEFAULT_FN_ATTRS256
586_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
587{
588  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
589}
590
591static __inline__ __m128i __DEFAULT_FN_ATTRS128
592_mm_xor_epi32(__m128i __a, __m128i __b)
593{
594  return (__m128i)((__v4su)__a ^ (__v4su)__b);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
599{
600  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
601                                             (__v4si)_mm_xor_epi32(__A, __B),
602                                             (__v4si)__W);
603}
604
605static __inline__ __m128i __DEFAULT_FN_ATTRS128
606_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
607{
608  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
609}
610
611static __inline__ __m256i __DEFAULT_FN_ATTRS256
612_mm256_and_epi64(__m256i __a, __m256i __b)
613{
614  return (__m256i)((__v4du)__a & (__v4du)__b);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
618_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
619{
620  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
621                                             (__v4di)_mm256_and_epi64(__A, __B),
622                                             (__v4di)__W);
623}
624
625static __inline__ __m256i __DEFAULT_FN_ATTRS256
626_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
627{
628  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
629}
630
631static __inline__ __m128i __DEFAULT_FN_ATTRS128
632_mm_and_epi64(__m128i __a, __m128i __b)
633{
634  return (__m128i)((__v2du)__a & (__v2du)__b);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
639{
640  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
641                                             (__v2di)_mm_and_epi64(__A, __B),
642                                             (__v2di)__W);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS128
646_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
647{
648  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
649}
650
651static __inline__ __m256i __DEFAULT_FN_ATTRS256
652_mm256_andnot_epi64(__m256i __A, __m256i __B)
653{
654  return (__m256i)(~(__v4du)__A & (__v4du)__B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
658_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
659{
660  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
661                                          (__v4di)_mm256_andnot_epi64(__A, __B),
662                                          (__v4di)__W);
663}
664
665static __inline__ __m256i __DEFAULT_FN_ATTRS256
666_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
667{
668  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
669                                           __U, __A, __B);
670}
671
672static __inline__ __m128i __DEFAULT_FN_ATTRS128
673_mm_andnot_epi64(__m128i __A, __m128i __B)
674{
675  return (__m128i)(~(__v2du)__A & (__v2du)__B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
679_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
680{
681  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
682                                             (__v2di)_mm_andnot_epi64(__A, __B),
683                                             (__v2di)__W);
684}
685
686static __inline__ __m128i __DEFAULT_FN_ATTRS128
687_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
688{
689  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
690}
691
692static __inline__ __m256i __DEFAULT_FN_ATTRS256
693_mm256_or_epi64(__m256i __a, __m256i __b)
694{
695  return (__m256i)((__v4du)__a | (__v4du)__b);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
699_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
700{
701  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
702                                             (__v4di)_mm256_or_epi64(__A, __B),
703                                             (__v4di)__W);
704}
705
706static __inline__ __m256i __DEFAULT_FN_ATTRS256
707_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
708{
709  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_or_epi64(__m128i __a, __m128i __b)
714{
715  return (__m128i)((__v2du)__a | (__v2du)__b);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
719_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
720{
721  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
722                                             (__v2di)_mm_or_epi64(__A, __B),
723                                             (__v2di)__W);
724}
725
726static __inline__ __m128i __DEFAULT_FN_ATTRS128
727_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
728{
729  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
730}
731
732static __inline__ __m256i __DEFAULT_FN_ATTRS256
733_mm256_xor_epi64(__m256i __a, __m256i __b)
734{
735  return (__m256i)((__v4du)__a ^ (__v4du)__b);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
739_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
740{
741  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
742                                             (__v4di)_mm256_xor_epi64(__A, __B),
743                                             (__v4di)__W);
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
748{
749  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
750}
751
752static __inline__ __m128i __DEFAULT_FN_ATTRS128
753_mm_xor_epi64(__m128i __a, __m128i __b)
754{
755  return (__m128i)((__v2du)__a ^ (__v2du)__b);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
759_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
760        __m128i __B)
761{
762  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
763                                             (__v2di)_mm_xor_epi64(__A, __B),
764                                             (__v2di)__W);
765}
766
767static __inline__ __m128i __DEFAULT_FN_ATTRS128
768_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
769{
770  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
771}
772
/* cmpd128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), \
      (int)(p), (__mmask8)-1))
777
/* cmpd128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), \
      (int)(p), (__mmask8)(m)))
782
/* ucmpd128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), \
      (int)(p), (__mmask8)-1))
787
/* ucmpd128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), \
      (int)(p), (__mmask8)(m)))
792
/* cmpd256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), \
      (int)(p), (__mmask8)-1))
797
/* cmpd256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), \
      (int)(p), (__mmask8)(m)))
802
/* ucmpd256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), \
      (int)(p), (__mmask8)-1))
807
/* ucmpd256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), \
      (int)(p), (__mmask8)(m)))
812
/* cmpq128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), \
      (int)(p), (__mmask8)-1))
817
/* cmpq128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), \
      (int)(p), (__mmask8)(m)))
822
/* ucmpq128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), \
      (int)(p), (__mmask8)-1))
827
/* ucmpq128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), \
      (int)(p), (__mmask8)(m)))
832
/* cmpq256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), \
      (int)(p), (__mmask8)-1))
837
/* cmpq256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), \
      (int)(p), (__mmask8)(m)))
842
/* ucmpq256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), \
      (int)(p), (__mmask8)-1))
847
/* ucmpq256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), \
      (int)(p), (__mmask8)(m)))
852
/* cmpps256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), \
      (int)(p), (__mmask8)-1))
857
/* cmpps256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), \
      (int)(p), (__mmask8)(m)))
862
/* cmppd256 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), \
      (int)(p), (__mmask8)-1))
867
/* cmppd256 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), \
      (int)(p), (__mmask8)(m)))
872
/* cmpps128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
      (int)(p), (__mmask8)-1))
877
/* cmpps128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
      (int)(p), (__mmask8)(m)))
882
/* cmppd128 builtin on (a), (b) with predicate (p) and an all-bits-set mask. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
      (int)(p), (__mmask8)-1))
887
/* cmppd128 builtin on (a), (b) with predicate (p); (m) is passed as the
 * builtin's mask operand. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
      (int)(p), (__mmask8)(m)))
892
893static __inline__ __m128d __DEFAULT_FN_ATTRS128
894_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
895{
896  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
897                    __builtin_ia32_vfmaddpd ((__v2df) __A,
898                                             (__v2df) __B,
899                                             (__v2df) __C),
900                    (__v2df) __A);
901}
902
903static __inline__ __m128d __DEFAULT_FN_ATTRS128
904_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
905{
906  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
907                    __builtin_ia32_vfmaddpd ((__v2df) __A,
908                                             (__v2df) __B,
909                                             (__v2df) __C),
910                    (__v2df) __C);
911}
912
913static __inline__ __m128d __DEFAULT_FN_ATTRS128
914_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
915{
916  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
917                    __builtin_ia32_vfmaddpd ((__v2df) __A,
918                                             (__v2df) __B,
919                                             (__v2df) __C),
920                    (__v2df)_mm_setzero_pd());
921}
922
923static __inline__ __m128d __DEFAULT_FN_ATTRS128
924_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
925{
926  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
927                    __builtin_ia32_vfmaddpd ((__v2df) __A,
928                                             (__v2df) __B,
929                                             -(__v2df) __C),
930                    (__v2df) __A);
931}
932
933static __inline__ __m128d __DEFAULT_FN_ATTRS128
934_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
935{
936  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
937                    __builtin_ia32_vfmaddpd ((__v2df) __A,
938                                             (__v2df) __B,
939                                             -(__v2df) __C),
940                    (__v2df)_mm_setzero_pd());
941}
942
943static __inline__ __m128d __DEFAULT_FN_ATTRS128
944_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
945{
946  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
947                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
948                                             (__v2df) __B,
949                                             (__v2df) __C),
950                    (__v2df) __C);
951}
952
953static __inline__ __m128d __DEFAULT_FN_ATTRS128
954_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
955{
956  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
957                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
958                                             (__v2df) __B,
959                                             (__v2df) __C),
960                    (__v2df)_mm_setzero_pd());
961}
962
963static __inline__ __m128d __DEFAULT_FN_ATTRS128
964_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
965{
966  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
967                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
968                                             (__v2df) __B,
969                                             -(__v2df) __C),
970                    (__v2df)_mm_setzero_pd());
971}
972
973static __inline__ __m256d __DEFAULT_FN_ATTRS256
974_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
975{
976  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
977                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
978                                                (__v4df) __B,
979                                                (__v4df) __C),
980                    (__v4df) __A);
981}
982
983static __inline__ __m256d __DEFAULT_FN_ATTRS256
984_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
985{
986  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
987                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
988                                                (__v4df) __B,
989                                                (__v4df) __C),
990                    (__v4df) __C);
991}
992
993static __inline__ __m256d __DEFAULT_FN_ATTRS256
994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
995{
996  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
997                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
998                                                (__v4df) __B,
999                                                (__v4df) __C),
1000                    (__v4df)_mm256_setzero_pd());
1001}
1002
1003static __inline__ __m256d __DEFAULT_FN_ATTRS256
1004_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1005{
1006  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1007                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1008                                                (__v4df) __B,
1009                                                -(__v4df) __C),
1010                    (__v4df) __A);
1011}
1012
1013static __inline__ __m256d __DEFAULT_FN_ATTRS256
1014_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1015{
1016  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1017                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1018                                                (__v4df) __B,
1019                                                -(__v4df) __C),
1020                    (__v4df)_mm256_setzero_pd());
1021}
1022
1023static __inline__ __m256d __DEFAULT_FN_ATTRS256
1024_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1025{
1026  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1027                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1028                                                (__v4df) __B,
1029                                                (__v4df) __C),
1030                    (__v4df) __C);
1031}
1032
1033static __inline__ __m256d __DEFAULT_FN_ATTRS256
1034_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1035{
1036  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1037                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1038                                                (__v4df) __B,
1039                                                (__v4df) __C),
1040                    (__v4df)_mm256_setzero_pd());
1041}
1042
1043static __inline__ __m256d __DEFAULT_FN_ATTRS256
1044_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1045{
1046  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1047                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1048                                                (__v4df) __B,
1049                                                -(__v4df) __C),
1050                    (__v4df)_mm256_setzero_pd());
1051}
1052
1053static __inline__ __m128 __DEFAULT_FN_ATTRS128
1054_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1055{
1056  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1057                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1058                                             (__v4sf) __B,
1059                                             (__v4sf) __C),
1060                    (__v4sf) __A);
1061}
1062
1063static __inline__ __m128 __DEFAULT_FN_ATTRS128
1064_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1065{
1066  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1067                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1068                                             (__v4sf) __B,
1069                                             (__v4sf) __C),
1070                    (__v4sf) __C);
1071}
1072
1073static __inline__ __m128 __DEFAULT_FN_ATTRS128
1074_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1075{
1076  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1077                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1078                                             (__v4sf) __B,
1079                                             (__v4sf) __C),
1080                    (__v4sf)_mm_setzero_ps());
1081}
1082
1083static __inline__ __m128 __DEFAULT_FN_ATTRS128
1084_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1085{
1086  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1087                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1088                                             (__v4sf) __B,
1089                                             -(__v4sf) __C),
1090                    (__v4sf) __A);
1091}
1092
1093static __inline__ __m128 __DEFAULT_FN_ATTRS128
1094_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1095{
1096  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1097                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1098                                             (__v4sf) __B,
1099                                             -(__v4sf) __C),
1100                    (__v4sf)_mm_setzero_ps());
1101}
1102
1103static __inline__ __m128 __DEFAULT_FN_ATTRS128
1104_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1105{
1106  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1107                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1108                                             (__v4sf) __B,
1109                                             (__v4sf) __C),
1110                    (__v4sf) __C);
1111}
1112
1113static __inline__ __m128 __DEFAULT_FN_ATTRS128
1114_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1115{
1116  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1117                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1118                                             (__v4sf) __B,
1119                                             (__v4sf) __C),
1120                    (__v4sf)_mm_setzero_ps());
1121}
1122
1123static __inline__ __m128 __DEFAULT_FN_ATTRS128
1124_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1125{
1126  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1127                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1128                                             (__v4sf) __B,
1129                                             -(__v4sf) __C),
1130                    (__v4sf)_mm_setzero_ps());
1131}
1132
1133static __inline__ __m256 __DEFAULT_FN_ATTRS256
1134_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1135{
1136  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1137                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1138                                                (__v8sf) __B,
1139                                                (__v8sf) __C),
1140                    (__v8sf) __A);
1141}
1142
1143static __inline__ __m256 __DEFAULT_FN_ATTRS256
1144_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1145{
1146  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1147                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1148                                                (__v8sf) __B,
1149                                                (__v8sf) __C),
1150                    (__v8sf) __C);
1151}
1152
1153static __inline__ __m256 __DEFAULT_FN_ATTRS256
1154_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1155{
1156  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1157                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1158                                                (__v8sf) __B,
1159                                                (__v8sf) __C),
1160                    (__v8sf)_mm256_setzero_ps());
1161}
1162
1163static __inline__ __m256 __DEFAULT_FN_ATTRS256
1164_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1165{
1166  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1167                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1168                                                (__v8sf) __B,
1169                                                -(__v8sf) __C),
1170                    (__v8sf) __A);
1171}
1172
1173static __inline__ __m256 __DEFAULT_FN_ATTRS256
1174_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1175{
1176  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1177                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1178                                                (__v8sf) __B,
1179                                                -(__v8sf) __C),
1180                    (__v8sf)_mm256_setzero_ps());
1181}
1182
1183static __inline__ __m256 __DEFAULT_FN_ATTRS256
1184_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1185{
1186  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1187                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1188                                                (__v8sf) __B,
1189                                                (__v8sf) __C),
1190                    (__v8sf) __C);
1191}
1192
1193static __inline__ __m256 __DEFAULT_FN_ATTRS256
1194_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1195{
1196  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1197                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1198                                                (__v8sf) __B,
1199                                                (__v8sf) __C),
1200                    (__v8sf)_mm256_setzero_ps());
1201}
1202
1203static __inline__ __m256 __DEFAULT_FN_ATTRS256
1204_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1205{
1206  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1207                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1208                                                (__v8sf) __B,
1209                                                -(__v8sf) __C),
1210                    (__v8sf)_mm256_setzero_ps());
1211}
1212
1213static __inline__ __m128d __DEFAULT_FN_ATTRS128
1214_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1215{
1216  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1217                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1218                                                (__v2df) __B,
1219                                                (__v2df) __C),
1220                    (__v2df) __A);
1221}
1222
1223static __inline__ __m128d __DEFAULT_FN_ATTRS128
1224_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1225{
1226  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1227                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1228                                                (__v2df) __B,
1229                                                (__v2df) __C),
1230                    (__v2df) __C);
1231}
1232
1233static __inline__ __m128d __DEFAULT_FN_ATTRS128
1234_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1235{
1236  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1237                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1238                                                (__v2df) __B,
1239                                                (__v2df) __C),
1240                    (__v2df)_mm_setzero_pd());
1241}
1242
1243static __inline__ __m128d __DEFAULT_FN_ATTRS128
1244_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1245{
1246  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1247                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1248                                                (__v2df) __B,
1249                                                -(__v2df) __C),
1250                    (__v2df) __A);
1251}
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1255{
1256  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1257                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1258                                                (__v2df) __B,
1259                                                -(__v2df) __C),
1260                    (__v2df)_mm_setzero_pd());
1261}
1262
1263static __inline__ __m256d __DEFAULT_FN_ATTRS256
1264_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1265{
1266  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1267                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1268                                                   (__v4df) __B,
1269                                                   (__v4df) __C),
1270                    (__v4df) __A);
1271}
1272
1273static __inline__ __m256d __DEFAULT_FN_ATTRS256
1274_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1275{
1276  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1277                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1278                                                   (__v4df) __B,
1279                                                   (__v4df) __C),
1280                    (__v4df) __C);
1281}
1282
1283static __inline__ __m256d __DEFAULT_FN_ATTRS256
1284_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1285{
1286  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1287                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1288                                                   (__v4df) __B,
1289                                                   (__v4df) __C),
1290                    (__v4df)_mm256_setzero_pd());
1291}
1292
1293static __inline__ __m256d __DEFAULT_FN_ATTRS256
1294_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1295{
1296  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1297                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1298                                                   (__v4df) __B,
1299                                                   -(__v4df) __C),
1300                    (__v4df) __A);
1301}
1302
1303static __inline__ __m256d __DEFAULT_FN_ATTRS256
1304_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1305{
1306  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1307                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1308                                                   (__v4df) __B,
1309                                                   -(__v4df) __C),
1310                    (__v4df)_mm256_setzero_pd());
1311}
1312
1313static __inline__ __m128 __DEFAULT_FN_ATTRS128
1314_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1315{
1316  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1317                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1318                                                (__v4sf) __B,
1319                                                (__v4sf) __C),
1320                    (__v4sf) __A);
1321}
1322
1323static __inline__ __m128 __DEFAULT_FN_ATTRS128
1324_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1325{
1326  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1327                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1328                                                (__v4sf) __B,
1329                                                (__v4sf) __C),
1330                    (__v4sf) __C);
1331}
1332
1333static __inline__ __m128 __DEFAULT_FN_ATTRS128
1334_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1335{
1336  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1337                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1338                                                (__v4sf) __B,
1339                                                (__v4sf) __C),
1340                    (__v4sf)_mm_setzero_ps());
1341}
1342
1343static __inline__ __m128 __DEFAULT_FN_ATTRS128
1344_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1345{
1346  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1347                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1348                                                (__v4sf) __B,
1349                                                -(__v4sf) __C),
1350                    (__v4sf) __A);
1351}
1352
1353static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1355{
1356  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1358                                                (__v4sf) __B,
1359                                                -(__v4sf) __C),
1360                    (__v4sf)_mm_setzero_ps());
1361}
1362
1363static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1365                         __m256 __C)
1366{
1367  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1368                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1369                                                   (__v8sf) __B,
1370                                                   (__v8sf) __C),
1371                    (__v8sf) __A);
1372}
1373
1374static __inline__ __m256 __DEFAULT_FN_ATTRS256
1375_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1376{
1377  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1378                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1379                                                   (__v8sf) __B,
1380                                                   (__v8sf) __C),
1381                    (__v8sf) __C);
1382}
1383
1384static __inline__ __m256 __DEFAULT_FN_ATTRS256
1385_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1386{
1387  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1388                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1389                                                   (__v8sf) __B,
1390                                                   (__v8sf) __C),
1391                    (__v8sf)_mm256_setzero_ps());
1392}
1393
1394static __inline__ __m256 __DEFAULT_FN_ATTRS256
1395_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1396{
1397  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1398                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1399                                                   (__v8sf) __B,
1400                                                   -(__v8sf) __C),
1401                    (__v8sf) __A);
1402}
1403
1404static __inline__ __m256 __DEFAULT_FN_ATTRS256
1405_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1406{
1407  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1408                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1409                                                   (__v8sf) __B,
1410                                                   -(__v8sf) __C),
1411                    (__v8sf)_mm256_setzero_ps());
1412}
1413
1414static __inline__ __m128d __DEFAULT_FN_ATTRS128
1415_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1416{
1417  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1418                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1419                                             (__v2df) __B,
1420                                             -(__v2df) __C),
1421                    (__v2df) __C);
1422}
1423
1424static __inline__ __m256d __DEFAULT_FN_ATTRS256
1425_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1426{
1427  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1428                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1429                                                (__v4df) __B,
1430                                                -(__v4df) __C),
1431                    (__v4df) __C);
1432}
1433
1434static __inline__ __m128 __DEFAULT_FN_ATTRS128
1435_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1436{
1437  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1438                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1439                                             (__v4sf) __B,
1440                                             -(__v4sf) __C),
1441                    (__v4sf) __C);
1442}
1443
1444static __inline__ __m256 __DEFAULT_FN_ATTRS256
1445_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1446{
1447  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1448                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1449                                                (__v8sf) __B,
1450                                                -(__v8sf) __C),
1451                    (__v8sf) __C);
1452}
1453
1454static __inline__ __m128d __DEFAULT_FN_ATTRS128
1455_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1456{
1457  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1458                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1459                                                (__v2df) __B,
1460                                                -(__v2df) __C),
1461                    (__v2df) __C);
1462}
1463
1464static __inline__ __m256d __DEFAULT_FN_ATTRS256
1465_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1466{
1467  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1468                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1469                                                   (__v4df) __B,
1470                                                   -(__v4df) __C),
1471                    (__v4df) __C);
1472}
1473
1474static __inline__ __m128 __DEFAULT_FN_ATTRS128
1475_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1476{
1477  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1478                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1479                                                (__v4sf) __B,
1480                                                -(__v4sf) __C),
1481                    (__v4sf) __C);
1482}
1483
1484static __inline__ __m256 __DEFAULT_FN_ATTRS256
1485_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1486{
1487  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1488                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1489                                                   (__v8sf) __B,
1490                                                   -(__v8sf) __C),
1491                    (__v8sf) __C);
1492}
1493
1494static __inline__ __m128d __DEFAULT_FN_ATTRS128
1495_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1496{
1497  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1498                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1499                                             -(__v2df) __B,
1500                                             (__v2df) __C),
1501                    (__v2df) __A);
1502}
1503
1504static __inline__ __m256d __DEFAULT_FN_ATTRS256
1505_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1506{
1507  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1508                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1509                                                -(__v4df) __B,
1510                                                (__v4df) __C),
1511                    (__v4df) __A);
1512}
1513
1514static __inline__ __m128 __DEFAULT_FN_ATTRS128
1515_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1516{
1517  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1518                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1519                                             -(__v4sf) __B,
1520                                             (__v4sf) __C),
1521                    (__v4sf) __A);
1522}
1523
1524static __inline__ __m256 __DEFAULT_FN_ATTRS256
1525_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1526{
1527  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1528                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1529                                                -(__v8sf) __B,
1530                                                (__v8sf) __C),
1531                    (__v8sf) __A);
1532}
1533
1534static __inline__ __m128d __DEFAULT_FN_ATTRS128
1535_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1536{
1537  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1538                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1539                                             -(__v2df) __B,
1540                                             -(__v2df) __C),
1541                    (__v2df) __A);
1542}
1543
1544static __inline__ __m128d __DEFAULT_FN_ATTRS128
1545_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1546{
1547  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1548                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1549                                             -(__v2df) __B,
1550                                             -(__v2df) __C),
1551                    (__v2df) __C);
1552}
1553
1554static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1556{
1557  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1558                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1559                                                -(__v4df) __B,
1560                                                -(__v4df) __C),
1561                    (__v4df) __A);
1562}
1563
1564static __inline__ __m256d __DEFAULT_FN_ATTRS256
1565_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1566{
1567  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1568                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1569                                                -(__v4df) __B,
1570                                                -(__v4df) __C),
1571                    (__v4df) __C);
1572}
1573
1574static __inline__ __m128 __DEFAULT_FN_ATTRS128
1575_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1576{
1577  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1578                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1579                                             -(__v4sf) __B,
1580                                             -(__v4sf) __C),
1581                    (__v4sf) __A);
1582}
1583
1584static __inline__ __m128 __DEFAULT_FN_ATTRS128
1585_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1586{
1587  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1588                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1589                                             -(__v4sf) __B,
1590                                             -(__v4sf) __C),
1591                    (__v4sf) __C);
1592}
1593
1594static __inline__ __m256 __DEFAULT_FN_ATTRS256
1595_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1596{
1597  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1598                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1599                                                -(__v8sf) __B,
1600                                                -(__v8sf) __C),
1601                    (__v8sf) __A);
1602}
1603
1604static __inline__ __m256 __DEFAULT_FN_ATTRS256
1605_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1606{
1607  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1608                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1609                                                -(__v8sf) __B,
1610                                                -(__v8sf) __C),
1611                    (__v8sf) __C);
1612}
1613
1614static __inline__ __m128d __DEFAULT_FN_ATTRS128
1615_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1616  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1617                                              (__v2df)_mm_add_pd(__A, __B),
1618                                              (__v2df)__W);
1619}
1620
1621static __inline__ __m128d __DEFAULT_FN_ATTRS128
1622_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1623  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1624                                              (__v2df)_mm_add_pd(__A, __B),
1625                                              (__v2df)_mm_setzero_pd());
1626}
1627
1628static __inline__ __m256d __DEFAULT_FN_ATTRS256
1629_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1630  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1631                                              (__v4df)_mm256_add_pd(__A, __B),
1632                                              (__v4df)__W);
1633}
1634
1635static __inline__ __m256d __DEFAULT_FN_ATTRS256
1636_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1637  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1638                                              (__v4df)_mm256_add_pd(__A, __B),
1639                                              (__v4df)_mm256_setzero_pd());
1640}
1641
1642static __inline__ __m128 __DEFAULT_FN_ATTRS128
1643_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1644  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1645                                             (__v4sf)_mm_add_ps(__A, __B),
1646                                             (__v4sf)__W);
1647}
1648
1649static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1651  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1652                                             (__v4sf)_mm_add_ps(__A, __B),
1653                                             (__v4sf)_mm_setzero_ps());
1654}
1655
1656static __inline__ __m256 __DEFAULT_FN_ATTRS256
1657_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1658  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1659                                             (__v8sf)_mm256_add_ps(__A, __B),
1660                                             (__v8sf)__W);
1661}
1662
1663static __inline__ __m256 __DEFAULT_FN_ATTRS256
1664_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1665  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1666                                             (__v8sf)_mm256_add_ps(__A, __B),
1667                                             (__v8sf)_mm256_setzero_ps());
1668}
1669
1670static __inline__ __m128i __DEFAULT_FN_ATTRS128
1671_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1672  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1673                (__v4si) __W,
1674                (__v4si) __A);
1675}
1676
1677static __inline__ __m256i __DEFAULT_FN_ATTRS256
1678_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1679  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1680                (__v8si) __W,
1681                (__v8si) __A);
1682}
1683
1684static __inline__ __m128d __DEFAULT_FN_ATTRS128
1685_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1686  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1687                 (__v2df) __W,
1688                 (__v2df) __A);
1689}
1690
1691static __inline__ __m256d __DEFAULT_FN_ATTRS256
1692_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1693  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1694                 (__v4df) __W,
1695                 (__v4df) __A);
1696}
1697
1698static __inline__ __m128 __DEFAULT_FN_ATTRS128
1699_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1700  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1701                (__v4sf) __W,
1702                (__v4sf) __A);
1703}
1704
1705static __inline__ __m256 __DEFAULT_FN_ATTRS256
1706_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1707  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1708                (__v8sf) __W,
1709                (__v8sf) __A);
1710}
1711
1712static __inline__ __m128i __DEFAULT_FN_ATTRS128
1713_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1714  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1715                (__v2di) __W,
1716                (__v2di) __A);
1717}
1718
1719static __inline__ __m256i __DEFAULT_FN_ATTRS256
1720_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1721  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1722                (__v4di) __W,
1723                (__v4di) __A);
1724}
1725
1726static __inline__ __m128d __DEFAULT_FN_ATTRS128
1727_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1728  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1729                  (__v2df) __W,
1730                  (__mmask8) __U);
1731}
1732
1733static __inline__ __m128d __DEFAULT_FN_ATTRS128
1734_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1735  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1736                  (__v2df)
1737                  _mm_setzero_pd (),
1738                  (__mmask8) __U);
1739}
1740
1741static __inline__ __m256d __DEFAULT_FN_ATTRS256
1742_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1743  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1744                  (__v4df) __W,
1745                  (__mmask8) __U);
1746}
1747
1748static __inline__ __m256d __DEFAULT_FN_ATTRS256
1749_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1750  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1751                  (__v4df)
1752                  _mm256_setzero_pd (),
1753                  (__mmask8) __U);
1754}
1755
1756static __inline__ __m128i __DEFAULT_FN_ATTRS128
1757_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1758  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1759                  (__v2di) __W,
1760                  (__mmask8) __U);
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1765  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1766                  (__v2di)
1767                  _mm_setzero_si128 (),
1768                  (__mmask8) __U);
1769}
1770
1771static __inline__ __m256i __DEFAULT_FN_ATTRS256
1772_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1773  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1774                  (__v4di) __W,
1775                  (__mmask8) __U);
1776}
1777
1778static __inline__ __m256i __DEFAULT_FN_ATTRS256
1779_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1780  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1781                  (__v4di)
1782                  _mm256_setzero_si256 (),
1783                  (__mmask8) __U);
1784}
1785
1786static __inline__ __m128 __DEFAULT_FN_ATTRS128
1787_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1788  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1789                 (__v4sf) __W,
1790                 (__mmask8) __U);
1791}
1792
1793static __inline__ __m128 __DEFAULT_FN_ATTRS128
1794_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1795  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1796                 (__v4sf)
1797                 _mm_setzero_ps (),
1798                 (__mmask8) __U);
1799}
1800
1801static __inline__ __m256 __DEFAULT_FN_ATTRS256
1802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1803  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1804                 (__v8sf) __W,
1805                 (__mmask8) __U);
1806}
1807
1808static __inline__ __m256 __DEFAULT_FN_ATTRS256
1809_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1810  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1811                 (__v8sf)
1812                 _mm256_setzero_ps (),
1813                 (__mmask8) __U);
1814}
1815
1816static __inline__ __m128i __DEFAULT_FN_ATTRS128
1817_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1818  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1819                  (__v4si) __W,
1820                  (__mmask8) __U);
1821}
1822
1823static __inline__ __m128i __DEFAULT_FN_ATTRS128
1824_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1825  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1826                  (__v4si)
1827                  _mm_setzero_si128 (),
1828                  (__mmask8) __U);
1829}
1830
1831static __inline__ __m256i __DEFAULT_FN_ATTRS256
1832_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1833  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1834                  (__v8si) __W,
1835                  (__mmask8) __U);
1836}
1837
1838static __inline__ __m256i __DEFAULT_FN_ATTRS256
1839_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1840  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1841                  (__v8si)
1842                  _mm256_setzero_si256 (),
1843                  (__mmask8) __U);
1844}
1845
1846static __inline__ void __DEFAULT_FN_ATTRS128
1847_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1848  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1849            (__v2df) __A,
1850            (__mmask8) __U);
1851}
1852
1853static __inline__ void __DEFAULT_FN_ATTRS256
1854_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1855  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1856            (__v4df) __A,
1857            (__mmask8) __U);
1858}
1859
1860static __inline__ void __DEFAULT_FN_ATTRS128
1861_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1862  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1863            (__v2di) __A,
1864            (__mmask8) __U);
1865}
1866
1867static __inline__ void __DEFAULT_FN_ATTRS256
1868_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1869  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1870            (__v4di) __A,
1871            (__mmask8) __U);
1872}
1873
1874static __inline__ void __DEFAULT_FN_ATTRS128
1875_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1876  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1877            (__v4sf) __A,
1878            (__mmask8) __U);
1879}
1880
1881static __inline__ void __DEFAULT_FN_ATTRS256
1882_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1883  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1884            (__v8sf) __A,
1885            (__mmask8) __U);
1886}
1887
1888static __inline__ void __DEFAULT_FN_ATTRS128
1889_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1890  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1891            (__v4si) __A,
1892            (__mmask8) __U);
1893}
1894
1895static __inline__ void __DEFAULT_FN_ATTRS256
1896_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1897  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1898            (__v8si) __A,
1899            (__mmask8) __U);
1900}
1901
1902static __inline__ __m128d __DEFAULT_FN_ATTRS128
1903_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1904  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1905                                              (__v2df)_mm_cvtepi32_pd(__A),
1906                                              (__v2df)__W);
1907}
1908
1909static __inline__ __m128d __DEFAULT_FN_ATTRS128
1910_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1911  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1912                                              (__v2df)_mm_cvtepi32_pd(__A),
1913                                              (__v2df)_mm_setzero_pd());
1914}
1915
1916static __inline__ __m256d __DEFAULT_FN_ATTRS256
1917_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1918  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1919                                              (__v4df)_mm256_cvtepi32_pd(__A),
1920                                              (__v4df)__W);
1921}
1922
1923static __inline__ __m256d __DEFAULT_FN_ATTRS256
1924_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1925  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1926                                              (__v4df)_mm256_cvtepi32_pd(__A),
1927                                              (__v4df)_mm256_setzero_pd());
1928}
1929
1930static __inline__ __m128 __DEFAULT_FN_ATTRS128
1931_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1932  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1933                                             (__v4sf)_mm_cvtepi32_ps(__A),
1934                                             (__v4sf)__W);
1935}
1936
1937static __inline__ __m128 __DEFAULT_FN_ATTRS128
1938_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1939  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1940                                             (__v4sf)_mm_cvtepi32_ps(__A),
1941                                             (__v4sf)_mm_setzero_ps());
1942}
1943
1944static __inline__ __m256 __DEFAULT_FN_ATTRS256
1945_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1946  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1947                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1948                                             (__v8sf)__W);
1949}
1950
1951static __inline__ __m256 __DEFAULT_FN_ATTRS256
1952_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1953  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1954                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1955                                             (__v8sf)_mm256_setzero_ps());
1956}
1957
1958static __inline__ __m128i __DEFAULT_FN_ATTRS128
1959_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1960  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1961                (__v4si) __W,
1962                (__mmask8) __U);
1963}
1964
1965static __inline__ __m128i __DEFAULT_FN_ATTRS128
1966_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1967  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1968                (__v4si)
1969                _mm_setzero_si128 (),
1970                (__mmask8) __U);
1971}
1972
1973static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1975  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1976                                             (__v4si)_mm256_cvtpd_epi32(__A),
1977                                             (__v4si)__W);
1978}
1979
1980static __inline__ __m128i __DEFAULT_FN_ATTRS256
1981_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1982  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1983                                             (__v4si)_mm256_cvtpd_epi32(__A),
1984                                             (__v4si)_mm_setzero_si128());
1985}
1986
1987static __inline__ __m128 __DEFAULT_FN_ATTRS128
1988_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1989  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1990            (__v4sf) __W,
1991            (__mmask8) __U);
1992}
1993
1994static __inline__ __m128 __DEFAULT_FN_ATTRS128
1995_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1996  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1997            (__v4sf)
1998            _mm_setzero_ps (),
1999            (__mmask8) __U);
2000}
2001
2002static __inline__ __m128 __DEFAULT_FN_ATTRS256
2003_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2004  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2005                                             (__v4sf)_mm256_cvtpd_ps(__A),
2006                                             (__v4sf)__W);
2007}
2008
2009static __inline__ __m128 __DEFAULT_FN_ATTRS256
2010_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2011  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2012                                             (__v4sf)_mm256_cvtpd_ps(__A),
2013                                             (__v4sf)_mm_setzero_ps());
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_cvtpd_epu32 (__m128d __A) {
2018  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2019                 (__v4si)
2020                 _mm_setzero_si128 (),
2021                 (__mmask8) -1);
2022}
2023
2024static __inline__ __m128i __DEFAULT_FN_ATTRS128
2025_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2026  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2027                 (__v4si) __W,
2028                 (__mmask8) __U);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS128
2032_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2033  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2034                 (__v4si)
2035                 _mm_setzero_si128 (),
2036                 (__mmask8) __U);
2037}
2038
2039static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040_mm256_cvtpd_epu32 (__m256d __A) {
2041  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2042                 (__v4si)
2043                 _mm_setzero_si128 (),
2044                 (__mmask8) -1);
2045}
2046
2047static __inline__ __m128i __DEFAULT_FN_ATTRS256
2048_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2049  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2050                 (__v4si) __W,
2051                 (__mmask8) __U);
2052}
2053
2054static __inline__ __m128i __DEFAULT_FN_ATTRS256
2055_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2056  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2057                 (__v4si)
2058                 _mm_setzero_si128 (),
2059                 (__mmask8) __U);
2060}
2061
2062static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2064  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2065                                             (__v4si)_mm_cvtps_epi32(__A),
2066                                             (__v4si)__W);
2067}
2068
2069static __inline__ __m128i __DEFAULT_FN_ATTRS128
2070_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2071  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2072                                             (__v4si)_mm_cvtps_epi32(__A),
2073                                             (__v4si)_mm_setzero_si128());
2074}
2075
2076static __inline__ __m256i __DEFAULT_FN_ATTRS256
2077_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2078  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2079                                             (__v8si)_mm256_cvtps_epi32(__A),
2080                                             (__v8si)__W);
2081}
2082
2083static __inline__ __m256i __DEFAULT_FN_ATTRS256
2084_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2085  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2086                                             (__v8si)_mm256_cvtps_epi32(__A),
2087                                             (__v8si)_mm256_setzero_si256());
2088}
2089
2090static __inline__ __m128d __DEFAULT_FN_ATTRS128
2091_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2092  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2093                                              (__v2df)_mm_cvtps_pd(__A),
2094                                              (__v2df)__W);
2095}
2096
2097static __inline__ __m128d __DEFAULT_FN_ATTRS128
2098_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2099  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2100                                              (__v2df)_mm_cvtps_pd(__A),
2101                                              (__v2df)_mm_setzero_pd());
2102}
2103
2104static __inline__ __m256d __DEFAULT_FN_ATTRS256
2105_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2106  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2107                                              (__v4df)_mm256_cvtps_pd(__A),
2108                                              (__v4df)__W);
2109}
2110
2111static __inline__ __m256d __DEFAULT_FN_ATTRS256
2112_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2113  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2114                                              (__v4df)_mm256_cvtps_pd(__A),
2115                                              (__v4df)_mm256_setzero_pd());
2116}
2117
2118static __inline__ __m128i __DEFAULT_FN_ATTRS128
2119_mm_cvtps_epu32 (__m128 __A) {
2120  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2121                 (__v4si)
2122                 _mm_setzero_si128 (),
2123                 (__mmask8) -1);
2124}
2125
2126static __inline__ __m128i __DEFAULT_FN_ATTRS128
2127_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2128  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2129                 (__v4si) __W,
2130                 (__mmask8) __U);
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2135  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2136                 (__v4si)
2137                 _mm_setzero_si128 (),
2138                 (__mmask8) __U);
2139}
2140
2141static __inline__ __m256i __DEFAULT_FN_ATTRS256
2142_mm256_cvtps_epu32 (__m256 __A) {
2143  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2144                 (__v8si)
2145                 _mm256_setzero_si256 (),
2146                 (__mmask8) -1);
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2151  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2152                 (__v8si) __W,
2153                 (__mmask8) __U);
2154}
2155
2156static __inline__ __m256i __DEFAULT_FN_ATTRS256
2157_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2158  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2159                 (__v8si)
2160                 _mm256_setzero_si256 (),
2161                 (__mmask8) __U);
2162}
2163
2164static __inline__ __m128i __DEFAULT_FN_ATTRS128
2165_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2166  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2167                 (__v4si) __W,
2168                 (__mmask8) __U);
2169}
2170
2171static __inline__ __m128i __DEFAULT_FN_ATTRS128
2172_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2173  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2174                 (__v4si)
2175                 _mm_setzero_si128 (),
2176                 (__mmask8) __U);
2177}
2178
2179static __inline__ __m128i __DEFAULT_FN_ATTRS256
2180_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2181  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2182                                             (__v4si)_mm256_cvttpd_epi32(__A),
2183                                             (__v4si)__W);
2184}
2185
2186static __inline__ __m128i __DEFAULT_FN_ATTRS256
2187_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2188  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2189                                             (__v4si)_mm256_cvttpd_epi32(__A),
2190                                             (__v4si)_mm_setzero_si128());
2191}
2192
2193static __inline__ __m128i __DEFAULT_FN_ATTRS128
2194_mm_cvttpd_epu32 (__m128d __A) {
2195  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2196                  (__v4si)
2197                  _mm_setzero_si128 (),
2198                  (__mmask8) -1);
2199}
2200
2201static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2203  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2204                  (__v4si) __W,
2205                  (__mmask8) __U);
2206}
2207
2208static __inline__ __m128i __DEFAULT_FN_ATTRS128
2209_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2210  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2211                  (__v4si)
2212                  _mm_setzero_si128 (),
2213                  (__mmask8) __U);
2214}
2215
2216static __inline__ __m128i __DEFAULT_FN_ATTRS256
2217_mm256_cvttpd_epu32 (__m256d __A) {
2218  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2219                  (__v4si)
2220                  _mm_setzero_si128 (),
2221                  (__mmask8) -1);
2222}
2223
2224static __inline__ __m128i __DEFAULT_FN_ATTRS256
2225_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2226  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2227                  (__v4si) __W,
2228                  (__mmask8) __U);
2229}
2230
2231static __inline__ __m128i __DEFAULT_FN_ATTRS256
2232_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2233  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2234                  (__v4si)
2235                  _mm_setzero_si128 (),
2236                  (__mmask8) __U);
2237}
2238
2239static __inline__ __m128i __DEFAULT_FN_ATTRS128
2240_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2241  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2242                                             (__v4si)_mm_cvttps_epi32(__A),
2243                                             (__v4si)__W);
2244}
2245
2246static __inline__ __m128i __DEFAULT_FN_ATTRS128
2247_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2248  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2249                                             (__v4si)_mm_cvttps_epi32(__A),
2250                                             (__v4si)_mm_setzero_si128());
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2255  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2256                                             (__v8si)_mm256_cvttps_epi32(__A),
2257                                             (__v8si)__W);
2258}
2259
2260static __inline__ __m256i __DEFAULT_FN_ATTRS256
2261_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2262  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2263                                             (__v8si)_mm256_cvttps_epi32(__A),
2264                                             (__v8si)_mm256_setzero_si256());
2265}
2266
2267static __inline__ __m128i __DEFAULT_FN_ATTRS128
2268_mm_cvttps_epu32 (__m128 __A) {
2269  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2270                  (__v4si)
2271                  _mm_setzero_si128 (),
2272                  (__mmask8) -1);
2273}
2274
2275static __inline__ __m128i __DEFAULT_FN_ATTRS128
2276_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2277  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2278                  (__v4si) __W,
2279                  (__mmask8) __U);
2280}
2281
2282static __inline__ __m128i __DEFAULT_FN_ATTRS128
2283_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2284  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2285                  (__v4si)
2286                  _mm_setzero_si128 (),
2287                  (__mmask8) __U);
2288}
2289
2290static __inline__ __m256i __DEFAULT_FN_ATTRS256
2291_mm256_cvttps_epu32 (__m256 __A) {
2292  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2293                  (__v8si)
2294                  _mm256_setzero_si256 (),
2295                  (__mmask8) -1);
2296}
2297
2298static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2300  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2301                  (__v8si) __W,
2302                  (__mmask8) __U);
2303}
2304
2305static __inline__ __m256i __DEFAULT_FN_ATTRS256
2306_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2307  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2308                  (__v8si)
2309                  _mm256_setzero_si256 (),
2310                  (__mmask8) __U);
2311}
2312
2313static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314_mm_cvtepu32_pd (__m128i __A) {
2315  return (__m128d) __builtin_convertvector(
2316      __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2321  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2322                                              (__v2df)_mm_cvtepu32_pd(__A),
2323                                              (__v2df)__W);
2324}
2325
2326static __inline__ __m128d __DEFAULT_FN_ATTRS128
2327_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2328  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2329                                              (__v2df)_mm_cvtepu32_pd(__A),
2330                                              (__v2df)_mm_setzero_pd());
2331}
2332
2333static __inline__ __m256d __DEFAULT_FN_ATTRS256
2334_mm256_cvtepu32_pd (__m128i __A) {
2335  return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2336}
2337
2338static __inline__ __m256d __DEFAULT_FN_ATTRS256
2339_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2340  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2341                                              (__v4df)_mm256_cvtepu32_pd(__A),
2342                                              (__v4df)__W);
2343}
2344
2345static __inline__ __m256d __DEFAULT_FN_ATTRS256
2346_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2347  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2348                                              (__v4df)_mm256_cvtepu32_pd(__A),
2349                                              (__v4df)_mm256_setzero_pd());
2350}
2351
2352static __inline__ __m128 __DEFAULT_FN_ATTRS128
2353_mm_cvtepu32_ps (__m128i __A) {
2354  return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2355}
2356
2357static __inline__ __m128 __DEFAULT_FN_ATTRS128
2358_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2359  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2360                                             (__v4sf)_mm_cvtepu32_ps(__A),
2361                                             (__v4sf)__W);
2362}
2363
2364static __inline__ __m128 __DEFAULT_FN_ATTRS128
2365_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2366  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2367                                             (__v4sf)_mm_cvtepu32_ps(__A),
2368                                             (__v4sf)_mm_setzero_ps());
2369}
2370
2371static __inline__ __m256 __DEFAULT_FN_ATTRS256
2372_mm256_cvtepu32_ps (__m256i __A) {
2373  return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2374}
2375
2376static __inline__ __m256 __DEFAULT_FN_ATTRS256
2377_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2378  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2379                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2380                                             (__v8sf)__W);
2381}
2382
2383static __inline__ __m256 __DEFAULT_FN_ATTRS256
2384_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2385  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2386                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2387                                             (__v8sf)_mm256_setzero_ps());
2388}
2389
2390static __inline__ __m128d __DEFAULT_FN_ATTRS128
2391_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2392  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2393                                              (__v2df)_mm_div_pd(__A, __B),
2394                                              (__v2df)__W);
2395}
2396
2397static __inline__ __m128d __DEFAULT_FN_ATTRS128
2398_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2399  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2400                                              (__v2df)_mm_div_pd(__A, __B),
2401                                              (__v2df)_mm_setzero_pd());
2402}
2403
2404static __inline__ __m256d __DEFAULT_FN_ATTRS256
2405_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2406  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2407                                              (__v4df)_mm256_div_pd(__A, __B),
2408                                              (__v4df)__W);
2409}
2410
2411static __inline__ __m256d __DEFAULT_FN_ATTRS256
2412_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2413  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2414                                              (__v4df)_mm256_div_pd(__A, __B),
2415                                              (__v4df)_mm256_setzero_pd());
2416}
2417
2418static __inline__ __m128 __DEFAULT_FN_ATTRS128
2419_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2420  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2421                                             (__v4sf)_mm_div_ps(__A, __B),
2422                                             (__v4sf)__W);
2423}
2424
2425static __inline__ __m128 __DEFAULT_FN_ATTRS128
2426_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2427  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2428                                             (__v4sf)_mm_div_ps(__A, __B),
2429                                             (__v4sf)_mm_setzero_ps());
2430}
2431
2432static __inline__ __m256 __DEFAULT_FN_ATTRS256
2433_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2434  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2435                                             (__v8sf)_mm256_div_ps(__A, __B),
2436                                             (__v8sf)__W);
2437}
2438
2439static __inline__ __m256 __DEFAULT_FN_ATTRS256
2440_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2441  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2442                                             (__v8sf)_mm256_div_ps(__A, __B),
2443                                             (__v8sf)_mm256_setzero_ps());
2444}
2445
2446static __inline__ __m128d __DEFAULT_FN_ATTRS128
2447_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2448  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2449                (__v2df) __W,
2450                (__mmask8) __U);
2451}
2452
2453static __inline__ __m128d __DEFAULT_FN_ATTRS128
2454_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2455  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2456                 (__v2df)
2457                 _mm_setzero_pd (),
2458                 (__mmask8) __U);
2459}
2460
2461static __inline__ __m256d __DEFAULT_FN_ATTRS256
2462_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2463  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2464                (__v4df) __W,
2465                (__mmask8) __U);
2466}
2467
2468static __inline__ __m256d __DEFAULT_FN_ATTRS256
2469_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2470  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2471                 (__v4df)
2472                 _mm256_setzero_pd (),
2473                 (__mmask8) __U);
2474}
2475
2476static __inline__ __m128i __DEFAULT_FN_ATTRS128
2477_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2478  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2479                (__v2di) __W,
2480                (__mmask8) __U);
2481}
2482
2483static __inline__ __m128i __DEFAULT_FN_ATTRS128
2484_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2485  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2486                 (__v2di)
2487                 _mm_setzero_si128 (),
2488                 (__mmask8) __U);
2489}
2490
2491static __inline__ __m256i __DEFAULT_FN_ATTRS256
2492_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2493  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2494                (__v4di) __W,
2495                (__mmask8) __U);
2496}
2497
2498static __inline__ __m256i __DEFAULT_FN_ATTRS256
2499_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2500  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2501                 (__v4di)
2502                 _mm256_setzero_si256 (),
2503                 (__mmask8) __U);
2504}
2505
2506static __inline__ __m128d __DEFAULT_FN_ATTRS128
2507_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2508  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2509              (__v2df) __W,
2510              (__mmask8)
2511              __U);
2512}
2513
2514static __inline__ __m128d __DEFAULT_FN_ATTRS128
2515_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2516  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2517               (__v2df)
2518               _mm_setzero_pd (),
2519               (__mmask8)
2520               __U);
2521}
2522
2523static __inline__ __m256d __DEFAULT_FN_ATTRS256
2524_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2525  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2526              (__v4df) __W,
2527              (__mmask8)
2528              __U);
2529}
2530
2531static __inline__ __m256d __DEFAULT_FN_ATTRS256
2532_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2533  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2534               (__v4df)
2535               _mm256_setzero_pd (),
2536               (__mmask8)
2537               __U);
2538}
2539
2540static __inline__ __m128i __DEFAULT_FN_ATTRS128
2541_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2542  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2543              (__v2di) __W,
2544              (__mmask8)
2545              __U);
2546}
2547
2548static __inline__ __m128i __DEFAULT_FN_ATTRS128
2549_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2550  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2551               (__v2di)
2552               _mm_setzero_si128 (),
2553               (__mmask8)
2554               __U);
2555}
2556
2557static __inline__ __m256i __DEFAULT_FN_ATTRS256
2558_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2559             void const *__P) {
2560  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2561              (__v4di) __W,
2562              (__mmask8)
2563              __U);
2564}
2565
2566static __inline__ __m256i __DEFAULT_FN_ATTRS256
2567_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2568  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2569               (__v4di)
2570               _mm256_setzero_si256 (),
2571               (__mmask8)
2572               __U);
2573}
2574
2575static __inline__ __m128 __DEFAULT_FN_ATTRS128
2576_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2577  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2578                   (__v4sf) __W,
2579                   (__mmask8) __U);
2580}
2581
2582static __inline__ __m128 __DEFAULT_FN_ATTRS128
2583_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2584  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2585              (__v4sf)
2586              _mm_setzero_ps (),
2587              (__mmask8)
2588              __U);
2589}
2590
2591static __inline__ __m256 __DEFAULT_FN_ATTRS256
2592_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2593  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2594                   (__v8sf) __W,
2595                   (__mmask8) __U);
2596}
2597
2598static __inline__ __m256 __DEFAULT_FN_ATTRS256
2599_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2600  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2601              (__v8sf)
2602              _mm256_setzero_ps (),
2603              (__mmask8)
2604              __U);
2605}
2606
2607static __inline__ __m128i __DEFAULT_FN_ATTRS128
2608_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2609  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2610              (__v4si) __W,
2611              (__mmask8)
2612              __U);
2613}
2614
2615static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2617  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2618               (__v4si)
2619               _mm_setzero_si128 (),
2620               (__mmask8)     __U);
2621}
2622
2623static __inline__ __m256i __DEFAULT_FN_ATTRS256
2624_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2625             void const *__P) {
2626  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2627              (__v8si) __W,
2628              (__mmask8)
2629              __U);
2630}
2631
2632static __inline__ __m256i __DEFAULT_FN_ATTRS256
2633_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2634  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2635               (__v8si)
2636               _mm256_setzero_si256 (),
2637               (__mmask8)
2638               __U);
2639}
2640
2641static __inline__ __m128 __DEFAULT_FN_ATTRS128
2642_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2643  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2644               (__v4sf) __W,
2645               (__mmask8) __U);
2646}
2647
2648static __inline__ __m128 __DEFAULT_FN_ATTRS128
2649_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2650  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2651                (__v4sf)
2652                _mm_setzero_ps (),
2653                (__mmask8) __U);
2654}
2655
2656static __inline__ __m256 __DEFAULT_FN_ATTRS256
2657_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2658  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2659               (__v8sf) __W,
2660               (__mmask8) __U);
2661}
2662
2663static __inline__ __m256 __DEFAULT_FN_ATTRS256
2664_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2665  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2666                (__v8sf)
2667                _mm256_setzero_ps (),
2668                (__mmask8) __U);
2669}
2670
2671static __inline__ __m128i __DEFAULT_FN_ATTRS128
2672_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2673  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2674                (__v4si) __W,
2675                (__mmask8) __U);
2676}
2677
2678static __inline__ __m128i __DEFAULT_FN_ATTRS128
2679_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2680  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2681                 (__v4si)
2682                 _mm_setzero_si128 (),
2683                 (__mmask8) __U);
2684}
2685
2686static __inline__ __m256i __DEFAULT_FN_ATTRS256
2687_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2688  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2689                (__v8si) __W,
2690                (__mmask8) __U);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2695  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2696                 (__v8si)
2697                 _mm256_setzero_si256 (),
2698                 (__mmask8) __U);
2699}
2700
2701static __inline__ __m128d __DEFAULT_FN_ATTRS128
2702_mm_getexp_pd (__m128d __A) {
2703  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2704                (__v2df)
2705                _mm_setzero_pd (),
2706                (__mmask8) -1);
2707}
2708
2709static __inline__ __m128d __DEFAULT_FN_ATTRS128
2710_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2711  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2712                (__v2df) __W,
2713                (__mmask8) __U);
2714}
2715
2716static __inline__ __m128d __DEFAULT_FN_ATTRS128
2717_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2718  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2719                (__v2df)
2720                _mm_setzero_pd (),
2721                (__mmask8) __U);
2722}
2723
2724static __inline__ __m256d __DEFAULT_FN_ATTRS256
2725_mm256_getexp_pd (__m256d __A) {
2726  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2727                (__v4df)
2728                _mm256_setzero_pd (),
2729                (__mmask8) -1);
2730}
2731
2732static __inline__ __m256d __DEFAULT_FN_ATTRS256
2733_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2734  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2735                (__v4df) __W,
2736                (__mmask8) __U);
2737}
2738
2739static __inline__ __m256d __DEFAULT_FN_ATTRS256
2740_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2741  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2742                (__v4df)
2743                _mm256_setzero_pd (),
2744                (__mmask8) __U);
2745}
2746
2747static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748_mm_getexp_ps (__m128 __A) {
2749  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2750               (__v4sf)
2751               _mm_setzero_ps (),
2752               (__mmask8) -1);
2753}
2754
2755static __inline__ __m128 __DEFAULT_FN_ATTRS128
2756_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2757  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2758               (__v4sf) __W,
2759               (__mmask8) __U);
2760}
2761
2762static __inline__ __m128 __DEFAULT_FN_ATTRS128
2763_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2764  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2765               (__v4sf)
2766               _mm_setzero_ps (),
2767               (__mmask8) __U);
2768}
2769
2770static __inline__ __m256 __DEFAULT_FN_ATTRS256
2771_mm256_getexp_ps (__m256 __A) {
2772  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2773               (__v8sf)
2774               _mm256_setzero_ps (),
2775               (__mmask8) -1);
2776}
2777
2778static __inline__ __m256 __DEFAULT_FN_ATTRS256
2779_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2780  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2781               (__v8sf) __W,
2782               (__mmask8) __U);
2783}
2784
2785static __inline__ __m256 __DEFAULT_FN_ATTRS256
2786_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2787  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2788               (__v8sf)
2789               _mm256_setzero_ps (),
2790               (__mmask8) __U);
2791}
2792
2793static __inline__ __m128d __DEFAULT_FN_ATTRS128
2794_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2795  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2796                                              (__v2df)_mm_max_pd(__A, __B),
2797                                              (__v2df)__W);
2798}
2799
2800static __inline__ __m128d __DEFAULT_FN_ATTRS128
2801_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2802  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2803                                              (__v2df)_mm_max_pd(__A, __B),
2804                                              (__v2df)_mm_setzero_pd());
2805}
2806
2807static __inline__ __m256d __DEFAULT_FN_ATTRS256
2808_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2809  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2810                                              (__v4df)_mm256_max_pd(__A, __B),
2811                                              (__v4df)__W);
2812}
2813
2814static __inline__ __m256d __DEFAULT_FN_ATTRS256
2815_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2816  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2817                                              (__v4df)_mm256_max_pd(__A, __B),
2818                                              (__v4df)_mm256_setzero_pd());
2819}
2820
2821static __inline__ __m128 __DEFAULT_FN_ATTRS128
2822_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2823  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2824                                             (__v4sf)_mm_max_ps(__A, __B),
2825                                             (__v4sf)__W);
2826}
2827
2828static __inline__ __m128 __DEFAULT_FN_ATTRS128
2829_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2830  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2831                                             (__v4sf)_mm_max_ps(__A, __B),
2832                                             (__v4sf)_mm_setzero_ps());
2833}
2834
2835static __inline__ __m256 __DEFAULT_FN_ATTRS256
2836_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2837  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2838                                             (__v8sf)_mm256_max_ps(__A, __B),
2839                                             (__v8sf)__W);
2840}
2841
2842static __inline__ __m256 __DEFAULT_FN_ATTRS256
2843_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2844  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2845                                             (__v8sf)_mm256_max_ps(__A, __B),
2846                                             (__v8sf)_mm256_setzero_ps());
2847}
2848
2849static __inline__ __m128d __DEFAULT_FN_ATTRS128
2850_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2851  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2852                                              (__v2df)_mm_min_pd(__A, __B),
2853                                              (__v2df)__W);
2854}
2855
2856static __inline__ __m128d __DEFAULT_FN_ATTRS128
2857_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2858  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2859                                              (__v2df)_mm_min_pd(__A, __B),
2860                                              (__v2df)_mm_setzero_pd());
2861}
2862
2863static __inline__ __m256d __DEFAULT_FN_ATTRS256
2864_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2865  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2866                                              (__v4df)_mm256_min_pd(__A, __B),
2867                                              (__v4df)__W);
2868}
2869
2870static __inline__ __m256d __DEFAULT_FN_ATTRS256
2871_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2872  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2873                                              (__v4df)_mm256_min_pd(__A, __B),
2874                                              (__v4df)_mm256_setzero_pd());
2875}
2876
2877static __inline__ __m128 __DEFAULT_FN_ATTRS128
2878_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2879  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2880                                             (__v4sf)_mm_min_ps(__A, __B),
2881                                             (__v4sf)__W);
2882}
2883
2884static __inline__ __m128 __DEFAULT_FN_ATTRS128
2885_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2886  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2887                                             (__v4sf)_mm_min_ps(__A, __B),
2888                                             (__v4sf)_mm_setzero_ps());
2889}
2890
2891static __inline__ __m256 __DEFAULT_FN_ATTRS256
2892_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2893  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2894                                             (__v8sf)_mm256_min_ps(__A, __B),
2895                                             (__v8sf)__W);
2896}
2897
2898static __inline__ __m256 __DEFAULT_FN_ATTRS256
2899_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2900  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2901                                             (__v8sf)_mm256_min_ps(__A, __B),
2902                                             (__v8sf)_mm256_setzero_ps());
2903}
2904
2905static __inline__ __m128d __DEFAULT_FN_ATTRS128
2906_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2907  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2908                                              (__v2df)_mm_mul_pd(__A, __B),
2909                                              (__v2df)__W);
2910}
2911
2912static __inline__ __m128d __DEFAULT_FN_ATTRS128
2913_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2914  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2915                                              (__v2df)_mm_mul_pd(__A, __B),
2916                                              (__v2df)_mm_setzero_pd());
2917}
2918
2919static __inline__ __m256d __DEFAULT_FN_ATTRS256
2920_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2921  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2922                                              (__v4df)_mm256_mul_pd(__A, __B),
2923                                              (__v4df)__W);
2924}
2925
2926static __inline__ __m256d __DEFAULT_FN_ATTRS256
2927_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2928  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2929                                              (__v4df)_mm256_mul_pd(__A, __B),
2930                                              (__v4df)_mm256_setzero_pd());
2931}
2932
2933static __inline__ __m128 __DEFAULT_FN_ATTRS128
2934_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2935  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2936                                             (__v4sf)_mm_mul_ps(__A, __B),
2937                                             (__v4sf)__W);
2938}
2939
2940static __inline__ __m128 __DEFAULT_FN_ATTRS128
2941_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2942  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2943                                             (__v4sf)_mm_mul_ps(__A, __B),
2944                                             (__v4sf)_mm_setzero_ps());
2945}
2946
2947static __inline__ __m256 __DEFAULT_FN_ATTRS256
2948_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2949  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2950                                             (__v8sf)_mm256_mul_ps(__A, __B),
2951                                             (__v8sf)__W);
2952}
2953
2954static __inline__ __m256 __DEFAULT_FN_ATTRS256
2955_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2956  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2957                                             (__v8sf)_mm256_mul_ps(__A, __B),
2958                                             (__v8sf)_mm256_setzero_ps());
2959}
2960
2961static __inline__ __m128i __DEFAULT_FN_ATTRS128
2962_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2963  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2964                                             (__v4si)_mm_abs_epi32(__A),
2965                                             (__v4si)__W);
2966}
2967
2968static __inline__ __m128i __DEFAULT_FN_ATTRS128
2969_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2970  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2971                                             (__v4si)_mm_abs_epi32(__A),
2972                                             (__v4si)_mm_setzero_si128());
2973}
2974
2975static __inline__ __m256i __DEFAULT_FN_ATTRS256
2976_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2977  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2978                                             (__v8si)_mm256_abs_epi32(__A),
2979                                             (__v8si)__W);
2980}
2981
2982static __inline__ __m256i __DEFAULT_FN_ATTRS256
2983_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2984  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2985                                             (__v8si)_mm256_abs_epi32(__A),
2986                                             (__v8si)_mm256_setzero_si256());
2987}
2988
2989static __inline__ __m128i __DEFAULT_FN_ATTRS128
2990_mm_abs_epi64 (__m128i __A) {
2991  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2992}
2993
2994static __inline__ __m128i __DEFAULT_FN_ATTRS128
2995_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2996  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2997                                             (__v2di)_mm_abs_epi64(__A),
2998                                             (__v2di)__W);
2999}
3000
3001static __inline__ __m128i __DEFAULT_FN_ATTRS128
3002_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3003  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3004                                             (__v2di)_mm_abs_epi64(__A),
3005                                             (__v2di)_mm_setzero_si128());
3006}
3007
3008static __inline__ __m256i __DEFAULT_FN_ATTRS256
3009_mm256_abs_epi64 (__m256i __A) {
3010  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
3011}
3012
3013static __inline__ __m256i __DEFAULT_FN_ATTRS256
3014_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3015  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3016                                             (__v4di)_mm256_abs_epi64(__A),
3017                                             (__v4di)__W);
3018}
3019
3020static __inline__ __m256i __DEFAULT_FN_ATTRS256
3021_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3022  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3023                                             (__v4di)_mm256_abs_epi64(__A),
3024                                             (__v4di)_mm256_setzero_si256());
3025}
3026
3027static __inline__ __m128i __DEFAULT_FN_ATTRS128
3028_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3029  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3030                                             (__v4si)_mm_max_epi32(__A, __B),
3031                                             (__v4si)_mm_setzero_si128());
3032}
3033
3034static __inline__ __m128i __DEFAULT_FN_ATTRS128
3035_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3036  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3037                                             (__v4si)_mm_max_epi32(__A, __B),
3038                                             (__v4si)__W);
3039}
3040
3041static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3043  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3044                                             (__v8si)_mm256_max_epi32(__A, __B),
3045                                             (__v8si)_mm256_setzero_si256());
3046}
3047
3048static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3050  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3051                                             (__v8si)_mm256_max_epi32(__A, __B),
3052                                             (__v8si)__W);
3053}
3054
3055static __inline__ __m128i __DEFAULT_FN_ATTRS128
3056_mm_max_epi64 (__m128i __A, __m128i __B) {
3057  return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3058}
3059
3060static __inline__ __m128i __DEFAULT_FN_ATTRS128
3061_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3062  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3063                                             (__v2di)_mm_max_epi64(__A, __B),
3064                                             (__v2di)_mm_setzero_si128());
3065}
3066
3067static __inline__ __m128i __DEFAULT_FN_ATTRS128
3068_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3069  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3070                                             (__v2di)_mm_max_epi64(__A, __B),
3071                                             (__v2di)__W);
3072}
3073
3074static __inline__ __m256i __DEFAULT_FN_ATTRS256
3075_mm256_max_epi64 (__m256i __A, __m256i __B) {
3076  return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3077}
3078
3079static __inline__ __m256i __DEFAULT_FN_ATTRS256
3080_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3081  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3082                                             (__v4di)_mm256_max_epi64(__A, __B),
3083                                             (__v4di)_mm256_setzero_si256());
3084}
3085
3086static __inline__ __m256i __DEFAULT_FN_ATTRS256
3087_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3088  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3089                                             (__v4di)_mm256_max_epi64(__A, __B),
3090                                             (__v4di)__W);
3091}
3092
3093static __inline__ __m128i __DEFAULT_FN_ATTRS128
3094_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3095  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3096                                             (__v4si)_mm_max_epu32(__A, __B),
3097                                             (__v4si)_mm_setzero_si128());
3098}
3099
3100static __inline__ __m128i __DEFAULT_FN_ATTRS128
3101_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3102  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3103                                             (__v4si)_mm_max_epu32(__A, __B),
3104                                             (__v4si)__W);
3105}
3106
3107static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3109  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3110                                             (__v8si)_mm256_max_epu32(__A, __B),
3111                                             (__v8si)_mm256_setzero_si256());
3112}
3113
3114static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3116  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3117                                             (__v8si)_mm256_max_epu32(__A, __B),
3118                                             (__v8si)__W);
3119}
3120
3121static __inline__ __m128i __DEFAULT_FN_ATTRS128
3122_mm_max_epu64 (__m128i __A, __m128i __B) {
3123  return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3124}
3125
3126static __inline__ __m128i __DEFAULT_FN_ATTRS128
3127_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3128  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3129                                             (__v2di)_mm_max_epu64(__A, __B),
3130                                             (__v2di)_mm_setzero_si128());
3131}
3132
3133static __inline__ __m128i __DEFAULT_FN_ATTRS128
3134_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3135  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3136                                             (__v2di)_mm_max_epu64(__A, __B),
3137                                             (__v2di)__W);
3138}
3139
3140static __inline__ __m256i __DEFAULT_FN_ATTRS256
3141_mm256_max_epu64 (__m256i __A, __m256i __B) {
3142  return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3143}
3144
3145static __inline__ __m256i __DEFAULT_FN_ATTRS256
3146_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3147  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3148                                             (__v4di)_mm256_max_epu64(__A, __B),
3149                                             (__v4di)_mm256_setzero_si256());
3150}
3151
3152static __inline__ __m256i __DEFAULT_FN_ATTRS256
3153_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3154  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3155                                             (__v4di)_mm256_max_epu64(__A, __B),
3156                                             (__v4di)__W);
3157}
3158
3159static __inline__ __m128i __DEFAULT_FN_ATTRS128
3160_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3161  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3162                                             (__v4si)_mm_min_epi32(__A, __B),
3163                                             (__v4si)_mm_setzero_si128());
3164}
3165
3166static __inline__ __m128i __DEFAULT_FN_ATTRS128
3167_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3168  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3169                                             (__v4si)_mm_min_epi32(__A, __B),
3170                                             (__v4si)__W);
3171}
3172
3173static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3175  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3176                                             (__v8si)_mm256_min_epi32(__A, __B),
3177                                             (__v8si)_mm256_setzero_si256());
3178}
3179
3180static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3182  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3183                                             (__v8si)_mm256_min_epi32(__A, __B),
3184                                             (__v8si)__W);
3185}
3186
3187static __inline__ __m128i __DEFAULT_FN_ATTRS128
3188_mm_min_epi64 (__m128i __A, __m128i __B) {
3189  return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3190}
3191
3192static __inline__ __m128i __DEFAULT_FN_ATTRS128
3193_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3194  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3195                                             (__v2di)_mm_min_epi64(__A, __B),
3196                                             (__v2di)__W);
3197}
3198
3199static __inline__ __m128i __DEFAULT_FN_ATTRS128
3200_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3201  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3202                                             (__v2di)_mm_min_epi64(__A, __B),
3203                                             (__v2di)_mm_setzero_si128());
3204}
3205
3206static __inline__ __m256i __DEFAULT_FN_ATTRS256
3207_mm256_min_epi64 (__m256i __A, __m256i __B) {
3208  return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3209}
3210
3211static __inline__ __m256i __DEFAULT_FN_ATTRS256
3212_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3213  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3214                                             (__v4di)_mm256_min_epi64(__A, __B),
3215                                             (__v4di)__W);
3216}
3217
3218static __inline__ __m256i __DEFAULT_FN_ATTRS256
3219_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3220  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3221                                             (__v4di)_mm256_min_epi64(__A, __B),
3222                                             (__v4di)_mm256_setzero_si256());
3223}
3224
3225static __inline__ __m128i __DEFAULT_FN_ATTRS128
3226_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3227  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3228                                             (__v4si)_mm_min_epu32(__A, __B),
3229                                             (__v4si)_mm_setzero_si128());
3230}
3231
3232static __inline__ __m128i __DEFAULT_FN_ATTRS128
3233_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3234  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3235                                             (__v4si)_mm_min_epu32(__A, __B),
3236                                             (__v4si)__W);
3237}
3238
3239static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3241  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3242                                             (__v8si)_mm256_min_epu32(__A, __B),
3243                                             (__v8si)_mm256_setzero_si256());
3244}
3245
3246static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3248  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3249                                             (__v8si)_mm256_min_epu32(__A, __B),
3250                                             (__v8si)__W);
3251}
3252
3253static __inline__ __m128i __DEFAULT_FN_ATTRS128
3254_mm_min_epu64 (__m128i __A, __m128i __B) {
3255  return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3256}
3257
3258static __inline__ __m128i __DEFAULT_FN_ATTRS128
3259_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3260  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3261                                             (__v2di)_mm_min_epu64(__A, __B),
3262                                             (__v2di)__W);
3263}
3264
3265static __inline__ __m128i __DEFAULT_FN_ATTRS128
3266_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3267  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3268                                             (__v2di)_mm_min_epu64(__A, __B),
3269                                             (__v2di)_mm_setzero_si128());
3270}
3271
3272static __inline__ __m256i __DEFAULT_FN_ATTRS256
3273_mm256_min_epu64 (__m256i __A, __m256i __B) {
3274  return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3275}
3276
3277static __inline__ __m256i __DEFAULT_FN_ATTRS256
3278_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3279  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3280                                             (__v4di)_mm256_min_epu64(__A, __B),
3281                                             (__v4di)__W);
3282}
3283
3284static __inline__ __m256i __DEFAULT_FN_ATTRS256
3285_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3286  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3287                                             (__v4di)_mm256_min_epu64(__A, __B),
3288                                             (__v4di)_mm256_setzero_si256());
3289}
3290
/* Unmasked roundscale on a __m128d: same as _mm_mask_roundscale_pd with a
 * zero pass-through vector and an all-ones mask. */
#define _mm_roundscale_pd(A, imm) \
  _mm_mask_roundscale_pd(_mm_setzero_pd(), (__mmask8)-1, (A), (imm))
3296
3297
/* Merge-masking roundscale on a __m128d: forwards A, the immediate imm, the
 * pass-through W and the mask U to __builtin_ia32_rndscalepd_128_mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), (__mmask8)(U)))
3303
3304
/* Zero-masking roundscale on a __m128d: same as _mm_mask_roundscale_pd with
 * a zero pass-through vector. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  _mm_mask_roundscale_pd(_mm_setzero_pd(), (U), (A), (imm))
3310
3311
/* Unmasked roundscale on a __m256d: same as _mm256_mask_roundscale_pd with
 * a zero pass-through vector and an all-ones mask. */
#define _mm256_roundscale_pd(A, imm) \
  _mm256_mask_roundscale_pd(_mm256_setzero_pd(), (__mmask8)-1, (A), (imm))
3317
3318
/* Merge-masking roundscale on a __m256d: forwards A, the immediate imm, the
 * pass-through W and the mask U to __builtin_ia32_rndscalepd_256_mask. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), (__mmask8)(U)))
3324
3325
/* Zero-masking roundscale on a __m256d: same as _mm256_mask_roundscale_pd
 * with a zero pass-through vector. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  _mm256_mask_roundscale_pd(_mm256_setzero_pd(), (U), (A), (imm))
3331
/* Unmasked roundscale on a __m128: same as _mm_mask_roundscale_ps with a
 * zero pass-through vector and an all-ones mask. */
#define _mm_roundscale_ps(A, imm) \
  _mm_mask_roundscale_ps(_mm_setzero_ps(), (__mmask8)-1, (A), (imm))
3336
3337
/* Merge-masking roundscale on a __m128: forwards A, the immediate imm, the
 * pass-through W and the mask U to __builtin_ia32_rndscaleps_128_mask. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), (__mmask8)(U)))
3342
3343
/* Zero-masking roundscale on a __m128: same as _mm_mask_roundscale_ps with
 * a zero pass-through vector. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  _mm_mask_roundscale_ps(_mm_setzero_ps(), (U), (A), (imm))
3348
/* Unmasked roundscale on a __m256: same as _mm256_mask_roundscale_ps with a
 * zero pass-through vector and an all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \
  _mm256_mask_roundscale_ps(_mm256_setzero_ps(), (__mmask8)-1, (A), (imm))
3353
/* Merge-masking roundscale on a __m256: forwards A, the immediate imm, the
 * pass-through W and the mask U to __builtin_ia32_rndscaleps_256_mask. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), (__mmask8)(U)))
3358
3359
/* Zero-masking roundscale on a __m256: same as _mm256_mask_roundscale_ps
 * with a zero pass-through vector. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  _mm256_mask_roundscale_ps(_mm256_setzero_ps(), (U), (A), (imm))
3364
3365static __inline__ __m128d __DEFAULT_FN_ATTRS128
3366_mm_scalef_pd (__m128d __A, __m128d __B) {
3367  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3368                (__v2df) __B,
3369                (__v2df)
3370                _mm_setzero_pd (),
3371                (__mmask8) -1);
3372}
3373
3374static __inline__ __m128d __DEFAULT_FN_ATTRS128
3375_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3376        __m128d __B) {
3377  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3378                (__v2df) __B,
3379                (__v2df) __W,
3380                (__mmask8) __U);
3381}
3382
3383static __inline__ __m128d __DEFAULT_FN_ATTRS128
3384_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3385  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3386                (__v2df) __B,
3387                (__v2df)
3388                _mm_setzero_pd (),
3389                (__mmask8) __U);
3390}
3391
3392static __inline__ __m256d __DEFAULT_FN_ATTRS256
3393_mm256_scalef_pd (__m256d __A, __m256d __B) {
3394  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3395                (__v4df) __B,
3396                (__v4df)
3397                _mm256_setzero_pd (),
3398                (__mmask8) -1);
3399}
3400
3401static __inline__ __m256d __DEFAULT_FN_ATTRS256
3402_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3403           __m256d __B) {
3404  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3405                (__v4df) __B,
3406                (__v4df) __W,
3407                (__mmask8) __U);
3408}
3409
3410static __inline__ __m256d __DEFAULT_FN_ATTRS256
3411_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3412  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3413                (__v4df) __B,
3414                (__v4df)
3415                _mm256_setzero_pd (),
3416                (__mmask8) __U);
3417}
3418
3419static __inline__ __m128 __DEFAULT_FN_ATTRS128
3420_mm_scalef_ps (__m128 __A, __m128 __B) {
3421  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3422               (__v4sf) __B,
3423               (__v4sf)
3424               _mm_setzero_ps (),
3425               (__mmask8) -1);
3426}
3427
3428static __inline__ __m128 __DEFAULT_FN_ATTRS128
3429_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3430  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3431               (__v4sf) __B,
3432               (__v4sf) __W,
3433               (__mmask8) __U);
3434}
3435
3436static __inline__ __m128 __DEFAULT_FN_ATTRS128
3437_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3438  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3439               (__v4sf) __B,
3440               (__v4sf)
3441               _mm_setzero_ps (),
3442               (__mmask8) __U);
3443}
3444
3445static __inline__ __m256 __DEFAULT_FN_ATTRS256
3446_mm256_scalef_ps (__m256 __A, __m256 __B) {
3447  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3448               (__v8sf) __B,
3449               (__v8sf)
3450               _mm256_setzero_ps (),
3451               (__mmask8) -1);
3452}
3453
3454static __inline__ __m256 __DEFAULT_FN_ATTRS256
3455_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3456           __m256 __B) {
3457  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3458               (__v8sf) __B,
3459               (__v8sf) __W,
3460               (__mmask8) __U);
3461}
3462
3463static __inline__ __m256 __DEFAULT_FN_ATTRS256
3464_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3465  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3466               (__v8sf) __B,
3467               (__v8sf)
3468               _mm256_setzero_ps (),
3469               (__mmask8) __U);
3470}
3471
/* Unmasked form of _mm_mask_i64scatter_pd: scatters with an all-ones mask. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  _mm_mask_i64scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3476
/* Masked scatter: hands addr, mask, index (as __v2di), v1 (as __v2df) and
 * scale to __builtin_ia32_scatterdiv2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3481
/* Unmasked form of _mm_mask_i64scatter_epi64: scatters with an all-ones
 * mask. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  _mm_mask_i64scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3486
/* Masked scatter: hands addr, mask, index (as __v2di), v1 (as __v2di) and
 * scale to __builtin_ia32_scatterdiv2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3491
/* Unmasked form of _mm256_mask_i64scatter_pd: scatters with an all-ones
 * mask. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  _mm256_mask_i64scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3496
/* Masked scatter: hands addr, mask, index (as __v4di), v1 (as __v4df) and
 * scale to __builtin_ia32_scatterdiv4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3501
/* Unmasked form of _mm256_mask_i64scatter_epi64: scatters with an all-ones
 * mask. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  _mm256_mask_i64scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3506
/* Masked scatter: hands addr, mask, index (as __v4di), v1 (as __v4di) and
 * scale to __builtin_ia32_scatterdiv4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3511
/* Unmasked form of _mm_mask_i64scatter_ps: scatters with an all-ones mask. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  _mm_mask_i64scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3516
/* Masked scatter: hands addr, mask, index (as __v2di), v1 (as __v4sf) and
 * scale to __builtin_ia32_scatterdiv4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3521
/* Unmasked form of _mm_mask_i64scatter_epi32: scatters with an all-ones
 * mask. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  _mm_mask_i64scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3526
/* Masked scatter: hands addr, mask, index (as __v2di), v1 (as __v4si) and
 * scale to __builtin_ia32_scatterdiv4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3531
/* Unmasked form of _mm256_mask_i64scatter_ps: scatters with an all-ones
 * mask. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  _mm256_mask_i64scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3536
/* Masked scatter: hands addr, mask, index (as __v4di), v1 (as __v4sf, i.e. a
 * 128-bit value vector) and scale to __builtin_ia32_scatterdiv8sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3541
/* Unmasked form of _mm256_mask_i64scatter_epi32: scatters with an all-ones
 * mask. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  _mm256_mask_i64scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3546
/* Masked scatter: hands addr, mask, index (as __v4di), v1 (as __v4si, i.e. a
 * 128-bit value vector) and scale to __builtin_ia32_scatterdiv8si. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3551
/* Unmasked form of _mm_mask_i32scatter_pd: scatters with an all-ones mask. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  _mm_mask_i32scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3556
/* Masked scatter: hands addr, mask, index (as __v4si), v1 (as __v2df) and
 * scale to __builtin_ia32_scattersiv2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3561
/* Unmasked form of _mm_mask_i32scatter_epi64: scatters with an all-ones
 * mask. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  _mm_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3566
/* Masked scatter: hands addr, mask, index (as __v4si), v1 (as __v2di) and
 * scale to __builtin_ia32_scattersiv2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3571
/* Unmasked form of _mm256_mask_i32scatter_pd: scatters with an all-ones
 * mask. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  _mm256_mask_i32scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3576
/* Masked scatter: hands addr, mask, index (as __v4si, i.e. a 128-bit index
 * vector), v1 (as __v4df) and scale to __builtin_ia32_scattersiv4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3581
/* Unmasked form of _mm256_mask_i32scatter_epi64: scatters with an all-ones
 * mask. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  _mm256_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3586
/* Masked scatter: hands addr, mask, index (as __v4si, i.e. a 128-bit index
 * vector), v1 (as __v4di) and scale to __builtin_ia32_scattersiv4di. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3591
/* Unmasked form of _mm_mask_i32scatter_ps: scatters with an all-ones mask. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  _mm_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3596
/* Masked scatter: hands addr, mask, index (as __v4si), v1 (as __v4sf) and
 * scale to __builtin_ia32_scattersiv4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3601
/* Unmasked form of _mm_mask_i32scatter_epi32: scatters with an all-ones
 * mask. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  _mm_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3606
/* Masked scatter: hands addr, mask, index (as __v4si), v1 (as __v4si) and
 * scale to __builtin_ia32_scattersiv4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3611
/* Unmasked form of _mm256_mask_i32scatter_ps: scatters with an all-ones
 * mask. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  _mm256_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3616
/* Masked scatter: hands addr, mask, index (as __v8si), v1 (as __v8sf) and
 * scale to __builtin_ia32_scattersiv8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
3621
/* Unmasked form of _mm256_mask_i32scatter_epi32: scatters with an all-ones
 * mask. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  _mm256_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3626
/* Masked scatter: hands addr, mask, index (as __v8si), v1 (as __v8si) and
 * scale to __builtin_ia32_scattersiv8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
3631
3632  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3633  _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3634    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3635                                                (__v2df)_mm_sqrt_pd(__A),
3636                                                (__v2df)__W);
3637  }
3638
3639  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3640  _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3641    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3642                                                (__v2df)_mm_sqrt_pd(__A),
3643                                                (__v2df)_mm_setzero_pd());
3644  }
3645
3646  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3647  _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3648    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3649                                                (__v4df)_mm256_sqrt_pd(__A),
3650                                                (__v4df)__W);
3651  }
3652
3653  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3654  _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3655    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3656                                                (__v4df)_mm256_sqrt_pd(__A),
3657                                                (__v4df)_mm256_setzero_pd());
3658  }
3659
3660  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3661  _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3662    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3663                                               (__v4sf)_mm_sqrt_ps(__A),
3664                                               (__v4sf)__W);
3665  }
3666
3667  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3668  _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3669    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3670                                               (__v4sf)_mm_sqrt_ps(__A),
3671                                               (__v4sf)_mm_setzero_ps());
3672  }
3673
3674  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3675  _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3676    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3677                                               (__v8sf)_mm256_sqrt_ps(__A),
3678                                               (__v8sf)__W);
3679  }
3680
3681  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3682  _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3683    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3684                                               (__v8sf)_mm256_sqrt_ps(__A),
3685                                               (__v8sf)_mm256_setzero_ps());
3686  }
3687
3688  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3689  _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3690    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3691                                                (__v2df)_mm_sub_pd(__A, __B),
3692                                                (__v2df)__W);
3693  }
3694
3695  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3696  _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3697    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3698                                                (__v2df)_mm_sub_pd(__A, __B),
3699                                                (__v2df)_mm_setzero_pd());
3700  }
3701
3702  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3703  _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3704    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3705                                                (__v4df)_mm256_sub_pd(__A, __B),
3706                                                (__v4df)__W);
3707  }
3708
3709  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3710  _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3711    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3712                                                (__v4df)_mm256_sub_pd(__A, __B),
3713                                                (__v4df)_mm256_setzero_pd());
3714  }
3715
3716  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3717  _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3718    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3719                                               (__v4sf)_mm_sub_ps(__A, __B),
3720                                               (__v4sf)__W);
3721  }
3722
3723  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3724  _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3725    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3726                                               (__v4sf)_mm_sub_ps(__A, __B),
3727                                               (__v4sf)_mm_setzero_ps());
3728  }
3729
3730  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3731  _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3732    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3733                                               (__v8sf)_mm256_sub_ps(__A, __B),
3734                                               (__v8sf)__W);
3735  }
3736
3737  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3738  _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3739    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3740                                               (__v8sf)_mm256_sub_ps(__A, __B),
3741                                               (__v8sf)_mm256_setzero_ps());
3742  }
3743
3744  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3745  _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3746    return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3747                                                  (__v4si)__B);
3748  }
3749
3750  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751  _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3752                              __m128i __B) {
3753    return (__m128i)__builtin_ia32_selectd_128(__U,
3754                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3755                                    (__v4si)__A);
3756  }
3757
3758  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759  _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3760                               __m128i __B) {
3761    return (__m128i)__builtin_ia32_selectd_128(__U,
3762                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3763                                    (__v4si)__I);
3764  }
3765
3766  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3767  _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3768                               __m128i __B) {
3769    return (__m128i)__builtin_ia32_selectd_128(__U,
3770                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3771                                    (__v4si)_mm_setzero_si128());
3772  }
3773
3774  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3775  _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3776    return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3777                                                  (__v8si) __B);
3778  }
3779
3780  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781  _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3782                                 __m256i __B) {
3783    return (__m256i)__builtin_ia32_selectd_256(__U,
3784                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3785                                 (__v8si)__A);
3786  }
3787
3788  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789  _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3790                                  __m256i __B) {
3791    return (__m256i)__builtin_ia32_selectd_256(__U,
3792                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3793                                 (__v8si)__I);
3794  }
3795
3796  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3797  _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3798                                  __m256i __B) {
3799    return (__m256i)__builtin_ia32_selectd_256(__U,
3800                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3801                                 (__v8si)_mm256_setzero_si256());
3802  }
3803
3804  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3805  _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3806    return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3807                                                   (__v2df)__B);
3808  }
3809
3810  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811  _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3812    return (__m128d)__builtin_ia32_selectpd_128(__U,
3813                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3814                                       (__v2df)__A);
3815  }
3816
3817  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3818  _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3819    return (__m128d)__builtin_ia32_selectpd_128(__U,
3820                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3821                                       (__v2df)(__m128d)__I);
3822  }
3823
3824  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825  _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3826    return (__m128d)__builtin_ia32_selectpd_128(__U,
3827                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3828                                       (__v2df)_mm_setzero_pd());
3829  }
3830
3831  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3832  _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3833    return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3834                                                   (__v4df)__B);
3835  }
3836
3837  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838  _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3839                              __m256d __B) {
3840    return (__m256d)__builtin_ia32_selectpd_256(__U,
3841                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3842                                    (__v4df)__A);
3843  }
3844
3845  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846  _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3847                               __m256d __B) {
3848    return (__m256d)__builtin_ia32_selectpd_256(__U,
3849                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3850                                    (__v4df)(__m256d)__I);
3851  }
3852
3853  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3854  _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3855                               __m256d __B) {
3856    return (__m256d)__builtin_ia32_selectpd_256(__U,
3857                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3858                                    (__v4df)_mm256_setzero_pd());
3859  }
3860
3861  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3862  _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3863    return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3864                                                  (__v4sf)__B);
3865  }
3866
3867  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868  _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3869    return (__m128)__builtin_ia32_selectps_128(__U,
3870                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3871                                       (__v4sf)__A);
3872  }
3873
3874  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3875  _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3876    return (__m128)__builtin_ia32_selectps_128(__U,
3877                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3878                                       (__v4sf)(__m128)__I);
3879  }
3880
3881  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882  _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3883    return (__m128)__builtin_ia32_selectps_128(__U,
3884                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3885                                       (__v4sf)_mm_setzero_ps());
3886  }
3887
3888  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3889  _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3890    return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3891                                                  (__v8sf) __B);
3892  }
3893
3894  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895  _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3896    return (__m256)__builtin_ia32_selectps_256(__U,
3897                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3898                                    (__v8sf)__A);
3899  }
3900
3901  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3902  _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3903                               __m256 __B) {
3904    return (__m256)__builtin_ia32_selectps_256(__U,
3905                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3906                                    (__v8sf)(__m256)__I);
3907  }
3908
3909  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910  _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3911                               __m256 __B) {
3912    return (__m256)__builtin_ia32_selectps_256(__U,
3913                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3914                                    (__v8sf)_mm256_setzero_ps());
3915  }
3916
3917  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3918  _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3919    return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3920                                                  (__v2di)__B);
3921  }
3922
3923  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924  _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3925                              __m128i __B) {
3926    return (__m128i)__builtin_ia32_selectq_128(__U,
3927                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3928                                    (__v2di)__A);
3929  }
3930
3931  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932  _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3933                               __m128i __B) {
3934    return (__m128i)__builtin_ia32_selectq_128(__U,
3935                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3936                                    (__v2di)__I);
3937  }
3938
3939  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3940  _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3941                               __m128i __B) {
3942    return (__m128i)__builtin_ia32_selectq_128(__U,
3943                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3944                                    (__v2di)_mm_setzero_si128());
3945  }
3946
3947
3948  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3949  _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3950    return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3951                                                  (__v4di) __B);
3952  }
3953
3954  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955  _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3956                                 __m256i __B) {
3957    return (__m256i)__builtin_ia32_selectq_256(__U,
3958                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3959                                 (__v4di)__A);
3960  }
3961
3962  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963  _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3964                                  __m256i __B) {
3965    return (__m256i)__builtin_ia32_selectq_256(__U,
3966                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3967                                 (__v4di)__I);
3968  }
3969
3970  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3971  _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3972                                  __m256i __B) {
3973    return (__m256i)__builtin_ia32_selectq_256(__U,
3974                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3975                                 (__v4di)_mm256_setzero_si256());
3976  }
3977
3978  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3979  _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3980  {
3981    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3982                                               (__v4si)_mm_cvtepi8_epi32(__A),
3983                                               (__v4si)__W);
3984  }
3985
3986  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3987  _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3988  {
3989    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3990                                               (__v4si)_mm_cvtepi8_epi32(__A),
3991                                               (__v4si)_mm_setzero_si128());
3992  }
3993
3994  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3995  _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3996  {
3997    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3998                                               (__v8si)_mm256_cvtepi8_epi32(__A),
3999                                               (__v8si)__W);
4000  }
4001
4002  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4003  _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4004  {
4005    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4006                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4007                                               (__v8si)_mm256_setzero_si256());
4008  }
4009
4010  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4011  _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4012  {
4013    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4014                                               (__v2di)_mm_cvtepi8_epi64(__A),
4015                                               (__v2di)__W);
4016  }
4017
4018  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4019  _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4020  {
4021    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4022                                               (__v2di)_mm_cvtepi8_epi64(__A),
4023                                               (__v2di)_mm_setzero_si128());
4024  }
4025
4026  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4027  _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4028  {
4029    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4030                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4031                                               (__v4di)__W);
4032  }
4033
4034  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4035  _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4036  {
4037    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4038                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4039                                               (__v4di)_mm256_setzero_si256());
4040  }
4041
4042  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4043  _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4044  {
4045    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4046                                               (__v2di)_mm_cvtepi32_epi64(__X),
4047                                               (__v2di)__W);
4048  }
4049
4050  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4051  _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4052  {
4053    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4054                                               (__v2di)_mm_cvtepi32_epi64(__X),
4055                                               (__v2di)_mm_setzero_si128());
4056  }
4057
4058  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4059  _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4060  {
4061    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4062                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4063                                               (__v4di)__W);
4064  }
4065
4066  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4067  _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4068  {
4069    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4070                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4071                                               (__v4di)_mm256_setzero_si256());
4072  }
4073
4074  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4075  _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4076  {
4077    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4078                                               (__v4si)_mm_cvtepi16_epi32(__A),
4079                                               (__v4si)__W);
4080  }
4081
4082  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4083  _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4084  {
4085    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4086                                               (__v4si)_mm_cvtepi16_epi32(__A),
4087                                               (__v4si)_mm_setzero_si128());
4088  }
4089
4090  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4091  _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4092  {
4093    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4094                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4095                                               (__v8si)__W);
4096  }
4097
4098  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4099  _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4100  {
4101    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4102                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4103                                               (__v8si)_mm256_setzero_si256());
4104  }
4105
4106  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4107  _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4108  {
4109    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4110                                               (__v2di)_mm_cvtepi16_epi64(__A),
4111                                               (__v2di)__W);
4112  }
4113
4114  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4115  _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4116  {
4117    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4118                                               (__v2di)_mm_cvtepi16_epi64(__A),
4119                                               (__v2di)_mm_setzero_si128());
4120  }
4121
4122  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4123  _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4124  {
4125    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4126                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4127                                               (__v4di)__W);
4128  }
4129
4130  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4131  _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4132  {
4133    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4134                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4135                                               (__v4di)_mm256_setzero_si256());
4136  }
4137
4138
4139  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4140  _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4141  {
4142    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4143                                               (__v4si)_mm_cvtepu8_epi32(__A),
4144                                               (__v4si)__W);
4145  }
4146
4147  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4148  _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4149  {
4150    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4151                                               (__v4si)_mm_cvtepu8_epi32(__A),
4152                                               (__v4si)_mm_setzero_si128());
4153  }
4154
4155  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4156  _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4157  {
4158    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4159                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4160                                               (__v8si)__W);
4161  }
4162
4163  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4164  _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4165  {
4166    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4167                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4168                                               (__v8si)_mm256_setzero_si256());
4169  }
4170
4171  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4172  _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4173  {
4174    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4175                                               (__v2di)_mm_cvtepu8_epi64(__A),
4176                                               (__v2di)__W);
4177  }
4178
4179  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4180  _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4181  {
4182    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4183                                               (__v2di)_mm_cvtepu8_epi64(__A),
4184                                               (__v2di)_mm_setzero_si128());
4185  }
4186
4187  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4188  _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4189  {
4190    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4191                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4192                                               (__v4di)__W);
4193  }
4194
4195  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4196  _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4197  {
4198    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4199                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4200                                               (__v4di)_mm256_setzero_si256());
4201  }
4202
4203  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4204  _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4205  {
4206    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4207                                               (__v2di)_mm_cvtepu32_epi64(__X),
4208                                               (__v2di)__W);
4209  }
4210
4211  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4212  _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4213  {
4214    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4215                                               (__v2di)_mm_cvtepu32_epi64(__X),
4216                                               (__v2di)_mm_setzero_si128());
4217  }
4218
4219  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4220  _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4221  {
4222    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4223                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4224                                               (__v4di)__W);
4225  }
4226
4227  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4228  _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4229  {
4230    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4231                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4232                                               (__v4di)_mm256_setzero_si256());
4233  }
4234
4235  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4236  _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4237  {
4238    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4239                                               (__v4si)_mm_cvtepu16_epi32(__A),
4240                                               (__v4si)__W);
4241  }
4242
4243  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4244  _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4245  {
4246    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4247                                               (__v4si)_mm_cvtepu16_epi32(__A),
4248                                               (__v4si)_mm_setzero_si128());
4249  }
4250
4251  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4252  _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4253  {
4254    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4255                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4256                                               (__v8si)__W);
4257  }
4258
4259  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4260  _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4261  {
4262    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4263                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4264                                               (__v8si)_mm256_setzero_si256());
4265  }
4266
4267  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4268  _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4269  {
4270    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4271                                               (__v2di)_mm_cvtepu16_epi64(__A),
4272                                               (__v2di)__W);
4273  }
4274
4275  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4276  _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4277  {
4278    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4279                                               (__v2di)_mm_cvtepu16_epi64(__A),
4280                                               (__v2di)_mm_setzero_si128());
4281  }
4282
4283  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4284  _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4285  {
4286    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4287                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4288                                               (__v4di)__W);
4289  }
4290
4291  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4292  _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4293  {
4294    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4295                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4296                                               (__v4di)_mm256_setzero_si256());
4297  }
4298
4299
/* Expands to __builtin_ia32_prold128 applied to (A) viewed as __v4si and (B)
   converted to int; the result is cast to __m128i. */
#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(A), \
                                    (int)(B)))
4302
/* Expands to __builtin_ia32_selectd_128 with mask (U), the
   _mm_rol_epi32((A), (B)) result and (W), the vectors viewed as __v4si. */
#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)(__m128i)(W)))
4307
/* Expands to __builtin_ia32_selectd_128 with mask (U), the
   _mm_rol_epi32((A), (B)) result and the _mm_setzero_si128() vector. */
#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4312
/* Expands to __builtin_ia32_prold256 applied to (A) viewed as __v8si and (B)
   converted to int; the result is cast to __m256i. */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(A), \
                                    (int)(B)))
4315
/* Expands to __builtin_ia32_selectd_256 with mask (U), the
   _mm256_rol_epi32((A), (B)) result and (W), the vectors viewed as __v8si. */
#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4320
/* Expands to __builtin_ia32_selectd_256 with mask (U), the
   _mm256_rol_epi32((A), (B)) result and the _mm256_setzero_si256() vector. */
#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4325
/* Expands to __builtin_ia32_prolq128 applied to (A) viewed as __v2di and (B)
   converted to int; the result is cast to __m128i. */
#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(A), \
                                    (int)(B)))
4328
/* Expands to __builtin_ia32_selectq_128 with mask (U), the
   _mm_rol_epi64((A), (B)) result and (W), the vectors viewed as __v2di. */
#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)(__m128i)(W)))
4333
/* Expands to __builtin_ia32_selectq_128 with mask (U), the
   _mm_rol_epi64((A), (B)) result and the _mm_setzero_si128() vector. */
#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4338
/* Expands to __builtin_ia32_prolq256 applied to (A) viewed as __v4di and (B)
   converted to int; the result is cast to __m256i. */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(A), \
                                    (int)(B)))
4341
/* Expands to __builtin_ia32_selectq_256 with mask (U), the
   _mm256_rol_epi64((A), (B)) result and (W), the vectors viewed as __v4di. */
#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4346
/* Expands to __builtin_ia32_selectq_256 with mask (U), the
   _mm256_rol_epi64((A), (B)) result and the _mm256_setzero_si256() vector. */
#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4351
4352static __inline__ __m128i __DEFAULT_FN_ATTRS128
4353_mm_rolv_epi32 (__m128i __A, __m128i __B)
4354{
4355  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4356}
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4360{
4361  return (__m128i)__builtin_ia32_selectd_128(__U,
4362                                             (__v4si)_mm_rolv_epi32(__A, __B),
4363                                             (__v4si)__W);
4364}
4365
4366static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4368{
4369  return (__m128i)__builtin_ia32_selectd_128(__U,
4370                                             (__v4si)_mm_rolv_epi32(__A, __B),
4371                                             (__v4si)_mm_setzero_si128());
4372}
4373
4374static __inline__ __m256i __DEFAULT_FN_ATTRS256
4375_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4376{
4377  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4382{
4383  return (__m256i)__builtin_ia32_selectd_256(__U,
4384                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4385                                            (__v8si)__W);
4386}
4387
4388static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4390{
4391  return (__m256i)__builtin_ia32_selectd_256(__U,
4392                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4393                                            (__v8si)_mm256_setzero_si256());
4394}
4395
4396static __inline__ __m128i __DEFAULT_FN_ATTRS128
4397_mm_rolv_epi64 (__m128i __A, __m128i __B)
4398{
4399  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4404{
4405  return (__m128i)__builtin_ia32_selectq_128(__U,
4406                                             (__v2di)_mm_rolv_epi64(__A, __B),
4407                                             (__v2di)__W);
4408}
4409
4410static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4412{
4413  return (__m128i)__builtin_ia32_selectq_128(__U,
4414                                             (__v2di)_mm_rolv_epi64(__A, __B),
4415                                             (__v2di)_mm_setzero_si128());
4416}
4417
4418static __inline__ __m256i __DEFAULT_FN_ATTRS256
4419_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4420{
4421  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4426{
4427  return (__m256i)__builtin_ia32_selectq_256(__U,
4428                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4429                                            (__v4di)__W);
4430}
4431
4432static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4434{
4435  return (__m256i)__builtin_ia32_selectq_256(__U,
4436                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4437                                            (__v4di)_mm256_setzero_si256());
4438}
4439
/* Expands to __builtin_ia32_prord128 applied to (A) viewed as __v4si and (B)
   converted to int; the result is cast to __m128i. */
#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(A), \
                                    (int)(B)))
4442
/* Expands to __builtin_ia32_selectd_128 with mask (U), the
   _mm_ror_epi32((A), (B)) result and (W), the vectors viewed as __v4si. */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)(__m128i)(W)))
4447
/* Expands to __builtin_ia32_selectd_128 with mask (U), the
   _mm_ror_epi32((A), (B)) result and the _mm_setzero_si128() vector. */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4452
/* Expands to __builtin_ia32_prord256 applied to (A) viewed as __v8si and (B)
   converted to int; the result is cast to __m256i. */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(A), \
                                    (int)(B)))
4455
/* Expands to __builtin_ia32_selectd_256 with mask (U), the
   _mm256_ror_epi32((A), (B)) result and (W), the vectors viewed as __v8si. */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4460
/* Expands to __builtin_ia32_selectd_256 with mask (U), the
   _mm256_ror_epi32((A), (B)) result and the _mm256_setzero_si256() vector. */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4465
/* Expands to __builtin_ia32_prorq128 applied to (A) viewed as __v2di and (B)
   converted to int; the result is cast to __m128i. */
#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(A), \
                                    (int)(B)))
4468
/* Expands to __builtin_ia32_selectq_128 with mask (U), the
   _mm_ror_epi64((A), (B)) result and (W), the vectors viewed as __v2di. */
#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)(__m128i)(W)))
4473
/* Expands to __builtin_ia32_selectq_128 with mask (U), the
   _mm_ror_epi64((A), (B)) result and the _mm_setzero_si128() vector. */
#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4478
/* Expands to __builtin_ia32_prorq256 applied to (A) viewed as __v4di and (B)
   converted to int; the result is cast to __m256i. */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(A), \
                                    (int)(B)))
4481
/* Expands to __builtin_ia32_selectq_256 with mask (U), the
   _mm256_ror_epi64((A), (B)) result and (W), the vectors viewed as __v4di. */
#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4486
/* Expands to __builtin_ia32_selectq_256 with mask (U), the
   _mm256_ror_epi64((A), (B)) result and the _mm256_setzero_si256() vector. */
#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4491
4492static __inline__ __m128i __DEFAULT_FN_ATTRS128
4493_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4494{
4495  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4496                                             (__v4si)_mm_sll_epi32(__A, __B),
4497                                             (__v4si)__W);
4498}
4499
4500static __inline__ __m128i __DEFAULT_FN_ATTRS128
4501_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4502{
4503  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4504                                             (__v4si)_mm_sll_epi32(__A, __B),
4505                                             (__v4si)_mm_setzero_si128());
4506}
4507
4508static __inline__ __m256i __DEFAULT_FN_ATTRS256
4509_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4510{
4511  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4512                                             (__v8si)_mm256_sll_epi32(__A, __B),
4513                                             (__v8si)__W);
4514}
4515
4516static __inline__ __m256i __DEFAULT_FN_ATTRS256
4517_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4518{
4519  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4520                                             (__v8si)_mm256_sll_epi32(__A, __B),
4521                                             (__v8si)_mm256_setzero_si256());
4522}
4523
4524static __inline__ __m128i __DEFAULT_FN_ATTRS128
4525_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4526{
4527  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4528                                             (__v4si)_mm_slli_epi32(__A, (int)__B),
4529                                             (__v4si)__W);
4530}
4531
4532static __inline__ __m128i __DEFAULT_FN_ATTRS128
4533_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4534{
4535  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4536                                             (__v4si)_mm_slli_epi32(__A, (int)__B),
4537                                             (__v4si)_mm_setzero_si128());
4538}
4539
4540static __inline__ __m256i __DEFAULT_FN_ATTRS256
4541_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4542{
4543  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4544                                             (__v8si)_mm256_slli_epi32(__A, (int)__B),
4545                                             (__v8si)__W);
4546}
4547
4548static __inline__ __m256i __DEFAULT_FN_ATTRS256
4549_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4550{
4551  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4552                                             (__v8si)_mm256_slli_epi32(__A, (int)__B),
4553                                             (__v8si)_mm256_setzero_si256());
4554}
4555
4556static __inline__ __m128i __DEFAULT_FN_ATTRS128
4557_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4558{
4559  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4560                                             (__v2di)_mm_sll_epi64(__A, __B),
4561                                             (__v2di)__W);
4562}
4563
4564static __inline__ __m128i __DEFAULT_FN_ATTRS128
4565_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4566{
4567  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4568                                             (__v2di)_mm_sll_epi64(__A, __B),
4569                                             (__v2di)_mm_setzero_si128());
4570}
4571
4572static __inline__ __m256i __DEFAULT_FN_ATTRS256
4573_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4574{
4575  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4576                                             (__v4di)_mm256_sll_epi64(__A, __B),
4577                                             (__v4di)__W);
4578}
4579
4580static __inline__ __m256i __DEFAULT_FN_ATTRS256
4581_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4582{
4583  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4584                                             (__v4di)_mm256_sll_epi64(__A, __B),
4585                                             (__v4di)_mm256_setzero_si256());
4586}
4587
4588static __inline__ __m128i __DEFAULT_FN_ATTRS128
4589_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4590{
4591  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592                                             (__v2di)_mm_slli_epi64(__A, (int)__B),
4593                                             (__v2di)__W);
4594}
4595
4596static __inline__ __m128i __DEFAULT_FN_ATTRS128
4597_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4598{
4599  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4600                                             (__v2di)_mm_slli_epi64(__A, (int)__B),
4601                                             (__v2di)_mm_setzero_si128());
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS256
4605_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4606{
4607  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608                                             (__v4di)_mm256_slli_epi64(__A, (int)__B),
4609                                             (__v4di)__W);
4610}
4611
4612static __inline__ __m256i __DEFAULT_FN_ATTRS256
4613_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4614{
4615  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4616                                             (__v4di)_mm256_slli_epi64(__A, (int)__B),
4617                                             (__v4di)_mm256_setzero_si256());
4618}
4619
4620static __inline__ __m128i __DEFAULT_FN_ATTRS128
4621_mm_rorv_epi32 (__m128i __A, __m128i __B)
4622{
4623  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4628{
4629  return (__m128i)__builtin_ia32_selectd_128(__U,
4630                                             (__v4si)_mm_rorv_epi32(__A, __B),
4631                                             (__v4si)__W);
4632}
4633
4634static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4636{
4637  return (__m128i)__builtin_ia32_selectd_128(__U,
4638                                             (__v4si)_mm_rorv_epi32(__A, __B),
4639                                             (__v4si)_mm_setzero_si128());
4640}
4641
4642static __inline__ __m256i __DEFAULT_FN_ATTRS256
4643_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4644{
4645  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4650{
4651  return (__m256i)__builtin_ia32_selectd_256(__U,
4652                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4653                                            (__v8si)__W);
4654}
4655
4656static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4658{
4659  return (__m256i)__builtin_ia32_selectd_256(__U,
4660                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4661                                            (__v8si)_mm256_setzero_si256());
4662}
4663
4664static __inline__ __m128i __DEFAULT_FN_ATTRS128
4665_mm_rorv_epi64 (__m128i __A, __m128i __B)
4666{
4667  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4672{
4673  return (__m128i)__builtin_ia32_selectq_128(__U,
4674                                             (__v2di)_mm_rorv_epi64(__A, __B),
4675                                             (__v2di)__W);
4676}
4677
4678static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4680{
4681  return (__m128i)__builtin_ia32_selectq_128(__U,
4682                                             (__v2di)_mm_rorv_epi64(__A, __B),
4683                                             (__v2di)_mm_setzero_si128());
4684}
4685
4686static __inline__ __m256i __DEFAULT_FN_ATTRS256
4687_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4688{
4689  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4694{
4695  return (__m256i)__builtin_ia32_selectq_256(__U,
4696                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4697                                            (__v4di)__W);
4698}
4699
4700static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4702{
4703  return (__m256i)__builtin_ia32_selectq_256(__U,
4704                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4705                                            (__v4di)_mm256_setzero_si256());
4706}
4707
4708static __inline__ __m128i __DEFAULT_FN_ATTRS128
4709_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4710{
4711  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4712                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4713                                             (__v2di)__W);
4714}
4715
4716static __inline__ __m128i __DEFAULT_FN_ATTRS128
4717_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4718{
4719  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4720                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4721                                             (__v2di)_mm_setzero_si128());
4722}
4723
4724static __inline__ __m256i __DEFAULT_FN_ATTRS256
4725_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4726{
4727  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4728                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4729                                            (__v4di)__W);
4730}
4731
4732static __inline__ __m256i __DEFAULT_FN_ATTRS256
4733_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4734{
4735  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4736                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4737                                            (__v4di)_mm256_setzero_si256());
4738}
4739
4740static __inline__ __m128i __DEFAULT_FN_ATTRS128
4741_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4742{
4743  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4744                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4745                                             (__v4si)__W);
4746}
4747
4748static __inline__ __m128i __DEFAULT_FN_ATTRS128
4749_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4750{
4751  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4752                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4753                                             (__v4si)_mm_setzero_si128());
4754}
4755
4756static __inline__ __m256i __DEFAULT_FN_ATTRS256
4757_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4758{
4759  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4760                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4761                                            (__v8si)__W);
4762}
4763
4764static __inline__ __m256i __DEFAULT_FN_ATTRS256
4765_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4766{
4767  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4768                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4769                                            (__v8si)_mm256_setzero_si256());
4770}
4771
4772static __inline__ __m128i __DEFAULT_FN_ATTRS128
4773_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4774{
4775  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4776                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4777                                             (__v2di)__W);
4778}
4779
4780static __inline__ __m128i __DEFAULT_FN_ATTRS128
4781_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4782{
4783  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4784                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4785                                             (__v2di)_mm_setzero_si128());
4786}
4787
4788static __inline__ __m256i __DEFAULT_FN_ATTRS256
4789_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4790{
4791  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4792                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4793                                            (__v4di)__W);
4794}
4795
4796static __inline__ __m256i __DEFAULT_FN_ATTRS256
4797_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4798{
4799  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4800                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4801                                            (__v4di)_mm256_setzero_si256());
4802}
4803
4804static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4806{
4807  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4808                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4809                                            (__v4si)__W);
4810}
4811
4812static __inline__ __m128i __DEFAULT_FN_ATTRS128
4813_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4814{
4815  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4816                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4817                                            (__v4si)_mm_setzero_si128());
4818}
4819
4820static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4822{
4823  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4824                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4825                                            (__v8si)__W);
4826}
4827
4828static __inline__ __m256i __DEFAULT_FN_ATTRS256
4829_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4830{
4831  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4832                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4833                                            (__v8si)_mm256_setzero_si256());
4834}
4835
4836static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4838{
4839  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4840                                             (__v4si)_mm_srl_epi32(__A, __B),
4841                                             (__v4si)__W);
4842}
4843
4844static __inline__ __m128i __DEFAULT_FN_ATTRS128
4845_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4846{
4847  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4848                                             (__v4si)_mm_srl_epi32(__A, __B),
4849                                             (__v4si)_mm_setzero_si128());
4850}
4851
4852static __inline__ __m256i __DEFAULT_FN_ATTRS256
4853_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4854{
4855  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4856                                             (__v8si)_mm256_srl_epi32(__A, __B),
4857                                             (__v8si)__W);
4858}
4859
4860static __inline__ __m256i __DEFAULT_FN_ATTRS256
4861_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4862{
4863  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4864                                             (__v8si)_mm256_srl_epi32(__A, __B),
4865                                             (__v8si)_mm256_setzero_si256());
4866}
4867
4868static __inline__ __m128i __DEFAULT_FN_ATTRS128
4869_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4870{
4871  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4872                                             (__v4si)_mm_srli_epi32(__A, (int)__B),
4873                                             (__v4si)__W);
4874}
4875
4876static __inline__ __m128i __DEFAULT_FN_ATTRS128
4877_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4878{
4879  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4880                                             (__v4si)_mm_srli_epi32(__A, (int)__B),
4881                                             (__v4si)_mm_setzero_si128());
4882}
4883
4884static __inline__ __m256i __DEFAULT_FN_ATTRS256
4885_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4886{
4887  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4888                                             (__v8si)_mm256_srli_epi32(__A, (int)__B),
4889                                             (__v8si)__W);
4890}
4891
4892static __inline__ __m256i __DEFAULT_FN_ATTRS256
4893_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4894{
4895  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4896                                             (__v8si)_mm256_srli_epi32(__A, (int)__B),
4897                                             (__v8si)_mm256_setzero_si256());
4898}
4899
4900static __inline__ __m128i __DEFAULT_FN_ATTRS128
4901_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4902{
4903  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4904                                             (__v2di)_mm_srl_epi64(__A, __B),
4905                                             (__v2di)__W);
4906}
4907
4908static __inline__ __m128i __DEFAULT_FN_ATTRS128
4909_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4910{
4911  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4912                                             (__v2di)_mm_srl_epi64(__A, __B),
4913                                             (__v2di)_mm_setzero_si128());
4914}
4915
4916static __inline__ __m256i __DEFAULT_FN_ATTRS256
4917_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4918{
4919  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4920                                             (__v4di)_mm256_srl_epi64(__A, __B),
4921                                             (__v4di)__W);
4922}
4923
4924static __inline__ __m256i __DEFAULT_FN_ATTRS256
4925_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4926{
4927  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4928                                             (__v4di)_mm256_srl_epi64(__A, __B),
4929                                             (__v4di)_mm256_setzero_si256());
4930}
4931
4932static __inline__ __m128i __DEFAULT_FN_ATTRS128
4933_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4934{
4935  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4936                                             (__v2di)_mm_srli_epi64(__A, (int)__B),
4937                                             (__v2di)__W);
4938}
4939
4940static __inline__ __m128i __DEFAULT_FN_ATTRS128
4941_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4942{
4943  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4944                                             (__v2di)_mm_srli_epi64(__A, (int)__B),
4945                                             (__v2di)_mm_setzero_si128());
4946}
4947
4948static __inline__ __m256i __DEFAULT_FN_ATTRS256
4949_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4950{
4951  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4952                                             (__v4di)_mm256_srli_epi64(__A, (int)__B),
4953                                             (__v4di)__W);
4954}
4955
4956static __inline__ __m256i __DEFAULT_FN_ATTRS256
4957_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4958{
4959  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4960                                             (__v4di)_mm256_srli_epi64(__A, (int)__B),
4961                                             (__v4di)_mm256_setzero_si256());
4962}
4963
4964static __inline__ __m128i __DEFAULT_FN_ATTRS128
4965_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4966{
4967  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4968                                            (__v4si)_mm_srav_epi32(__X, __Y),
4969                                            (__v4si)__W);
4970}
4971
4972static __inline__ __m128i __DEFAULT_FN_ATTRS128
4973_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4974{
4975  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4976                                            (__v4si)_mm_srav_epi32(__X, __Y),
4977                                            (__v4si)_mm_setzero_si128());
4978}
4979
4980static __inline__ __m256i __DEFAULT_FN_ATTRS256
4981_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4982{
4983  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4984                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4985                                            (__v8si)__W);
4986}
4987
4988static __inline__ __m256i __DEFAULT_FN_ATTRS256
4989_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4990{
4991  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4992                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4993                                            (__v8si)_mm256_setzero_si256());
4994}
4995
4996static __inline__ __m128i __DEFAULT_FN_ATTRS128
4997_mm_srav_epi64(__m128i __X, __m128i __Y)
4998{
4999  return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5004{
5005  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5006                                             (__v2di)_mm_srav_epi64(__X, __Y),
5007                                             (__v2di)__W);
5008}
5009
5010static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5012{
5013  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5014                                             (__v2di)_mm_srav_epi64(__X, __Y),
5015                                             (__v2di)_mm_setzero_si128());
5016}
5017
5018static __inline__ __m256i __DEFAULT_FN_ATTRS256
5019_mm256_srav_epi64(__m256i __X, __m256i __Y)
5020{
5021  return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5026{
5027  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5028                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5029                                             (__v4di)__W);
5030}
5031
5032static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5034{
5035  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5036                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5037                                             (__v4di)_mm256_setzero_si256());
5038}
5039
5040static __inline__ __m128i __DEFAULT_FN_ATTRS128
5041_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5042{
5043  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5044                 (__v4si) __A,
5045                 (__v4si) __W);
5046}
5047
5048static __inline__ __m128i __DEFAULT_FN_ATTRS128
5049_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5050{
5051  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5052                 (__v4si) __A,
5053                 (__v4si) _mm_setzero_si128 ());
5054}
5055
5056
5057static __inline__ __m256i __DEFAULT_FN_ATTRS256
5058_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5059{
5060  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5061                 (__v8si) __A,
5062                 (__v8si) __W);
5063}
5064
5065static __inline__ __m256i __DEFAULT_FN_ATTRS256
5066_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5067{
5068  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5069                 (__v8si) __A,
5070                 (__v8si) _mm256_setzero_si256 ());
5071}
5072
5073static __inline __m128i __DEFAULT_FN_ATTRS128
5074_mm_load_epi32 (void const *__P)
5075{
5076  return *(const __m128i *) __P;
5077}
5078
5079static __inline__ __m128i __DEFAULT_FN_ATTRS128
5080_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5081{
5082  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5083              (__v4si) __W,
5084              (__mmask8)
5085              __U);
5086}
5087
5088static __inline__ __m128i __DEFAULT_FN_ATTRS128
5089_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5090{
5091  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5092              (__v4si)
5093              _mm_setzero_si128 (),
5094              (__mmask8)
5095              __U);
5096}
5097
5098static __inline __m256i __DEFAULT_FN_ATTRS256
5099_mm256_load_epi32 (void const *__P)
5100{
5101  return *(const __m256i *) __P;
5102}
5103
5104static __inline__ __m256i __DEFAULT_FN_ATTRS256
5105_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5106{
5107  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5108              (__v8si) __W,
5109              (__mmask8)
5110              __U);
5111}
5112
5113static __inline__ __m256i __DEFAULT_FN_ATTRS256
5114_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5115{
5116  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5117              (__v8si)
5118              _mm256_setzero_si256 (),
5119              (__mmask8)
5120              __U);
5121}
5122
5123static __inline void __DEFAULT_FN_ATTRS128
5124_mm_store_epi32 (void *__P, __m128i __A)
5125{
5126  *(__m128i *) __P = __A;
5127}
5128
5129static __inline__ void __DEFAULT_FN_ATTRS128
5130_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5131{
5132  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5133          (__v4si) __A,
5134          (__mmask8) __U);
5135}
5136
5137static __inline void __DEFAULT_FN_ATTRS256
5138_mm256_store_epi32 (void *__P, __m256i __A)
5139{
5140  *(__m256i *) __P = __A;
5141}
5142
5143static __inline__ void __DEFAULT_FN_ATTRS256
5144_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5145{
5146  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5147          (__v8si) __A,
5148          (__mmask8) __U);
5149}
5150
5151static __inline__ __m128i __DEFAULT_FN_ATTRS128
5152_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5153{
5154  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5155                 (__v2di) __A,
5156                 (__v2di) __W);
5157}
5158
5159static __inline__ __m128i __DEFAULT_FN_ATTRS128
5160_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5161{
5162  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5163                 (__v2di) __A,
5164                 (__v2di) _mm_setzero_si128 ());
5165}
5166
5167static __inline__ __m256i __DEFAULT_FN_ATTRS256
5168_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5169{
5170  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5171                 (__v4di) __A,
5172                 (__v4di) __W);
5173}
5174
5175static __inline__ __m256i __DEFAULT_FN_ATTRS256
5176_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5177{
5178  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5179                 (__v4di) __A,
5180                 (__v4di) _mm256_setzero_si256 ());
5181}
5182
5183static __inline __m128i __DEFAULT_FN_ATTRS128
5184_mm_load_epi64 (void const *__P)
5185{
5186  return *(const __m128i *) __P;
5187}
5188
5189static __inline__ __m128i __DEFAULT_FN_ATTRS128
5190_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5191{
5192  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5193              (__v2di) __W,
5194              (__mmask8)
5195              __U);
5196}
5197
5198static __inline__ __m128i __DEFAULT_FN_ATTRS128
5199_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5200{
5201  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5202              (__v2di)
5203              _mm_setzero_si128 (),
5204              (__mmask8)
5205              __U);
5206}
5207
5208static __inline __m256i __DEFAULT_FN_ATTRS256
5209_mm256_load_epi64 (void const *__P)
5210{
5211  return *(const __m256i *) __P;
5212}
5213
5214static __inline__ __m256i __DEFAULT_FN_ATTRS256
5215_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5216{
5217  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5218              (__v4di) __W,
5219              (__mmask8)
5220              __U);
5221}
5222
5223static __inline__ __m256i __DEFAULT_FN_ATTRS256
5224_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5225{
5226  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5227              (__v4di)
5228              _mm256_setzero_si256 (),
5229              (__mmask8)
5230              __U);
5231}
5232
5233static __inline void __DEFAULT_FN_ATTRS128
5234_mm_store_epi64 (void *__P, __m128i __A)
5235{
5236  *(__m128i *) __P = __A;
5237}
5238
5239static __inline__ void __DEFAULT_FN_ATTRS128
5240_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5241{
5242  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5243          (__v2di) __A,
5244          (__mmask8) __U);
5245}
5246
5247static __inline void __DEFAULT_FN_ATTRS256
5248_mm256_store_epi64 (void *__P, __m256i __A)
5249{
5250  *(__m256i *) __P = __A;
5251}
5252
5253static __inline__ void __DEFAULT_FN_ATTRS256
5254_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5255{
5256  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5257          (__v4di) __A,
5258          (__mmask8) __U);
5259}
5260
5261static __inline__ __m128d __DEFAULT_FN_ATTRS128
5262_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5263{
5264  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5265                                              (__v2df)_mm_movedup_pd(__A),
5266                                              (__v2df)__W);
5267}
5268
5269static __inline__ __m128d __DEFAULT_FN_ATTRS128
5270_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5271{
5272  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5273                                              (__v2df)_mm_movedup_pd(__A),
5274                                              (__v2df)_mm_setzero_pd());
5275}
5276
5277static __inline__ __m256d __DEFAULT_FN_ATTRS256
5278_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5279{
5280  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5281                                              (__v4df)_mm256_movedup_pd(__A),
5282                                              (__v4df)__W);
5283}
5284
5285static __inline__ __m256d __DEFAULT_FN_ATTRS256
5286_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5287{
5288  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5289                                              (__v4df)_mm256_movedup_pd(__A),
5290                                              (__v4df)_mm256_setzero_pd());
5291}
5292
5293static __inline__ __m128i __DEFAULT_FN_ATTRS128
5294_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5295{
5296   return (__m128i)__builtin_ia32_selectd_128(__M,
5297                                              (__v4si) _mm_set1_epi32(__A),
5298                                              (__v4si)__O);
5299}
5300
5301static __inline__ __m128i __DEFAULT_FN_ATTRS128
5302_mm_maskz_set1_epi32( __mmask8 __M, int __A)
5303{
5304   return (__m128i)__builtin_ia32_selectd_128(__M,
5305                                              (__v4si) _mm_set1_epi32(__A),
5306                                              (__v4si)_mm_setzero_si128());
5307}
5308
5309static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5311{
5312   return (__m256i)__builtin_ia32_selectd_256(__M,
5313                                              (__v8si) _mm256_set1_epi32(__A),
5314                                              (__v8si)__O);
5315}
5316
5317static __inline__ __m256i __DEFAULT_FN_ATTRS256
5318_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5319{
5320   return (__m256i)__builtin_ia32_selectd_256(__M,
5321                                              (__v8si) _mm256_set1_epi32(__A),
5322                                              (__v8si)_mm256_setzero_si256());
5323}
5324
5325
5326static __inline__ __m128i __DEFAULT_FN_ATTRS128
5327_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5328{
5329  return (__m128i) __builtin_ia32_selectq_128(__M,
5330                                              (__v2di) _mm_set1_epi64x(__A),
5331                                              (__v2di) __O);
5332}
5333
5334static __inline__ __m128i __DEFAULT_FN_ATTRS128
5335_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5336{
5337  return (__m128i) __builtin_ia32_selectq_128(__M,
5338                                              (__v2di) _mm_set1_epi64x(__A),
5339                                              (__v2di) _mm_setzero_si128());
5340}
5341
5342static __inline__ __m256i __DEFAULT_FN_ATTRS256
5343_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5344{
5345  return (__m256i) __builtin_ia32_selectq_256(__M,
5346                                              (__v4di) _mm256_set1_epi64x(__A),
5347                                              (__v4di) __O) ;
5348}
5349
5350static __inline__ __m256i __DEFAULT_FN_ATTRS256
5351_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5352{
5353   return (__m256i) __builtin_ia32_selectq_256(__M,
5354                                               (__v4di) _mm256_set1_epi64x(__A),
5355                                               (__v4di) _mm256_setzero_si256());
5356}
5357
/* Expands to __builtin_ia32_fixupimmpd128_mask on (A, B, C, imm) with the
   constant mask (__mmask8)-1. NOTE(review): kept as a macro presumably
   because imm must be a compile-time constant - confirm. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5363
/* Expands to __builtin_ia32_fixupimmpd128_mask on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5369
/* Expands to __builtin_ia32_fixupimmpd128_maskz on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5375
/* Expands to __builtin_ia32_fixupimmpd256_mask on (A, B, C, imm) with the
   constant mask (__mmask8)-1. NOTE(review): kept as a macro presumably
   because imm must be a compile-time constant - confirm. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5381
/* Expands to __builtin_ia32_fixupimmpd256_mask on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5387
/* Expands to __builtin_ia32_fixupimmpd256_maskz on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5393
/* Expands to __builtin_ia32_fixupimmps128_mask on (A, B, C, imm) with the
   constant mask (__mmask8)-1. NOTE(review): kept as a macro presumably
   because imm must be a compile-time constant - confirm. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5399
/* Expands to __builtin_ia32_fixupimmps128_mask on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5405
/* Expands to __builtin_ia32_fixupimmps128_maskz on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5411
/* Expands to __builtin_ia32_fixupimmps256_mask on (A, B, C, imm) with the
   constant mask (__mmask8)-1. NOTE(review): kept as a macro presumably
   because imm must be a compile-time constant - confirm. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5417
/* Expands to __builtin_ia32_fixupimmps256_mask on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5423
/* Expands to __builtin_ia32_fixupimmps256_maskz on (A, B, C, imm) with the
   caller-supplied mask U. NOTE(review): kept as a macro presumably because
   imm must be a compile-time constant - confirm. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5429
5430static __inline__ __m128d __DEFAULT_FN_ATTRS128
5431_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5432{
5433  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5434               (__v2df) __W,
5435               (__mmask8) __U);
5436}
5437
5438static __inline__ __m128d __DEFAULT_FN_ATTRS128
5439_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5440{
5441  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5442               (__v2df)
5443               _mm_setzero_pd (),
5444               (__mmask8) __U);
5445}
5446
5447static __inline__ __m256d __DEFAULT_FN_ATTRS256
5448_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5449{
5450  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5451               (__v4df) __W,
5452               (__mmask8) __U);
5453}
5454
5455static __inline__ __m256d __DEFAULT_FN_ATTRS256
5456_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5457{
5458  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5459               (__v4df)
5460               _mm256_setzero_pd (),
5461               (__mmask8) __U);
5462}
5463
5464static __inline__ __m128 __DEFAULT_FN_ATTRS128
5465_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5466{
5467  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5468              (__v4sf) __W,
5469              (__mmask8) __U);
5470}
5471
5472static __inline__ __m128 __DEFAULT_FN_ATTRS128
5473_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5474{
5475  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5476              (__v4sf)
5477              _mm_setzero_ps (),
5478              (__mmask8) __U);
5479}
5480
5481static __inline__ __m256 __DEFAULT_FN_ATTRS256
5482_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5483{
5484  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5485              (__v8sf) __W,
5486              (__mmask8) __U);
5487}
5488
5489static __inline__ __m256 __DEFAULT_FN_ATTRS256
5490_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5491{
5492  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5493              (__v8sf)
5494              _mm256_setzero_ps (),
5495              (__mmask8) __U);
5496}
5497
5498static __inline __m128i __DEFAULT_FN_ATTRS128
5499_mm_loadu_epi64 (void const *__P)
5500{
5501  struct __loadu_epi64 {
5502    __m128i_u __v;
5503  } __attribute__((__packed__, __may_alias__));
5504  return ((const struct __loadu_epi64*)__P)->__v;
5505}
5506
5507static __inline__ __m128i __DEFAULT_FN_ATTRS128
5508_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5509{
5510  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5511                 (__v2di) __W,
5512                 (__mmask8) __U);
5513}
5514
5515static __inline__ __m128i __DEFAULT_FN_ATTRS128
5516_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5517{
5518  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5519                 (__v2di)
5520                 _mm_setzero_si128 (),
5521                 (__mmask8) __U);
5522}
5523
5524static __inline __m256i __DEFAULT_FN_ATTRS256
5525_mm256_loadu_epi64 (void const *__P)
5526{
5527  struct __loadu_epi64 {
5528    __m256i_u __v;
5529  } __attribute__((__packed__, __may_alias__));
5530  return ((const struct __loadu_epi64*)__P)->__v;
5531}
5532
5533static __inline__ __m256i __DEFAULT_FN_ATTRS256
5534_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5535{
5536  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5537                 (__v4di) __W,
5538                 (__mmask8) __U);
5539}
5540
5541static __inline__ __m256i __DEFAULT_FN_ATTRS256
5542_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5543{
5544  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5545                 (__v4di)
5546                 _mm256_setzero_si256 (),
5547                 (__mmask8) __U);
5548}
5549
5550static __inline __m128i __DEFAULT_FN_ATTRS128
5551_mm_loadu_epi32 (void const *__P)
5552{
5553  struct __loadu_epi32 {
5554    __m128i_u __v;
5555  } __attribute__((__packed__, __may_alias__));
5556  return ((const struct __loadu_epi32*)__P)->__v;
5557}
5558
5559static __inline__ __m128i __DEFAULT_FN_ATTRS128
5560_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5561{
5562  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5563                 (__v4si) __W,
5564                 (__mmask8) __U);
5565}
5566
5567static __inline__ __m128i __DEFAULT_FN_ATTRS128
5568_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5569{
5570  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5571                 (__v4si)
5572                 _mm_setzero_si128 (),
5573                 (__mmask8) __U);
5574}
5575
5576static __inline __m256i __DEFAULT_FN_ATTRS256
5577_mm256_loadu_epi32 (void const *__P)
5578{
5579  struct __loadu_epi32 {
5580    __m256i_u __v;
5581  } __attribute__((__packed__, __may_alias__));
5582  return ((const struct __loadu_epi32*)__P)->__v;
5583}
5584
5585static __inline__ __m256i __DEFAULT_FN_ATTRS256
5586_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5587{
5588  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5589                 (__v8si) __W,
5590                 (__mmask8) __U);
5591}
5592
5593static __inline__ __m256i __DEFAULT_FN_ATTRS256
5594_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5595{
5596  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5597                 (__v8si)
5598                 _mm256_setzero_si256 (),
5599                 (__mmask8) __U);
5600}
5601
5602static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5604{
5605  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5606               (__v2df) __W,
5607               (__mmask8) __U);
5608}
5609
5610static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5612{
5613  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5614               (__v2df)
5615               _mm_setzero_pd (),
5616               (__mmask8) __U);
5617}
5618
5619static __inline__ __m256d __DEFAULT_FN_ATTRS256
5620_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5621{
5622  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5623               (__v4df) __W,
5624               (__mmask8) __U);
5625}
5626
5627static __inline__ __m256d __DEFAULT_FN_ATTRS256
5628_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5629{
5630  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5631               (__v4df)
5632               _mm256_setzero_pd (),
5633               (__mmask8) __U);
5634}
5635
5636static __inline__ __m128 __DEFAULT_FN_ATTRS128
5637_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5638{
5639  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5640              (__v4sf) __W,
5641              (__mmask8) __U);
5642}
5643
5644static __inline__ __m128 __DEFAULT_FN_ATTRS128
5645_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5646{
5647  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5648              (__v4sf)
5649              _mm_setzero_ps (),
5650              (__mmask8) __U);
5651}
5652
5653static __inline__ __m256 __DEFAULT_FN_ATTRS256
5654_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5655{
5656  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5657              (__v8sf) __W,
5658              (__mmask8) __U);
5659}
5660
5661static __inline__ __m256 __DEFAULT_FN_ATTRS256
5662_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5663{
5664  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5665              (__v8sf)
5666              _mm256_setzero_ps (),
5667              (__mmask8) __U);
5668}
5669
5670static __inline__ void __DEFAULT_FN_ATTRS128
5671_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5672{
5673  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5674           (__v2df) __A,
5675           (__mmask8) __U);
5676}
5677
5678static __inline__ void __DEFAULT_FN_ATTRS256
5679_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5680{
5681  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5682           (__v4df) __A,
5683           (__mmask8) __U);
5684}
5685
5686static __inline__ void __DEFAULT_FN_ATTRS128
5687_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5688{
5689  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5690           (__v4sf) __A,
5691           (__mmask8) __U);
5692}
5693
5694static __inline__ void __DEFAULT_FN_ATTRS256
5695_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5696{
5697  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5698           (__v8sf) __A,
5699           (__mmask8) __U);
5700}
5701
5702static __inline void __DEFAULT_FN_ATTRS128
5703_mm_storeu_epi64 (void *__P, __m128i __A)
5704{
5705  struct __storeu_epi64 {
5706    __m128i_u __v;
5707  } __attribute__((__packed__, __may_alias__));
5708  ((struct __storeu_epi64*)__P)->__v = __A;
5709}
5710
5711static __inline__ void __DEFAULT_FN_ATTRS128
5712_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5713{
5714  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5715             (__v2di) __A,
5716             (__mmask8) __U);
5717}
5718
5719static __inline void __DEFAULT_FN_ATTRS256
5720_mm256_storeu_epi64 (void *__P, __m256i __A)
5721{
5722  struct __storeu_epi64 {
5723    __m256i_u __v;
5724  } __attribute__((__packed__, __may_alias__));
5725  ((struct __storeu_epi64*)__P)->__v = __A;
5726}
5727
5728static __inline__ void __DEFAULT_FN_ATTRS256
5729_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5730{
5731  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5732             (__v4di) __A,
5733             (__mmask8) __U);
5734}
5735
5736static __inline void __DEFAULT_FN_ATTRS128
5737_mm_storeu_epi32 (void *__P, __m128i __A)
5738{
5739  struct __storeu_epi32 {
5740    __m128i_u __v;
5741  } __attribute__((__packed__, __may_alias__));
5742  ((struct __storeu_epi32*)__P)->__v = __A;
5743}
5744
5745static __inline__ void __DEFAULT_FN_ATTRS128
5746_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5747{
5748  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5749             (__v4si) __A,
5750             (__mmask8) __U);
5751}
5752
5753static __inline void __DEFAULT_FN_ATTRS256
5754_mm256_storeu_epi32 (void *__P, __m256i __A)
5755{
5756  struct __storeu_epi32 {
5757    __m256i_u __v;
5758  } __attribute__((__packed__, __may_alias__));
5759  ((struct __storeu_epi32*)__P)->__v = __A;
5760}
5761
5762static __inline__ void __DEFAULT_FN_ATTRS256
5763_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5764{
5765  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5766             (__v8si) __A,
5767             (__mmask8) __U);
5768}
5769
5770static __inline__ void __DEFAULT_FN_ATTRS128
5771_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5772{
5773  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5774           (__v2df) __A,
5775           (__mmask8) __U);
5776}
5777
5778static __inline__ void __DEFAULT_FN_ATTRS256
5779_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5780{
5781  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5782           (__v4df) __A,
5783           (__mmask8) __U);
5784}
5785
5786static __inline__ void __DEFAULT_FN_ATTRS128
5787_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5788{
5789  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5790           (__v4sf) __A,
5791           (__mmask8) __U);
5792}
5793
5794static __inline__ void __DEFAULT_FN_ATTRS256
5795_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5796{
5797  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5798           (__v8sf) __A,
5799           (__mmask8) __U);
5800}
5801
5802
5803static __inline__ __m128d __DEFAULT_FN_ATTRS128
5804_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5805{
5806  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5807                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5808                                              (__v2df)__W);
5809}
5810
5811static __inline__ __m128d __DEFAULT_FN_ATTRS128
5812_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5813{
5814  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5815                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5816                                              (__v2df)_mm_setzero_pd());
5817}
5818
5819static __inline__ __m256d __DEFAULT_FN_ATTRS256
5820_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5821{
5822  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5823                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5824                                           (__v4df)__W);
5825}
5826
5827static __inline__ __m256d __DEFAULT_FN_ATTRS256
5828_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5829{
5830  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5831                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5832                                           (__v4df)_mm256_setzero_pd());
5833}
5834
5835static __inline__ __m128 __DEFAULT_FN_ATTRS128
5836_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5837{
5838  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5839                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5840                                             (__v4sf)__W);
5841}
5842
5843static __inline__ __m128 __DEFAULT_FN_ATTRS128
5844_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5845{
5846  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5847                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5848                                             (__v4sf)_mm_setzero_ps());
5849}
5850
5851static __inline__ __m256 __DEFAULT_FN_ATTRS256
5852_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5853{
5854  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5855                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5856                                           (__v8sf)__W);
5857}
5858
5859static __inline__ __m256 __DEFAULT_FN_ATTRS256
5860_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5861{
5862  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5863                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5864                                           (__v8sf)_mm256_setzero_ps());
5865}
5866
5867static __inline__ __m128d __DEFAULT_FN_ATTRS128
5868_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5869{
5870  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5871                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5872                                              (__v2df)__W);
5873}
5874
5875static __inline__ __m128d __DEFAULT_FN_ATTRS128
5876_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5877{
5878  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5879                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5880                                              (__v2df)_mm_setzero_pd());
5881}
5882
5883static __inline__ __m256d __DEFAULT_FN_ATTRS256
5884_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5885{
5886  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5887                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5888                                           (__v4df)__W);
5889}
5890
5891static __inline__ __m256d __DEFAULT_FN_ATTRS256
5892_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5893{
5894  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5895                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5896                                           (__v4df)_mm256_setzero_pd());
5897}
5898
5899static __inline__ __m128 __DEFAULT_FN_ATTRS128
5900_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5901{
5902  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5903                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5904                                             (__v4sf)__W);
5905}
5906
5907static __inline__ __m128 __DEFAULT_FN_ATTRS128
5908_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5909{
5910  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5911                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5912                                             (__v4sf)_mm_setzero_ps());
5913}
5914
5915static __inline__ __m256 __DEFAULT_FN_ATTRS256
5916_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5917{
5918  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5919                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5920                                           (__v8sf)__W);
5921}
5922
5923static __inline__ __m256 __DEFAULT_FN_ATTRS256
5924_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5925{
5926  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5927                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5928                                           (__v8sf)_mm256_setzero_ps());
5929}
5930
5931static __inline__ __m128d __DEFAULT_FN_ATTRS128
5932_mm_rcp14_pd (__m128d __A)
5933{
5934  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5935                (__v2df)
5936                _mm_setzero_pd (),
5937                (__mmask8) -1);
5938}
5939
5940static __inline__ __m128d __DEFAULT_FN_ATTRS128
5941_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5942{
5943  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5944                (__v2df) __W,
5945                (__mmask8) __U);
5946}
5947
5948static __inline__ __m128d __DEFAULT_FN_ATTRS128
5949_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5950{
5951  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5952                (__v2df)
5953                _mm_setzero_pd (),
5954                (__mmask8) __U);
5955}
5956
5957static __inline__ __m256d __DEFAULT_FN_ATTRS256
5958_mm256_rcp14_pd (__m256d __A)
5959{
5960  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5961                (__v4df)
5962                _mm256_setzero_pd (),
5963                (__mmask8) -1);
5964}
5965
5966static __inline__ __m256d __DEFAULT_FN_ATTRS256
5967_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5968{
5969  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5970                (__v4df) __W,
5971                (__mmask8) __U);
5972}
5973
5974static __inline__ __m256d __DEFAULT_FN_ATTRS256
5975_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5976{
5977  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5978                (__v4df)
5979                _mm256_setzero_pd (),
5980                (__mmask8) __U);
5981}
5982
5983static __inline__ __m128 __DEFAULT_FN_ATTRS128
5984_mm_rcp14_ps (__m128 __A)
5985{
5986  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5987               (__v4sf)
5988               _mm_setzero_ps (),
5989               (__mmask8) -1);
5990}
5991
5992static __inline__ __m128 __DEFAULT_FN_ATTRS128
5993_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5994{
5995  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5996               (__v4sf) __W,
5997               (__mmask8) __U);
5998}
5999
6000static __inline__ __m128 __DEFAULT_FN_ATTRS128
6001_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6002{
6003  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6004               (__v4sf)
6005               _mm_setzero_ps (),
6006               (__mmask8) __U);
6007}
6008
6009static __inline__ __m256 __DEFAULT_FN_ATTRS256
6010_mm256_rcp14_ps (__m256 __A)
6011{
6012  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6013               (__v8sf)
6014               _mm256_setzero_ps (),
6015               (__mmask8) -1);
6016}
6017
6018static __inline__ __m256 __DEFAULT_FN_ATTRS256
6019_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6020{
6021  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6022               (__v8sf) __W,
6023               (__mmask8) __U);
6024}
6025
6026static __inline__ __m256 __DEFAULT_FN_ATTRS256
6027_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6028{
6029  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6030               (__v8sf)
6031               _mm256_setzero_ps (),
6032               (__mmask8) __U);
6033}
6034
/* Masked variant of _mm_permute_pd: the select builtin picks, under mask U,
 * between _mm_permute_pd(X, C) and W. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)(__m128d)(W)))
6039
/* Zeroing variant of _mm_permute_pd: the select builtin picks, under mask U,
 * between _mm_permute_pd(X, C) and an all-zero vector. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
6044
/* Masked variant of _mm256_permute_pd: the select builtin picks, under mask
 * U, between _mm256_permute_pd(X, C) and W. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
6049
/* Zeroing variant of _mm256_permute_pd: the select builtin picks, under mask
 * U, between _mm256_permute_pd(X, C) and an all-zero vector. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
6054
/* Masked variant of _mm_permute_ps: the select builtin picks, under mask U,
 * between _mm_permute_ps(X, C) and W. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)(__m128)(W)))
6059
/* Zeroing variant of _mm_permute_ps: the select builtin picks, under mask U,
 * between _mm_permute_ps(X, C) and an all-zero vector. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
6064
/* Masked variant of _mm256_permute_ps: the select builtin picks, under mask
 * U, between _mm256_permute_ps(X, C) and W. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
6069
/* Zeroing variant of _mm256_permute_ps: the select builtin picks, under mask
 * U, between _mm256_permute_ps(X, C) and an all-zero vector. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
6074
6075static __inline__ __m128d __DEFAULT_FN_ATTRS128
6076_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6077{
6078  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6079                                            (__v2df)_mm_permutevar_pd(__A, __C),
6080                                            (__v2df)__W);
6081}
6082
6083static __inline__ __m128d __DEFAULT_FN_ATTRS128
6084_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6085{
6086  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6087                                            (__v2df)_mm_permutevar_pd(__A, __C),
6088                                            (__v2df)_mm_setzero_pd());
6089}
6090
6091static __inline__ __m256d __DEFAULT_FN_ATTRS256
6092_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6093{
6094  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6095                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6096                                         (__v4df)__W);
6097}
6098
6099static __inline__ __m256d __DEFAULT_FN_ATTRS256
6100_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6101{
6102  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6103                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6104                                         (__v4df)_mm256_setzero_pd());
6105}
6106
6107static __inline__ __m128 __DEFAULT_FN_ATTRS128
6108_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6109{
6110  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6111                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6112                                            (__v4sf)__W);
6113}
6114
6115static __inline__ __m128 __DEFAULT_FN_ATTRS128
6116_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6117{
6118  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6119                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6120                                            (__v4sf)_mm_setzero_ps());
6121}
6122
6123static __inline__ __m256 __DEFAULT_FN_ATTRS256
6124_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6125{
6126  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6127                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6128                                          (__v8sf)__W);
6129}
6130
6131static __inline__ __m256 __DEFAULT_FN_ATTRS256
6132_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6133{
6134  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6135                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6136                                          (__v8sf)_mm256_setzero_ps());
6137}
6138
6139static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6140_mm_test_epi32_mask (__m128i __A, __m128i __B)
6141{
6142  return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6143}
6144
6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6147{
6148  return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6149                                     _mm_setzero_si128());
6150}
6151
6152static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6153_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6154{
6155  return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6156                                   _mm256_setzero_si256());
6157}
6158
6159static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6160_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6161{
6162  return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6163                                        _mm256_setzero_si256());
6164}
6165
6166static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6167_mm_test_epi64_mask (__m128i __A, __m128i __B)
6168{
6169  return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6170}
6171
6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6174{
6175  return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6176                                     _mm_setzero_si128());
6177}
6178
6179static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6180_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6181{
6182  return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6183                                   _mm256_setzero_si256());
6184}
6185
6186static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6187_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6188{
6189  return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6190                                        _mm256_setzero_si256());
6191}
6192
6193static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6194_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6195{
6196  return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6197}
6198
6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6201{
6202  return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6203                                    _mm_setzero_si128());
6204}
6205
6206static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6207_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6208{
6209  return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6210                                  _mm256_setzero_si256());
6211}
6212
6213static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6214_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6215{
6216  return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6217                                       _mm256_setzero_si256());
6218}
6219
6220static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6221_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6222{
6223  return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6224}
6225
6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6228{
6229  return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6230                                    _mm_setzero_si128());
6231}
6232
6233static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6234_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6235{
6236  return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6237                                  _mm256_setzero_si256());
6238}
6239
6240static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6241_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6242{
6243  return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6244                                       _mm256_setzero_si256());
6245}
6246
6247static __inline__ __m128i __DEFAULT_FN_ATTRS128
6248_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6249{
6250  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6251                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6252                                           (__v4si)__W);
6253}
6254
6255static __inline__ __m128i __DEFAULT_FN_ATTRS128
6256_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6257{
6258  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6259                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6260                                           (__v4si)_mm_setzero_si128());
6261}
6262
6263static __inline__ __m256i __DEFAULT_FN_ATTRS256
6264_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6265{
6266  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6267                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6268                                        (__v8si)__W);
6269}
6270
6271static __inline__ __m256i __DEFAULT_FN_ATTRS256
6272_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6273{
6274  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6275                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6276                                        (__v8si)_mm256_setzero_si256());
6277}
6278
6279static __inline__ __m128i __DEFAULT_FN_ATTRS128
6280_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6281{
6282  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6283                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6284                                           (__v2di)__W);
6285}
6286
6287static __inline__ __m128i __DEFAULT_FN_ATTRS128
6288_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6289{
6290  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6291                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6292                                           (__v2di)_mm_setzero_si128());
6293}
6294
6295static __inline__ __m256i __DEFAULT_FN_ATTRS256
6296_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6297{
6298  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6299                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6300                                        (__v4di)__W);
6301}
6302
6303static __inline__ __m256i __DEFAULT_FN_ATTRS256
6304_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6305{
6306  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6307                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6308                                        (__v4di)_mm256_setzero_si256());
6309}
6310
6311static __inline__ __m128i __DEFAULT_FN_ATTRS128
6312_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6313{
6314  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6315                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6316                                           (__v4si)__W);
6317}
6318
6319static __inline__ __m128i __DEFAULT_FN_ATTRS128
6320_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6321{
6322  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6323                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6324                                           (__v4si)_mm_setzero_si128());
6325}
6326
6327static __inline__ __m256i __DEFAULT_FN_ATTRS256
6328_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6329{
6330  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6331                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6332                                        (__v8si)__W);
6333}
6334
6335static __inline__ __m256i __DEFAULT_FN_ATTRS256
6336_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6337{
6338  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6339                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6340                                        (__v8si)_mm256_setzero_si256());
6341}
6342
6343static __inline__ __m128i __DEFAULT_FN_ATTRS128
6344_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6345{
6346  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6347                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6348                                           (__v2di)__W);
6349}
6350
6351static __inline__ __m128i __DEFAULT_FN_ATTRS128
6352_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6353{
6354  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6355                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6356                                           (__v2di)_mm_setzero_si128());
6357}
6358
6359static __inline__ __m256i __DEFAULT_FN_ATTRS256
6360_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6361{
6362  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6363                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6364                                        (__v4di)__W);
6365}
6366
6367static __inline__ __m256i __DEFAULT_FN_ATTRS256
6368_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6369{
6370  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6371                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6372                                        (__v4di)_mm256_setzero_si256());
6373}
6374
6375static __inline__ __m128i __DEFAULT_FN_ATTRS128
6376_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6377{
6378  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6379                                             (__v4si)_mm_sra_epi32(__A, __B),
6380                                             (__v4si)__W);
6381}
6382
6383static __inline__ __m128i __DEFAULT_FN_ATTRS128
6384_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6385{
6386  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6387                                             (__v4si)_mm_sra_epi32(__A, __B),
6388                                             (__v4si)_mm_setzero_si128());
6389}
6390
6391static __inline__ __m256i __DEFAULT_FN_ATTRS256
6392_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6393{
6394  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6395                                             (__v8si)_mm256_sra_epi32(__A, __B),
6396                                             (__v8si)__W);
6397}
6398
6399static __inline__ __m256i __DEFAULT_FN_ATTRS256
6400_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6401{
6402  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6403                                             (__v8si)_mm256_sra_epi32(__A, __B),
6404                                             (__v8si)_mm256_setzero_si256());
6405}
6406
6407static __inline__ __m128i __DEFAULT_FN_ATTRS128
6408_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
6409{
6410  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6411                                             (__v4si)_mm_srai_epi32(__A, (int)__B),
6412                                             (__v4si)__W);
6413}
6414
6415static __inline__ __m128i __DEFAULT_FN_ATTRS128
6416_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
6417{
6418  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6419                                             (__v4si)_mm_srai_epi32(__A, (int)__B),
6420                                             (__v4si)_mm_setzero_si128());
6421}
6422
6423static __inline__ __m256i __DEFAULT_FN_ATTRS256
6424_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
6425{
6426  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6427                                             (__v8si)_mm256_srai_epi32(__A, (int)__B),
6428                                             (__v8si)__W);
6429}
6430
6431static __inline__ __m256i __DEFAULT_FN_ATTRS256
6432_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
6433{
6434  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6435                                             (__v8si)_mm256_srai_epi32(__A, (int)__B),
6436                                             (__v8si)_mm256_setzero_si256());
6437}
6438
6439static __inline__ __m128i __DEFAULT_FN_ATTRS128
6440_mm_sra_epi64(__m128i __A, __m128i __B)
6441{
6442  return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6443}
6444
6445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6447{
6448  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6449                                             (__v2di)_mm_sra_epi64(__A, __B), \
6450                                             (__v2di)__W);
6451}
6452
6453static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6455{
6456  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6457                                             (__v2di)_mm_sra_epi64(__A, __B), \
6458                                             (__v2di)_mm_setzero_si128());
6459}
6460
6461static __inline__ __m256i __DEFAULT_FN_ATTRS256
6462_mm256_sra_epi64(__m256i __A, __m128i __B)
6463{
6464  return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6465}
6466
6467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6469{
6470  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6471                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6472                                           (__v4di)__W);
6473}
6474
6475static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6477{
6478  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6479                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6480                                           (__v4di)_mm256_setzero_si256());
6481}
6482
6483static __inline__ __m128i __DEFAULT_FN_ATTRS128
6484_mm_srai_epi64(__m128i __A, unsigned int __imm)
6485{
6486  return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6487}
6488
6489static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
6491{
6492  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6493                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6494                                           (__v2di)__W);
6495}
6496
6497static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
6499{
6500  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6501                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6502                                           (__v2di)_mm_setzero_si128());
6503}
6504
6505static __inline__ __m256i __DEFAULT_FN_ATTRS256
6506_mm256_srai_epi64(__m256i __A, unsigned int __imm)
6507{
6508  return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6513                       unsigned int __imm)
6514{
6515  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6516                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6517                                        (__v4di)__W);
6518}
6519
6520static __inline__ __m256i __DEFAULT_FN_ATTRS256
6521_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
6522{
6523  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6524                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6525                                        (__v4di)_mm256_setzero_si256());
6526}
6527
/* Unmasked form: calls the pternlogd128_mask builtin on A, B and C (cast to
 * __v4si) with imm cast to unsigned char and an all-ones mask. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6532
/* Masked form: calls the pternlogd128_mask builtin on A, B and C (cast to
 * __v4si) with imm cast to unsigned char and U as the mask. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6537
/* Zeroing form: calls the pternlogd128_maskz builtin on A, B and C (cast to
 * __v4si) with imm cast to unsigned char and U as the mask. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6542
/* Unmasked form: calls the pternlogd256_mask builtin on A, B and C (cast to
 * __v8si) with imm cast to unsigned char and an all-ones mask. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6547
/* Masked form: calls the pternlogd256_mask builtin on A, B and C (cast to
 * __v8si) with imm cast to unsigned char and U as the mask. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6552
/* Zeroing form: calls the pternlogd256_maskz builtin on A, B and C (cast to
 * __v8si) with imm cast to unsigned char and U as the mask. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6557
/* Unmasked form: calls the pternlogq128_mask builtin on A, B and C (cast to
 * __v2di) with imm cast to unsigned char and an all-ones mask. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6562
/* Masked form: calls the pternlogq128_mask builtin on A, B and C (cast to
 * __v2di) with imm cast to unsigned char and U as the mask. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6567
/* Zeroing form: calls the pternlogq128_maskz builtin on A, B and C (cast to
 * __v2di) with imm cast to unsigned char and U as the mask. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6572
/* Unmasked form: calls the pternlogq256_mask builtin on A, B and C (cast to
 * __v4di) with imm cast to unsigned char and an all-ones mask. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6577
/* Masked form: calls the pternlogq256_mask builtin on A, B and C (cast to
 * __v4di) with imm cast to unsigned char and U as the mask. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6582
/* Zeroing form: calls the pternlogq256_maskz builtin on A, B and C (cast to
 * __v4di) with imm cast to unsigned char and U as the mask. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6587
/* Calls the shuf_f32x4_256 builtin on A and B (cast to __v8sf) with imm cast
 * to int. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm)))
6591
/* Masked variant of _mm256_shuffle_f32x4: the select builtin picks, under
 * mask U, between _mm256_shuffle_f32x4(A, B, imm) and W. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6596
/* Zeroing variant of _mm256_shuffle_f32x4: the select builtin picks, under
 * mask U, between _mm256_shuffle_f32x4(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6601
/* Calls the shuf_f64x2_256 builtin on A and B (cast to __v4df) with imm cast
 * to int. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(imm)))
6605
/* Masked variant of _mm256_shuffle_f64x2: the select builtin picks, under
 * mask U, between _mm256_shuffle_f64x2(A, B, imm) and W. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6610
/* Zeroing variant of _mm256_shuffle_f64x2: the select builtin picks, under
 * mask U, between _mm256_shuffle_f64x2(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6615
/* Expands to __builtin_ia32_shuf_i32x4_256 on A and B (viewed as __v8si),
 * with imm converted to int. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
6619
/* Passes the result of _mm256_shuffle_i32x4(A, B, imm) and W to
 * __builtin_ia32_selectd_256 under mask U. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6624
/* Passes the result of _mm256_shuffle_i32x4(A, B, imm) and a zero vector to
 * __builtin_ia32_selectd_256 under mask U. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6629
/* Expands to __builtin_ia32_shuf_i64x2_256 on A and B (viewed as __v4di),
 * with imm converted to int. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
6633
/* Passes the result of _mm256_shuffle_i64x2(A, B, imm) and W to
 * __builtin_ia32_selectq_256 under mask U. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6638
6639
/* Passes the result of _mm256_shuffle_i64x2(A, B, imm) and a zero vector to
 * __builtin_ia32_selectq_256 under mask U. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6644
/* Passes the result of _mm_shuffle_pd(A, B, M) and W to
 * __builtin_ia32_selectpd_128 under mask U. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6649
/* Passes the result of _mm_shuffle_pd(A, B, M) and a zero vector to
 * __builtin_ia32_selectpd_128 under mask U. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6654
/* Passes the result of _mm256_shuffle_pd(A, B, M) and W to
 * __builtin_ia32_selectpd_256 under mask U. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6659
/* Passes the result of _mm256_shuffle_pd(A, B, M) and a zero vector to
 * __builtin_ia32_selectpd_256 under mask U. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6664
/* Passes the result of _mm_shuffle_ps(A, B, M) and W to
 * __builtin_ia32_selectps_128 under mask U. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6669
/* Passes the result of _mm_shuffle_ps(A, B, M) and a zero vector to
 * __builtin_ia32_selectps_128 under mask U. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6674
/* Passes the result of _mm256_shuffle_ps(A, B, M) and W to
 * __builtin_ia32_selectps_256 under mask U. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6679
/* Passes the result of _mm256_shuffle_ps(A, B, M) and a zero vector to
 * __builtin_ia32_selectps_256 under mask U. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6684
6685static __inline__ __m128d __DEFAULT_FN_ATTRS128
6686_mm_rsqrt14_pd (__m128d __A)
6687{
6688  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6689                 (__v2df)
6690                 _mm_setzero_pd (),
6691                 (__mmask8) -1);
6692}
6693
6694static __inline__ __m128d __DEFAULT_FN_ATTRS128
6695_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6696{
6697  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6698                 (__v2df) __W,
6699                 (__mmask8) __U);
6700}
6701
6702static __inline__ __m128d __DEFAULT_FN_ATTRS128
6703_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6704{
6705  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6706                 (__v2df)
6707                 _mm_setzero_pd (),
6708                 (__mmask8) __U);
6709}
6710
6711static __inline__ __m256d __DEFAULT_FN_ATTRS256
6712_mm256_rsqrt14_pd (__m256d __A)
6713{
6714  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6715                 (__v4df)
6716                 _mm256_setzero_pd (),
6717                 (__mmask8) -1);
6718}
6719
6720static __inline__ __m256d __DEFAULT_FN_ATTRS256
6721_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6722{
6723  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6724                 (__v4df) __W,
6725                 (__mmask8) __U);
6726}
6727
6728static __inline__ __m256d __DEFAULT_FN_ATTRS256
6729_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6730{
6731  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6732                 (__v4df)
6733                 _mm256_setzero_pd (),
6734                 (__mmask8) __U);
6735}
6736
6737static __inline__ __m128 __DEFAULT_FN_ATTRS128
6738_mm_rsqrt14_ps (__m128 __A)
6739{
6740  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6741                (__v4sf)
6742                _mm_setzero_ps (),
6743                (__mmask8) -1);
6744}
6745
6746static __inline__ __m128 __DEFAULT_FN_ATTRS128
6747_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6748{
6749  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6750                (__v4sf) __W,
6751                (__mmask8) __U);
6752}
6753
6754static __inline__ __m128 __DEFAULT_FN_ATTRS128
6755_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6756{
6757  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6758                (__v4sf)
6759                _mm_setzero_ps (),
6760                (__mmask8) __U);
6761}
6762
6763static __inline__ __m256 __DEFAULT_FN_ATTRS256
6764_mm256_rsqrt14_ps (__m256 __A)
6765{
6766  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6767                (__v8sf)
6768                _mm256_setzero_ps (),
6769                (__mmask8) -1);
6770}
6771
6772static __inline__ __m256 __DEFAULT_FN_ATTRS256
6773_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6774{
6775  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6776                (__v8sf) __W,
6777                (__mmask8) __U);
6778}
6779
6780static __inline__ __m256 __DEFAULT_FN_ATTRS256
6781_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6782{
6783  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6784                (__v8sf)
6785                _mm256_setzero_ps (),
6786                (__mmask8) __U);
6787}
6788
6789static __inline__ __m256 __DEFAULT_FN_ATTRS256
6790_mm256_broadcast_f32x4(__m128 __A)
6791{
6792  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6793                                         0, 1, 2, 3, 0, 1, 2, 3);
6794}
6795
6796static __inline__ __m256 __DEFAULT_FN_ATTRS256
6797_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6798{
6799  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6800                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6801                                            (__v8sf)__O);
6802}
6803
6804static __inline__ __m256 __DEFAULT_FN_ATTRS256
6805_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6806{
6807  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6808                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6809                                            (__v8sf)_mm256_setzero_ps());
6810}
6811
6812static __inline__ __m256i __DEFAULT_FN_ATTRS256
6813_mm256_broadcast_i32x4(__m128i __A)
6814{
6815  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6816                                          0, 1, 2, 3, 0, 1, 2, 3);
6817}
6818
6819static __inline__ __m256i __DEFAULT_FN_ATTRS256
6820_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6821{
6822  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6823                                            (__v8si)_mm256_broadcast_i32x4(__A),
6824                                            (__v8si)__O);
6825}
6826
6827static __inline__ __m256i __DEFAULT_FN_ATTRS256
6828_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6829{
6830  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6831                                            (__v8si)_mm256_broadcast_i32x4(__A),
6832                                            (__v8si)_mm256_setzero_si256());
6833}
6834
6835static __inline__ __m256d __DEFAULT_FN_ATTRS256
6836_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6837{
6838  return (__m256d)__builtin_ia32_selectpd_256(__M,
6839                                              (__v4df) _mm256_broadcastsd_pd(__A),
6840                                              (__v4df) __O);
6841}
6842
6843static __inline__ __m256d __DEFAULT_FN_ATTRS256
6844_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6845{
6846  return (__m256d)__builtin_ia32_selectpd_256(__M,
6847                                              (__v4df) _mm256_broadcastsd_pd(__A),
6848                                              (__v4df) _mm256_setzero_pd());
6849}
6850
6851static __inline__ __m128 __DEFAULT_FN_ATTRS128
6852_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6853{
6854  return (__m128)__builtin_ia32_selectps_128(__M,
6855                                             (__v4sf) _mm_broadcastss_ps(__A),
6856                                             (__v4sf) __O);
6857}
6858
6859static __inline__ __m128 __DEFAULT_FN_ATTRS128
6860_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6861{
6862  return (__m128)__builtin_ia32_selectps_128(__M,
6863                                             (__v4sf) _mm_broadcastss_ps(__A),
6864                                             (__v4sf) _mm_setzero_ps());
6865}
6866
6867static __inline__ __m256 __DEFAULT_FN_ATTRS256
6868_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6869{
6870  return (__m256)__builtin_ia32_selectps_256(__M,
6871                                             (__v8sf) _mm256_broadcastss_ps(__A),
6872                                             (__v8sf) __O);
6873}
6874
6875static __inline__ __m256 __DEFAULT_FN_ATTRS256
6876_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6877{
6878  return (__m256)__builtin_ia32_selectps_256(__M,
6879                                             (__v8sf) _mm256_broadcastss_ps(__A),
6880                                             (__v8sf) _mm256_setzero_ps());
6881}
6882
6883static __inline__ __m128i __DEFAULT_FN_ATTRS128
6884_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6885{
6886  return (__m128i)__builtin_ia32_selectd_128(__M,
6887                                             (__v4si) _mm_broadcastd_epi32(__A),
6888                                             (__v4si) __O);
6889}
6890
6891static __inline__ __m128i __DEFAULT_FN_ATTRS128
6892_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6893{
6894  return (__m128i)__builtin_ia32_selectd_128(__M,
6895                                             (__v4si) _mm_broadcastd_epi32(__A),
6896                                             (__v4si) _mm_setzero_si128());
6897}
6898
6899static __inline__ __m256i __DEFAULT_FN_ATTRS256
6900_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6901{
6902  return (__m256i)__builtin_ia32_selectd_256(__M,
6903                                             (__v8si) _mm256_broadcastd_epi32(__A),
6904                                             (__v8si) __O);
6905}
6906
6907static __inline__ __m256i __DEFAULT_FN_ATTRS256
6908_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6909{
6910  return (__m256i)__builtin_ia32_selectd_256(__M,
6911                                             (__v8si) _mm256_broadcastd_epi32(__A),
6912                                             (__v8si) _mm256_setzero_si256());
6913}
6914
6915static __inline__ __m128i __DEFAULT_FN_ATTRS128
6916_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6917{
6918  return (__m128i)__builtin_ia32_selectq_128(__M,
6919                                             (__v2di) _mm_broadcastq_epi64(__A),
6920                                             (__v2di) __O);
6921}
6922
6923static __inline__ __m128i __DEFAULT_FN_ATTRS128
6924_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6925{
6926  return (__m128i)__builtin_ia32_selectq_128(__M,
6927                                             (__v2di) _mm_broadcastq_epi64(__A),
6928                                             (__v2di) _mm_setzero_si128());
6929}
6930
6931static __inline__ __m256i __DEFAULT_FN_ATTRS256
6932_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6933{
6934  return (__m256i)__builtin_ia32_selectq_256(__M,
6935                                             (__v4di) _mm256_broadcastq_epi64(__A),
6936                                             (__v4di) __O);
6937}
6938
6939static __inline__ __m256i __DEFAULT_FN_ATTRS256
6940_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6941{
6942  return (__m256i)__builtin_ia32_selectq_256(__M,
6943                                             (__v4di) _mm256_broadcastq_epi64(__A),
6944                                             (__v4di) _mm256_setzero_si256());
6945}
6946
6947static __inline__ __m128i __DEFAULT_FN_ATTRS128
6948_mm_cvtsepi32_epi8 (__m128i __A)
6949{
6950  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6951               (__v16qi)_mm_undefined_si128(),
6952               (__mmask8) -1);
6953}
6954
6955static __inline__ __m128i __DEFAULT_FN_ATTRS128
6956_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6957{
6958  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6959               (__v16qi) __O, __M);
6960}
6961
6962static __inline__ __m128i __DEFAULT_FN_ATTRS128
6963_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6964{
6965  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6966               (__v16qi) _mm_setzero_si128 (),
6967               __M);
6968}
6969
6970static __inline__ void __DEFAULT_FN_ATTRS128
6971_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6972{
6973  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6974}
6975
6976static __inline__ __m128i __DEFAULT_FN_ATTRS256
6977_mm256_cvtsepi32_epi8 (__m256i __A)
6978{
6979  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6980               (__v16qi)_mm_undefined_si128(),
6981               (__mmask8) -1);
6982}
6983
6984static __inline__ __m128i __DEFAULT_FN_ATTRS256
6985_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6986{
6987  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6988               (__v16qi) __O, __M);
6989}
6990
6991static __inline__ __m128i __DEFAULT_FN_ATTRS256
6992_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
6993{
6994  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6995               (__v16qi) _mm_setzero_si128 (),
6996               __M);
6997}
6998
6999static __inline__ void __DEFAULT_FN_ATTRS256
7000_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7001{
7002  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7003}
7004
7005static __inline__ __m128i __DEFAULT_FN_ATTRS128
7006_mm_cvtsepi32_epi16 (__m128i __A)
7007{
7008  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7009               (__v8hi)_mm_setzero_si128 (),
7010               (__mmask8) -1);
7011}
7012
7013static __inline__ __m128i __DEFAULT_FN_ATTRS128
7014_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7015{
7016  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7017               (__v8hi)__O,
7018               __M);
7019}
7020
7021static __inline__ __m128i __DEFAULT_FN_ATTRS128
7022_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7023{
7024  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7025               (__v8hi) _mm_setzero_si128 (),
7026               __M);
7027}
7028
7029static __inline__ void __DEFAULT_FN_ATTRS128
7030_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7031{
7032  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7033}
7034
7035static __inline__ __m128i __DEFAULT_FN_ATTRS256
7036_mm256_cvtsepi32_epi16 (__m256i __A)
7037{
7038  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7039               (__v8hi)_mm_undefined_si128(),
7040               (__mmask8) -1);
7041}
7042
7043static __inline__ __m128i __DEFAULT_FN_ATTRS256
7044_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7045{
7046  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7047               (__v8hi) __O, __M);
7048}
7049
7050static __inline__ __m128i __DEFAULT_FN_ATTRS256
7051_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7052{
7053  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7054               (__v8hi) _mm_setzero_si128 (),
7055               __M);
7056}
7057
7058static __inline__ void __DEFAULT_FN_ATTRS256
7059_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7060{
7061  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7062}
7063
7064static __inline__ __m128i __DEFAULT_FN_ATTRS128
7065_mm_cvtsepi64_epi8 (__m128i __A)
7066{
7067  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7068               (__v16qi)_mm_undefined_si128(),
7069               (__mmask8) -1);
7070}
7071
7072static __inline__ __m128i __DEFAULT_FN_ATTRS128
7073_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7074{
7075  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7076               (__v16qi) __O, __M);
7077}
7078
7079static __inline__ __m128i __DEFAULT_FN_ATTRS128
7080_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7081{
7082  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7083               (__v16qi) _mm_setzero_si128 (),
7084               __M);
7085}
7086
7087static __inline__ void __DEFAULT_FN_ATTRS128
7088_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7089{
7090  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7091}
7092
7093static __inline__ __m128i __DEFAULT_FN_ATTRS256
7094_mm256_cvtsepi64_epi8 (__m256i __A)
7095{
7096  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7097               (__v16qi)_mm_undefined_si128(),
7098               (__mmask8) -1);
7099}
7100
7101static __inline__ __m128i __DEFAULT_FN_ATTRS256
7102_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7103{
7104  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7105               (__v16qi) __O, __M);
7106}
7107
7108static __inline__ __m128i __DEFAULT_FN_ATTRS256
7109_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7110{
7111  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7112               (__v16qi) _mm_setzero_si128 (),
7113               __M);
7114}
7115
7116static __inline__ void __DEFAULT_FN_ATTRS256
7117_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7118{
7119  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7120}
7121
7122static __inline__ __m128i __DEFAULT_FN_ATTRS128
7123_mm_cvtsepi64_epi32 (__m128i __A)
7124{
7125  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7126               (__v4si)_mm_undefined_si128(),
7127               (__mmask8) -1);
7128}
7129
7130static __inline__ __m128i __DEFAULT_FN_ATTRS128
7131_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7132{
7133  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7134               (__v4si) __O, __M);
7135}
7136
7137static __inline__ __m128i __DEFAULT_FN_ATTRS128
7138_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7139{
7140  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7141               (__v4si) _mm_setzero_si128 (),
7142               __M);
7143}
7144
7145static __inline__ void __DEFAULT_FN_ATTRS128
7146_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7147{
7148  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7149}
7150
7151static __inline__ __m128i __DEFAULT_FN_ATTRS256
7152_mm256_cvtsepi64_epi32 (__m256i __A)
7153{
7154  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7155               (__v4si)_mm_undefined_si128(),
7156               (__mmask8) -1);
7157}
7158
7159static __inline__ __m128i __DEFAULT_FN_ATTRS256
7160_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7161{
7162  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7163               (__v4si)__O,
7164               __M);
7165}
7166
7167static __inline__ __m128i __DEFAULT_FN_ATTRS256
7168_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7169{
7170  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7171               (__v4si) _mm_setzero_si128 (),
7172               __M);
7173}
7174
7175static __inline__ void __DEFAULT_FN_ATTRS256
7176_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7177{
7178  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7179}
7180
7181static __inline__ __m128i __DEFAULT_FN_ATTRS128
7182_mm_cvtsepi64_epi16 (__m128i __A)
7183{
7184  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7185               (__v8hi)_mm_undefined_si128(),
7186               (__mmask8) -1);
7187}
7188
7189static __inline__ __m128i __DEFAULT_FN_ATTRS128
7190_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7191{
7192  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7193               (__v8hi) __O, __M);
7194}
7195
7196static __inline__ __m128i __DEFAULT_FN_ATTRS128
7197_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7198{
7199  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7200               (__v8hi) _mm_setzero_si128 (),
7201               __M);
7202}
7203
7204static __inline__ void __DEFAULT_FN_ATTRS128
7205_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7206{
7207  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7208}
7209
7210static __inline__ __m128i __DEFAULT_FN_ATTRS256
7211_mm256_cvtsepi64_epi16 (__m256i __A)
7212{
7213  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7214               (__v8hi)_mm_undefined_si128(),
7215               (__mmask8) -1);
7216}
7217
7218static __inline__ __m128i __DEFAULT_FN_ATTRS256
7219_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7220{
7221  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7222               (__v8hi) __O, __M);
7223}
7224
7225static __inline__ __m128i __DEFAULT_FN_ATTRS256
7226_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7227{
7228  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7229               (__v8hi) _mm_setzero_si128 (),
7230               __M);
7231}
7232
7233static __inline__ void __DEFAULT_FN_ATTRS256
7234_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7235{
7236  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7237}
7238
7239static __inline__ __m128i __DEFAULT_FN_ATTRS128
7240_mm_cvtusepi32_epi8 (__m128i __A)
7241{
7242  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7243                (__v16qi)_mm_undefined_si128(),
7244                (__mmask8) -1);
7245}
7246
7247static __inline__ __m128i __DEFAULT_FN_ATTRS128
7248_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7249{
7250  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7251                (__v16qi) __O,
7252                __M);
7253}
7254
7255static __inline__ __m128i __DEFAULT_FN_ATTRS128
7256_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7257{
7258  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7259                (__v16qi) _mm_setzero_si128 (),
7260                __M);
7261}
7262
7263static __inline__ void __DEFAULT_FN_ATTRS128
7264_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7265{
7266  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7267}
7268
7269static __inline__ __m128i __DEFAULT_FN_ATTRS256
7270_mm256_cvtusepi32_epi8 (__m256i __A)
7271{
7272  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7273                (__v16qi)_mm_undefined_si128(),
7274                (__mmask8) -1);
7275}
7276
7277static __inline__ __m128i __DEFAULT_FN_ATTRS256
7278_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7279{
7280  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7281                (__v16qi) __O,
7282                __M);
7283}
7284
7285static __inline__ __m128i __DEFAULT_FN_ATTRS256
7286_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7287{
7288  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7289                (__v16qi) _mm_setzero_si128 (),
7290                __M);
7291}
7292
7293static __inline__ void __DEFAULT_FN_ATTRS256
7294_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7295{
7296  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7297}
7298
7299static __inline__ __m128i __DEFAULT_FN_ATTRS128
7300_mm_cvtusepi32_epi16 (__m128i __A)
7301{
7302  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7303                (__v8hi)_mm_undefined_si128(),
7304                (__mmask8) -1);
7305}
7306
7307static __inline__ __m128i __DEFAULT_FN_ATTRS128
7308_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7309{
7310  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7311                (__v8hi) __O, __M);
7312}
7313
7314static __inline__ __m128i __DEFAULT_FN_ATTRS128
7315_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7316{
7317  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7318                (__v8hi) _mm_setzero_si128 (),
7319                __M);
7320}
7321
7322static __inline__ void __DEFAULT_FN_ATTRS128
7323_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7324{
7325  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7326}
7327
7328static __inline__ __m128i __DEFAULT_FN_ATTRS256
7329_mm256_cvtusepi32_epi16 (__m256i __A)
7330{
7331  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7332                (__v8hi) _mm_undefined_si128(),
7333                (__mmask8) -1);
7334}
7335
7336static __inline__ __m128i __DEFAULT_FN_ATTRS256
7337_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7338{
7339  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7340                (__v8hi) __O, __M);
7341}
7342
7343static __inline__ __m128i __DEFAULT_FN_ATTRS256
7344_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7345{
7346  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7347                (__v8hi) _mm_setzero_si128 (),
7348                __M);
7349}
7350
7351static __inline__ void __DEFAULT_FN_ATTRS256
7352_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7353{
7354  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7355}
7356
7357static __inline__ __m128i __DEFAULT_FN_ATTRS128
7358_mm_cvtusepi64_epi8 (__m128i __A)
7359{
7360  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7361                (__v16qi)_mm_undefined_si128(),
7362                (__mmask8) -1);
7363}
7364
7365static __inline__ __m128i __DEFAULT_FN_ATTRS128
7366_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7367{
7368  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7369                (__v16qi) __O,
7370                __M);
7371}
7372
7373static __inline__ __m128i __DEFAULT_FN_ATTRS128
7374_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7375{
7376  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7377                (__v16qi) _mm_setzero_si128 (),
7378                __M);
7379}
7380
7381static __inline__ void __DEFAULT_FN_ATTRS128
7382_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7383{
7384  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7385}
7386
7387static __inline__ __m128i __DEFAULT_FN_ATTRS256
7388_mm256_cvtusepi64_epi8 (__m256i __A)
7389{
7390  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7391                (__v16qi)_mm_undefined_si128(),
7392                (__mmask8) -1);
7393}
7394
7395static __inline__ __m128i __DEFAULT_FN_ATTRS256
7396_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7397{
7398  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7399                (__v16qi) __O,
7400                __M);
7401}
7402
7403static __inline__ __m128i __DEFAULT_FN_ATTRS256
7404_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7405{
7406  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7407                (__v16qi) _mm_setzero_si128 (),
7408                __M);
7409}
7410
7411static __inline__ void __DEFAULT_FN_ATTRS256
7412_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7413{
7414  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7415}
7416
7417static __inline__ __m128i __DEFAULT_FN_ATTRS128
7418_mm_cvtusepi64_epi32 (__m128i __A)
7419{
7420  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7421                (__v4si)_mm_undefined_si128(),
7422                (__mmask8) -1);
7423}
7424
7425static __inline__ __m128i __DEFAULT_FN_ATTRS128
7426_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7427{
7428  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7429                (__v4si) __O, __M);
7430}
7431
7432static __inline__ __m128i __DEFAULT_FN_ATTRS128
7433_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7434{
7435  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7436                (__v4si) _mm_setzero_si128 (),
7437                __M);
7438}
7439
7440static __inline__ void __DEFAULT_FN_ATTRS128
7441_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7442{
7443  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7444}
7445
7446static __inline__ __m128i __DEFAULT_FN_ATTRS256
7447_mm256_cvtusepi64_epi32 (__m256i __A)
7448{
7449  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7450                (__v4si)_mm_undefined_si128(),
7451                (__mmask8) -1);
7452}
7453
7454static __inline__ __m128i __DEFAULT_FN_ATTRS256
7455_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7456{
7457  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7458                (__v4si) __O, __M);
7459}
7460
7461static __inline__ __m128i __DEFAULT_FN_ATTRS256
7462_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7463{
7464  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7465                (__v4si) _mm_setzero_si128 (),
7466                __M);
7467}
7468
7469static __inline__ void __DEFAULT_FN_ATTRS256
7470_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7471{
7472  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7473}
7474
7475static __inline__ __m128i __DEFAULT_FN_ATTRS128
7476_mm_cvtusepi64_epi16 (__m128i __A)
7477{
7478  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7479                (__v8hi)_mm_undefined_si128(),
7480                (__mmask8) -1);
7481}
7482
7483static __inline__ __m128i __DEFAULT_FN_ATTRS128
7484_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7485{
7486  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7487                (__v8hi) __O, __M);
7488}
7489
7490static __inline__ __m128i __DEFAULT_FN_ATTRS128
7491_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7492{
7493  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7494                (__v8hi) _mm_setzero_si128 (),
7495                __M);
7496}
7497
7498static __inline__ void __DEFAULT_FN_ATTRS128
7499_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7500{
7501  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7502}
7503
7504static __inline__ __m128i __DEFAULT_FN_ATTRS256
7505_mm256_cvtusepi64_epi16 (__m256i __A)
7506{
7507  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7508                (__v8hi)_mm_undefined_si128(),
7509                (__mmask8) -1);
7510}
7511
7512static __inline__ __m128i __DEFAULT_FN_ATTRS256
7513_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7514{
7515  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7516                (__v8hi) __O, __M);
7517}
7518
7519static __inline__ __m128i __DEFAULT_FN_ATTRS256
7520_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7521{
7522  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7523                (__v8hi) _mm_setzero_si128 (),
7524                __M);
7525}
7526
7527static __inline__ void __DEFAULT_FN_ATTRS256
7528_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7529{
7530  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7531}
7532
7533static __inline__ __m128i __DEFAULT_FN_ATTRS128
7534_mm_cvtepi32_epi8 (__m128i __A)
7535{
7536  return (__m128i)__builtin_shufflevector(
7537      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7538      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7539}
7540
7541static __inline__ __m128i __DEFAULT_FN_ATTRS128
7542_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7543{
7544  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7545              (__v16qi) __O, __M);
7546}
7547
7548static __inline__ __m128i __DEFAULT_FN_ATTRS128
7549_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7550{
7551  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7552              (__v16qi)
7553              _mm_setzero_si128 (),
7554              __M);
7555}
7556
7557static __inline__ void __DEFAULT_FN_ATTRS128
7558_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7559{
7560  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7561}
7562
7563static __inline__ __m128i __DEFAULT_FN_ATTRS256
7564_mm256_cvtepi32_epi8 (__m256i __A)
7565{
7566  return (__m128i)__builtin_shufflevector(
7567      __builtin_convertvector((__v8si)__A, __v8qi),
7568      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7569      12, 13, 14, 15);
7570}
7571
7572static __inline__ __m128i __DEFAULT_FN_ATTRS256
7573_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7574{
7575  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7576              (__v16qi) __O, __M);
7577}
7578
7579static __inline__ __m128i __DEFAULT_FN_ATTRS256
7580_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7581{
7582  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7583              (__v16qi) _mm_setzero_si128 (),
7584              __M);
7585}
7586
7587static __inline__ void __DEFAULT_FN_ATTRS256
7588_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7589{
7590  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7591}
7592
7593static __inline__ __m128i __DEFAULT_FN_ATTRS128
7594_mm_cvtepi32_epi16 (__m128i __A)
7595{
7596  return (__m128i)__builtin_shufflevector(
7597      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7598      2, 3, 4, 5, 6, 7);
7599}
7600
7601static __inline__ __m128i __DEFAULT_FN_ATTRS128
7602_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7603{
7604  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7605              (__v8hi) __O, __M);
7606}
7607
7608static __inline__ __m128i __DEFAULT_FN_ATTRS128
7609_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7610{
7611  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7612              (__v8hi) _mm_setzero_si128 (),
7613              __M);
7614}
7615
7616static __inline__ void __DEFAULT_FN_ATTRS128
7617_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7618{
7619  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7620}
7621
7622static __inline__ __m128i __DEFAULT_FN_ATTRS256
7623_mm256_cvtepi32_epi16 (__m256i __A)
7624{
7625  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7626}
7627
7628static __inline__ __m128i __DEFAULT_FN_ATTRS256
7629_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7630{
7631  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7632              (__v8hi) __O, __M);
7633}
7634
7635static __inline__ __m128i __DEFAULT_FN_ATTRS256
7636_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7637{
7638  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7639              (__v8hi) _mm_setzero_si128 (),
7640              __M);
7641}
7642
7643static __inline__ void __DEFAULT_FN_ATTRS256
7644_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
7645{
7646  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7647}
7648
7649static __inline__ __m128i __DEFAULT_FN_ATTRS128
7650_mm_cvtepi64_epi8 (__m128i __A)
7651{
7652  return (__m128i)__builtin_shufflevector(
7653      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7654      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7655}
7656
7657static __inline__ __m128i __DEFAULT_FN_ATTRS128
7658_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7659{
7660  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7661              (__v16qi) __O, __M);
7662}
7663
7664static __inline__ __m128i __DEFAULT_FN_ATTRS128
7665_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7666{
7667  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7668              (__v16qi) _mm_setzero_si128 (),
7669              __M);
7670}
7671
7672static __inline__ void __DEFAULT_FN_ATTRS128
7673_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7674{
7675  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7676}
7677
7678static __inline__ __m128i __DEFAULT_FN_ATTRS256
7679_mm256_cvtepi64_epi8 (__m256i __A)
7680{
7681  return (__m128i)__builtin_shufflevector(
7682      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7683      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7684}
7685
7686static __inline__ __m128i __DEFAULT_FN_ATTRS256
7687_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7688{
7689  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7690              (__v16qi) __O, __M);
7691}
7692
7693static __inline__ __m128i __DEFAULT_FN_ATTRS256
7694_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7695{
7696  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7697              (__v16qi) _mm_setzero_si128 (),
7698              __M);
7699}
7700
7701static __inline__ void __DEFAULT_FN_ATTRS256
7702_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7703{
7704  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7705}
7706
7707static __inline__ __m128i __DEFAULT_FN_ATTRS128
7708_mm_cvtepi64_epi32 (__m128i __A)
7709{
7710  return (__m128i)__builtin_shufflevector(
7711      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7712}
7713
7714static __inline__ __m128i __DEFAULT_FN_ATTRS128
7715_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7716{
7717  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7718              (__v4si) __O, __M);
7719}
7720
7721static __inline__ __m128i __DEFAULT_FN_ATTRS128
7722_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7723{
7724  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7725              (__v4si) _mm_setzero_si128 (),
7726              __M);
7727}
7728
7729static __inline__ void __DEFAULT_FN_ATTRS128
7730_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7731{
7732  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7733}
7734
7735static __inline__ __m128i __DEFAULT_FN_ATTRS256
7736_mm256_cvtepi64_epi32 (__m256i __A)
7737{
7738  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7739}
7740
7741static __inline__ __m128i __DEFAULT_FN_ATTRS256
7742_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7743{
7744  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7745                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7746                                             (__v4si)__O);
7747}
7748
7749static __inline__ __m128i __DEFAULT_FN_ATTRS256
7750_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7751{
7752  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7753                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7754                                             (__v4si)_mm_setzero_si128());
7755}
7756
7757static __inline__ void __DEFAULT_FN_ATTRS256
7758_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7759{
7760  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7761}
7762
7763static __inline__ __m128i __DEFAULT_FN_ATTRS128
7764_mm_cvtepi64_epi16 (__m128i __A)
7765{
7766  return (__m128i)__builtin_shufflevector(
7767      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7768      3, 3, 3, 3);
7769}
7770
7771static __inline__ __m128i __DEFAULT_FN_ATTRS128
7772_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7773{
7774  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7775              (__v8hi)__O,
7776              __M);
7777}
7778
7779static __inline__ __m128i __DEFAULT_FN_ATTRS128
7780_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7781{
7782  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7783              (__v8hi) _mm_setzero_si128 (),
7784              __M);
7785}
7786
7787static __inline__ void __DEFAULT_FN_ATTRS128
7788_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7789{
7790  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7791}
7792
7793static __inline__ __m128i __DEFAULT_FN_ATTRS256
7794_mm256_cvtepi64_epi16 (__m256i __A)
7795{
7796  return (__m128i)__builtin_shufflevector(
7797      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7798      2, 3, 4, 5, 6, 7);
7799}
7800
7801static __inline__ __m128i __DEFAULT_FN_ATTRS256
7802_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7803{
7804  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7805              (__v8hi) __O, __M);
7806}
7807
7808static __inline__ __m128i __DEFAULT_FN_ATTRS256
7809_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7810{
7811  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7812              (__v8hi) _mm_setzero_si128 (),
7813              __M);
7814}
7815
7816static __inline__ void __DEFAULT_FN_ATTRS256
7817_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7818{
7819  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7820}
7821
/* Unmasked form: expands to __builtin_ia32_extractf32x4_256_mask on A with
   immediate imm (cast to int), an undefined __v4sf as third operand and an
   all-ones mask.  The result is cast to __m128. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1))
7827
/* Masked form: same builtin as _mm256_extractf32x4_ps, with W as the third
   operand and U (cast to __mmask8) as the mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
7833
/* Zeroing form: same builtin as _mm256_extractf32x4_ps, with an all-zero
   vector (_mm_setzero_ps) as the third operand and U as the mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
7839
/* Unmasked form: expands to __builtin_ia32_extracti32x4_256_mask on A with
   immediate imm (cast to int), an undefined __v4si as third operand and an
   all-ones mask.  The result is cast to __m128i. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_undefined_si128(), \
      (__mmask8)-1))
7845
/* Masked form: same builtin as _mm256_extracti32x4_epi32, with W as the
   third operand and U (cast to __mmask8) as the mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)(__m128i)(W), \
      (__mmask8)(U)))
7851
/* Zeroing form: same builtin as _mm256_extracti32x4_epi32, with an all-zero
   vector (_mm_setzero_si128) as the third operand and U as the mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)))
7857
/* Expands to __builtin_ia32_insertf32x4_256 with the 256-bit vector A, the
   128-bit vector B and immediate imm (cast to int); result cast to __m256. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm)))
7861
/* Masked form: evaluates _mm256_insertf32x4(A, B, imm) and lets
   __builtin_ia32_selectps_256 choose per element between that result and W
   under mask U. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
7866
/* Zeroing form: evaluates _mm256_insertf32x4(A, B, imm) and lets
   __builtin_ia32_selectps_256 choose per element between that result and an
   all-zero vector under mask U. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
7871
/* Expands to __builtin_ia32_inserti32x4_256 with the 256-bit vector A, the
   128-bit vector B and immediate imm (cast to int); result cast to
   __m256i. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm)))
7875
/* Masked form: evaluates _mm256_inserti32x4(A, B, imm) and lets
   __builtin_ia32_selectd_256 choose per element between that result and W
   under mask U. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
7880
/* Zeroing form: evaluates _mm256_inserti32x4(A, B, imm) and lets
   __builtin_ia32_selectd_256 choose per element between that result and an
   all-zero vector under mask U. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
7885
/* Unmasked form: expands to __builtin_ia32_getmantpd128_mask on A.  B and C
   are packed into one int immediate as ((C) << 2) | (B); the third operand
   is an all-zero vector and the mask is all ones. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
7891
/* Masked form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm_getmant_pd, with W as the third operand and U as the mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
7897
/* Zeroing form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm_getmant_pd, with an all-zero third operand and U as the mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
7903
/* Unmasked form: expands to __builtin_ia32_getmantpd256_mask on A.  B and C
   are packed into one int immediate as ((C) << 2) | (B); the third operand
   is an all-zero vector and the mask is all ones. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
7909
/* Masked form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm256_getmant_pd, with W as the third operand and U as the mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
7915
/* Zeroing form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm256_getmant_pd, with an all-zero third operand and U as the mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
7921
/* Unmasked form: expands to __builtin_ia32_getmantps128_mask on A.  B and C
   are packed into one int immediate as ((C) << 2) | (B); the third operand
   is an all-zero vector and the mask is all ones. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
7927
/* Masked form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm_getmant_ps, with W as the third operand and U as the mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
7933
/* Zeroing form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm_getmant_ps, with an all-zero third operand and U as the mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
7939
/* Unmasked form: expands to __builtin_ia32_getmantps256_mask on A.  B and C
   are packed into one int immediate as ((C) << 2) | (B); the third operand
   is an all-zero vector and the mask is all ones. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
7945
/* Masked form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm256_getmant_ps, with W as the third operand and U as the mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
7951
/* Zeroing form: same builtin and immediate packing (((C) << 2) | (B)) as
   _mm256_getmant_ps, with an all-zero third operand and U as the mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
7957
/* Expands to __builtin_ia32_gather3div2df.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  index is viewed as __v2di and the
   result is cast to __m128d. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7963
/* Expands to __builtin_ia32_gather3div2di.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Both v1_old and index are viewed as
   __v2di and the result is cast to __m128i. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7969
/* Expands to __builtin_ia32_gather3div4df.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  index is viewed as __v4di and the
   result is cast to __m256d. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7975
/* Expands to __builtin_ia32_gather3div4di.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Both v1_old and index are viewed as
   __v4di and the result is cast to __m256i. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7981
/* Expands to __builtin_ia32_gather3div4sf.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  v1_old is viewed as __v4sf, index as
   __v2di, and the result is cast to __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7987
/* Expands to __builtin_ia32_gather3div4si.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  v1_old is viewed as __v4si, index as
   __v2di, and the result is cast to __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7993
/* Expands to __builtin_ia32_gather3div8sf.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Note the mixed widths: index is a
   256-bit __v4di, while v1_old and the result are 128-bit (__m128). */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7999
/* Expands to __builtin_ia32_gather3div8si.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Note the mixed widths: index is a
   256-bit __v4di, while v1_old and the result are 128-bit (__m128i). */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8005
/* Expands to __builtin_ia32_gather3siv2df.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  index is viewed as __v4si and the
   result is cast to __m128d. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8011
/* Expands to __builtin_ia32_gather3siv2di.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  v1_old is viewed as __v2di, index as
   __v4si, and the result is cast to __m128i. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8017
/* Expands to __builtin_ia32_gather3siv4df.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Note the mixed widths: index is a
   128-bit __v4si, while v1_old and the result are 256-bit (__m256d). */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8023
/* Expands to __builtin_ia32_gather3siv4di.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Note the mixed widths: index is a
   128-bit __v4si, while v1_old and the result are 256-bit (__m256i). */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8029
/* Expands to __builtin_ia32_gather3siv4sf.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  v1_old is viewed as __v4sf, index as
   __v4si, and the result is cast to __m128. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8035
/* Expands to __builtin_ia32_gather3siv4si.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Both v1_old and index are viewed as
   __v4si and the result is cast to __m128i. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8041
/* Expands to __builtin_ia32_gather3siv8sf.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  v1_old is viewed as __v8sf, index as
   __v8si, and the result is cast to __m256. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8047
/* Expands to __builtin_ia32_gather3siv8si.  The macro takes
   (v1_old, mask, index, addr, scale) but the builtin receives
   (v1_old, addr, index, mask, scale).  Both v1_old and index are viewed as
   __v8si and the result is cast to __m256i. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8053
/* Expands to __builtin_ia32_permdf256 on X (viewed as __v4df) with control
   value C cast to int; result cast to __m256d. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256( \
      (__v4df)(__m256d)(X), (int)(C)))
8056
/* Masked form: evaluates _mm256_permutex_pd(X, C) and lets
   __builtin_ia32_selectpd_256 choose per element between that result and W
   under mask U. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
8061
/* Zeroing form: evaluates _mm256_permutex_pd(X, C) and lets
   __builtin_ia32_selectpd_256 choose per element between that result and an
   all-zero vector under mask U. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
8066
/* Expands to __builtin_ia32_permdi256 on X (viewed as __v4di) with control
   value C cast to int; result cast to __m256i. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256( \
      (__v4di)(__m256i)(X), (int)(C)))
8069
/* Masked form: evaluates _mm256_permutex_epi64(X, C) and lets
   __builtin_ia32_selectq_256 choose per element between that result and W
   under mask U. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))
8074
/* Zeroing form: evaluates _mm256_permutex_epi64(X, C) and lets
   __builtin_ia32_selectq_256 choose per element between that result and an
   all-zero vector under mask U. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
8079
8080static __inline__ __m256d __DEFAULT_FN_ATTRS256
8081_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8082{
8083  return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8084}
8085
8086static __inline__ __m256d __DEFAULT_FN_ATTRS256
8087_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8088          __m256d __Y)
8089{
8090  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8091                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8092                                        (__v4df)__W);
8093}
8094
8095static __inline__ __m256d __DEFAULT_FN_ATTRS256
8096_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8097{
8098  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8099                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8100                                        (__v4df)_mm256_setzero_pd());
8101}
8102
8103static __inline__ __m256i __DEFAULT_FN_ATTRS256
8104_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8105{
8106  return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8107}
8108
8109static __inline__ __m256i __DEFAULT_FN_ATTRS256
8110_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8111{
8112  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8113                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8114                                     (__v4di)_mm256_setzero_si256());
8115}
8116
8117static __inline__ __m256i __DEFAULT_FN_ATTRS256
8118_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8119             __m256i __Y)
8120{
8121  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8122                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8123                                     (__v4di)__W);
8124}
8125
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   (A, B) here becomes (B, A) there. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8127
8128static __inline__ __m256 __DEFAULT_FN_ATTRS256
8129_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8130{
8131  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8132                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8133                                        (__v8sf)__W);
8134}
8135
8136static __inline__ __m256 __DEFAULT_FN_ATTRS256
8137_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8138{
8139  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8140                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8141                                        (__v8sf)_mm256_setzero_ps());
8142}
8143
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped:
   (A, B) here becomes (B, A) there. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8145
8146static __inline__ __m256i __DEFAULT_FN_ATTRS256
8147_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8148                              __m256i __Y)
8149{
8150  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8151                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8152                                     (__v8si)__W);
8153}
8154
8155static __inline__ __m256i __DEFAULT_FN_ATTRS256
8156_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8157{
8158  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8159                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8160                                     (__v8si)_mm256_setzero_si256());
8161}
8162
/* Expands to __builtin_ia32_alignd128 with A and B viewed as __v4si and
   imm cast to int; result cast to __m128i. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(imm)))
8166
/* Masked form: evaluates _mm_alignr_epi32(A, B, imm) and lets
   __builtin_ia32_selectd_128 choose per element between that result and W
   under mask U. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))
8171
/* Zeroing form: evaluates _mm_alignr_epi32(A, B, imm) and lets
   __builtin_ia32_selectd_128 choose per element between that result and an
   all-zero vector under mask U. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
8176
/* Expands to __builtin_ia32_alignd256 with A and B viewed as __v8si and
   imm cast to int; result cast to __m256i. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
8180
/* Masked form: evaluates _mm256_alignr_epi32(A, B, imm) and lets
   __builtin_ia32_selectd_256 choose per element between that result and W
   under mask U. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
8185
/* Zero-masked form of _mm256_alignr_epi32: passes mask U, the result of
 * _mm256_alignr_epi32(A, B, imm) and an all-zero vector from
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements with a clear bit in U become zero --
 * confirm against the select builtin. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
8190
/* Expands to __builtin_ia32_alignq128 applied to A and B (each cast through
 * __m128i to __v2di) with imm cast to int; the result is cast to __m128i.
 * NOTE(review): imm presumably has to be a compile-time constant, which
 * would be why this is a macro rather than a function -- confirm. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))
8194
/* Masked form of _mm_alignr_epi64: passes mask U, the result of
 * _mm_alignr_epi64(A, B, imm) and W (as __v2di) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): presumably elements with a set bit in U come from the alignr
 * result and the rest from W -- confirm against the select builtin. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))
8199
/* Zero-masked form of _mm_alignr_epi64: passes mask U, the result of
 * _mm_alignr_epi64(A, B, imm) and an all-zero vector from
 * _mm_setzero_si128() to __builtin_ia32_selectq_128.
 * NOTE(review): presumably elements with a clear bit in U become zero --
 * confirm against the select builtin. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
8204
/* Expands to __builtin_ia32_alignq256 applied to A and B (each cast through
 * __m256i to __v4di) with imm cast to int; the result is cast to __m256i.
 * NOTE(review): imm presumably has to be a compile-time constant, which
 * would be why this is a macro rather than a function -- confirm. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))
8208
/* Masked form of _mm256_alignr_epi64: passes mask U, the result of
 * _mm256_alignr_epi64(A, B, imm) and W (as __v4di) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements with a set bit in U come from the alignr
 * result and the rest from W -- confirm against the select builtin. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))
8213
/* Zero-masked form of _mm256_alignr_epi64: passes mask U, the result of
 * _mm256_alignr_epi64(A, B, imm) and an all-zero vector from
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements with a clear bit in U become zero --
 * confirm against the select builtin. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8218
8219static __inline__ __m128 __DEFAULT_FN_ATTRS128
8220_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8221{
8222  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8223                                             (__v4sf)_mm_movehdup_ps(__A),
8224                                             (__v4sf)__W);
8225}
8226
8227static __inline__ __m128 __DEFAULT_FN_ATTRS128
8228_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8229{
8230  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8231                                             (__v4sf)_mm_movehdup_ps(__A),
8232                                             (__v4sf)_mm_setzero_ps());
8233}
8234
8235static __inline__ __m256 __DEFAULT_FN_ATTRS256
8236_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8237{
8238  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8239                                             (__v8sf)_mm256_movehdup_ps(__A),
8240                                             (__v8sf)__W);
8241}
8242
8243static __inline__ __m256 __DEFAULT_FN_ATTRS256
8244_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8245{
8246  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8247                                             (__v8sf)_mm256_movehdup_ps(__A),
8248                                             (__v8sf)_mm256_setzero_ps());
8249}
8250
8251static __inline__ __m128 __DEFAULT_FN_ATTRS128
8252_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8253{
8254  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8255                                             (__v4sf)_mm_moveldup_ps(__A),
8256                                             (__v4sf)__W);
8257}
8258
8259static __inline__ __m128 __DEFAULT_FN_ATTRS128
8260_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8261{
8262  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8263                                             (__v4sf)_mm_moveldup_ps(__A),
8264                                             (__v4sf)_mm_setzero_ps());
8265}
8266
8267static __inline__ __m256 __DEFAULT_FN_ATTRS256
8268_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8269{
8270  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8271                                             (__v8sf)_mm256_moveldup_ps(__A),
8272                                             (__v8sf)__W);
8273}
8274
8275static __inline__ __m256 __DEFAULT_FN_ATTRS256
8276_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8277{
8278  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8279                                             (__v8sf)_mm256_moveldup_ps(__A),
8280                                             (__v8sf)_mm256_setzero_ps());
8281}
8282
/* Masked form of _mm256_shuffle_epi32: passes mask U, the result of
 * _mm256_shuffle_epi32(A, I) and W (as __v8si) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements with a set bit in U come from the
 * shuffle result and the rest from W -- confirm against the select builtin. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))
8287
/* Zero-masked form of _mm256_shuffle_epi32: passes mask U, the result of
 * _mm256_shuffle_epi32(A, I) and an all-zero vector from
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements with a clear bit in U become zero --
 * confirm against the select builtin. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
8292
/* Masked form of _mm_shuffle_epi32: passes mask U, the result of
 * _mm_shuffle_epi32(A, I) and W (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements with a set bit in U come from the
 * shuffle result and the rest from W -- confirm against the select builtin. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))
8297
/* Zero-masked form of _mm_shuffle_epi32: passes mask U, the result of
 * _mm_shuffle_epi32(A, I) and an all-zero vector from _mm_setzero_si128()
 * to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements with a clear bit in U become zero --
 * confirm against the select builtin. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8302
8303static __inline__ __m128d __DEFAULT_FN_ATTRS128
8304_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8305{
8306  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8307              (__v2df) __A,
8308              (__v2df) __W);
8309}
8310
8311static __inline__ __m128d __DEFAULT_FN_ATTRS128
8312_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8313{
8314  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8315              (__v2df) __A,
8316              (__v2df) _mm_setzero_pd ());
8317}
8318
8319static __inline__ __m256d __DEFAULT_FN_ATTRS256
8320_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8321{
8322  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8323              (__v4df) __A,
8324              (__v4df) __W);
8325}
8326
8327static __inline__ __m256d __DEFAULT_FN_ATTRS256
8328_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8329{
8330  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8331              (__v4df) __A,
8332              (__v4df) _mm256_setzero_pd ());
8333}
8334
8335static __inline__ __m128 __DEFAULT_FN_ATTRS128
8336_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8337{
8338  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8339             (__v4sf) __A,
8340             (__v4sf) __W);
8341}
8342
8343static __inline__ __m128 __DEFAULT_FN_ATTRS128
8344_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8345{
8346  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8347             (__v4sf) __A,
8348             (__v4sf) _mm_setzero_ps ());
8349}
8350
8351static __inline__ __m256 __DEFAULT_FN_ATTRS256
8352_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8353{
8354  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8355             (__v8sf) __A,
8356             (__v8sf) __W);
8357}
8358
8359static __inline__ __m256 __DEFAULT_FN_ATTRS256
8360_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8361{
8362  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8363             (__v8sf) __A,
8364             (__v8sf) _mm256_setzero_ps ());
8365}
8366
8367static __inline__ __m128 __DEFAULT_FN_ATTRS128
8368_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8369{
8370  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8371             (__v4sf) __W,
8372             (__mmask8) __U);
8373}
8374
8375static __inline__ __m128 __DEFAULT_FN_ATTRS128
8376_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8377{
8378  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8379             (__v4sf)
8380             _mm_setzero_ps (),
8381             (__mmask8) __U);
8382}
8383
8384static __inline__ __m256 __DEFAULT_FN_ATTRS256
8385_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8386{
8387  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8388                (__v8sf) __W,
8389                (__mmask8) __U);
8390}
8391
8392static __inline__ __m256 __DEFAULT_FN_ATTRS256
8393_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8394{
8395  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8396                (__v8sf)
8397                _mm256_setzero_ps (),
8398                (__mmask8) __U);
8399}
8400
/* Expands to __builtin_ia32_vcvtps2ph_mask with A as __v4sf, I cast to int,
 * W as __v8hi and U as __mmask8; the result is cast to __m128i.
 * NOTE(review): I is presumably the rounding-control immediate and W the
 * pass-through for elements not selected by U -- confirm against the
 * builtin's definition. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
8405
/* Expands to __builtin_ia32_vcvtps2ph_mask with A as __v4sf, I cast to int,
 * an all-zero vector from _mm_setzero_si128() as __v8hi and U as __mmask8;
 * the result is cast to __m128i.
 * NOTE(review): I is presumably the rounding-control immediate and elements
 * not selected by U presumably become zero -- confirm against the builtin's
 * definition. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
8410
/* Object-like aliases: _mm_mask_cvtps_ph and _mm_maskz_cvtps_ph are replaced
 * by the corresponding _cvt_roundps_ph macro names and therefore take the
 * same argument lists, (W, U, A, I) and (U, A, I) respectively. */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8413
/* Expands to __builtin_ia32_vcvtps2ph256_mask with the 256-bit A as __v8sf,
 * I cast to int, the 128-bit W as __v8hi and U as __mmask8; the result is
 * cast to __m128i.
 * NOTE(review): I is presumably the rounding-control immediate and W the
 * pass-through for elements not selected by U -- confirm against the
 * builtin's definition. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
8418
/* Expands to __builtin_ia32_vcvtps2ph256_mask with the 256-bit A as __v8sf,
 * I cast to int, an all-zero vector from _mm_setzero_si128() as __v8hi and
 * U as __mmask8; the result is cast to __m128i.
 * NOTE(review): I is presumably the rounding-control immediate and elements
 * not selected by U presumably become zero -- confirm against the builtin's
 * definition. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
8423
/* Object-like aliases: _mm256_mask_cvtps_ph and _mm256_maskz_cvtps_ph are
 * replaced by the corresponding _cvt_roundps_ph macro names and therefore
 * take the same argument lists, (W, U, A, I) and (U, A, I) respectively. */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8426
8427
/* Remove the function-attribute helper macros defined at the top of this
 * header so they are no longer defined after it has been included. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
8430
8431#endif /* __AVX512VLINTRIN_H */
8432