1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to every inline function in this header: always
   inlined, no debug info, compiled for the avx512vl target, with the stated
   minimum vector width (128 or 256). */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(256)))
19
/* Narrow helper vector types, listed from smallest to largest element. */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2-byte vector of char */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4-byte vector of char */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 4-byte vector of short */
23
24/* Integer compare */
25
/* 128-bit epi32 compare shorthands: each forwards to _mm_cmp_epi32_mask or
   _mm_mask_cmp_epi32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
50
/* 256-bit epi32 compare shorthands: each forwards to _mm256_cmp_epi32_mask or
   _mm256_mask_cmp_epi32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
75
/* 128-bit epu32 compare shorthands: each forwards to _mm_cmp_epu32_mask or
   _mm_mask_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
100
/* 256-bit epu32 compare shorthands: each forwards to _mm256_cmp_epu32_mask or
   _mm256_mask_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
125
/* 128-bit epi64 compare shorthands: each forwards to _mm_cmp_epi64_mask or
   _mm_mask_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
150
/* 256-bit epi64 compare shorthands: each forwards to _mm256_cmp_epi64_mask or
   _mm256_mask_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
175
/* 128-bit epu64 compare shorthands: each forwards to _mm_cmp_epu64_mask or
   _mm_mask_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
200
/* 256-bit epu64 compare shorthands: each forwards to _mm256_cmp_epu64_mask or
   _mm256_mask_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
225
226static __inline__ __m256i __DEFAULT_FN_ATTRS256
227_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
228{
229  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
230                                             (__v8si)_mm256_add_epi32(__A, __B),
231                                             (__v8si)__W);
232}
233
234static __inline__ __m256i __DEFAULT_FN_ATTRS256
235_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
236{
237  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
238                                             (__v8si)_mm256_add_epi32(__A, __B),
239                                             (__v8si)_mm256_setzero_si256());
240}
241
242static __inline__ __m256i __DEFAULT_FN_ATTRS256
243_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
244{
245  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
246                                             (__v4di)_mm256_add_epi64(__A, __B),
247                                             (__v4di)__W);
248}
249
250static __inline__ __m256i __DEFAULT_FN_ATTRS256
251_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
252{
253  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
254                                             (__v4di)_mm256_add_epi64(__A, __B),
255                                             (__v4di)_mm256_setzero_si256());
256}
257
258static __inline__ __m256i __DEFAULT_FN_ATTRS256
259_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
260{
261  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
262                                             (__v8si)_mm256_sub_epi32(__A, __B),
263                                             (__v8si)__W);
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256
267_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
268{
269  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
270                                             (__v8si)_mm256_sub_epi32(__A, __B),
271                                             (__v8si)_mm256_setzero_si256());
272}
273
274static __inline__ __m256i __DEFAULT_FN_ATTRS256
275_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
276{
277  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
278                                             (__v4di)_mm256_sub_epi64(__A, __B),
279                                             (__v4di)__W);
280}
281
282static __inline__ __m256i __DEFAULT_FN_ATTRS256
283_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
284{
285  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
286                                             (__v4di)_mm256_sub_epi64(__A, __B),
287                                             (__v4di)_mm256_setzero_si256());
288}
289
290static __inline__ __m128i __DEFAULT_FN_ATTRS128
291_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
292{
293  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
294                                             (__v4si)_mm_add_epi32(__A, __B),
295                                             (__v4si)__W);
296}
297
298static __inline__ __m128i __DEFAULT_FN_ATTRS128
299_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
300{
301  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
302                                             (__v4si)_mm_add_epi32(__A, __B),
303                                             (__v4si)_mm_setzero_si128());
304}
305
306static __inline__ __m128i __DEFAULT_FN_ATTRS128
307_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
308{
309  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
310                                             (__v2di)_mm_add_epi64(__A, __B),
311                                             (__v2di)__W);
312}
313
314static __inline__ __m128i __DEFAULT_FN_ATTRS128
315_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
316{
317  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318                                             (__v2di)_mm_add_epi64(__A, __B),
319                                             (__v2di)_mm_setzero_si128());
320}
321
322static __inline__ __m128i __DEFAULT_FN_ATTRS128
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
324{
325  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
326                                             (__v4si)_mm_sub_epi32(__A, __B),
327                                             (__v4si)__W);
328}
329
330static __inline__ __m128i __DEFAULT_FN_ATTRS128
331_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
332{
333  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
334                                             (__v4si)_mm_sub_epi32(__A, __B),
335                                             (__v4si)_mm_setzero_si128());
336}
337
338static __inline__ __m128i __DEFAULT_FN_ATTRS128
339_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
340{
341  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
342                                             (__v2di)_mm_sub_epi64(__A, __B),
343                                             (__v2di)__W);
344}
345
346static __inline__ __m128i __DEFAULT_FN_ATTRS128
347_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
348{
349  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
350                                             (__v2di)_mm_sub_epi64(__A, __B),
351                                             (__v2di)_mm_setzero_si128());
352}
353
354static __inline__ __m256i __DEFAULT_FN_ATTRS256
355_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
356{
357  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
358                                             (__v4di)_mm256_mul_epi32(__X, __Y),
359                                             (__v4di)__W);
360}
361
362static __inline__ __m256i __DEFAULT_FN_ATTRS256
363_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
364{
365  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
366                                             (__v4di)_mm256_mul_epi32(__X, __Y),
367                                             (__v4di)_mm256_setzero_si256());
368}
369
370static __inline__ __m128i __DEFAULT_FN_ATTRS128
371_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
372{
373  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374                                             (__v2di)_mm_mul_epi32(__X, __Y),
375                                             (__v2di)__W);
376}
377
378static __inline__ __m128i __DEFAULT_FN_ATTRS128
379_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
380{
381  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
382                                             (__v2di)_mm_mul_epi32(__X, __Y),
383                                             (__v2di)_mm_setzero_si128());
384}
385
386static __inline__ __m256i __DEFAULT_FN_ATTRS256
387_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
388{
389  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
390                                             (__v4di)_mm256_mul_epu32(__X, __Y),
391                                             (__v4di)__W);
392}
393
394static __inline__ __m256i __DEFAULT_FN_ATTRS256
395_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
396{
397  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
398                                             (__v4di)_mm256_mul_epu32(__X, __Y),
399                                             (__v4di)_mm256_setzero_si256());
400}
401
402static __inline__ __m128i __DEFAULT_FN_ATTRS128
403_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
404{
405  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
406                                             (__v2di)_mm_mul_epu32(__X, __Y),
407                                             (__v2di)__W);
408}
409
410static __inline__ __m128i __DEFAULT_FN_ATTRS128
411_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
412{
413  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
414                                             (__v2di)_mm_mul_epu32(__X, __Y),
415                                             (__v2di)_mm_setzero_si128());
416}
417
418static __inline__ __m256i __DEFAULT_FN_ATTRS256
419_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
420{
421  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
422                                             (__v8si)_mm256_mullo_epi32(__A, __B),
423                                             (__v8si)_mm256_setzero_si256());
424}
425
426static __inline__ __m256i __DEFAULT_FN_ATTRS256
427_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
428{
429  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
430                                             (__v8si)_mm256_mullo_epi32(__A, __B),
431                                             (__v8si)__W);
432}
433
434static __inline__ __m128i __DEFAULT_FN_ATTRS128
435_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
436{
437  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
438                                             (__v4si)_mm_mullo_epi32(__A, __B),
439                                             (__v4si)_mm_setzero_si128());
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
444{
445  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
446                                             (__v4si)_mm_mullo_epi32(__A, __B),
447                                             (__v4si)__W);
448}
449
450static __inline__ __m256i __DEFAULT_FN_ATTRS256
451_mm256_and_epi32(__m256i __a, __m256i __b)
452{
453  return (__m256i)((__v8su)__a & (__v8su)__b);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
457_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
458{
459  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
460                                             (__v8si)_mm256_and_epi32(__A, __B),
461                                             (__v8si)__W);
462}
463
464static __inline__ __m256i __DEFAULT_FN_ATTRS256
465_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
466{
467  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
468}
469
470static __inline__ __m128i __DEFAULT_FN_ATTRS128
471_mm_and_epi32(__m128i __a, __m128i __b)
472{
473  return (__m128i)((__v4su)__a & (__v4su)__b);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
477_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
478{
479  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
480                                             (__v4si)_mm_and_epi32(__A, __B),
481                                             (__v4si)__W);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS128
485_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
486{
487  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
488}
489
490static __inline__ __m256i __DEFAULT_FN_ATTRS256
491_mm256_andnot_epi32(__m256i __A, __m256i __B)
492{
493  return (__m256i)(~(__v8su)__A & (__v8su)__B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
497_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
498{
499  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
500                                          (__v8si)_mm256_andnot_epi32(__A, __B),
501                                          (__v8si)__W);
502}
503
504static __inline__ __m256i __DEFAULT_FN_ATTRS256
505_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
506{
507  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
508                                           __U, __A, __B);
509}
510
511static __inline__ __m128i __DEFAULT_FN_ATTRS128
512_mm_andnot_epi32(__m128i __A, __m128i __B)
513{
514  return (__m128i)(~(__v4su)__A & (__v4su)__B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
518_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
519{
520  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
521                                             (__v4si)_mm_andnot_epi32(__A, __B),
522                                             (__v4si)__W);
523}
524
525static __inline__ __m128i __DEFAULT_FN_ATTRS128
526_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
527{
528  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
529}
530
531static __inline__ __m256i __DEFAULT_FN_ATTRS256
532_mm256_or_epi32(__m256i __a, __m256i __b)
533{
534  return (__m256i)((__v8su)__a | (__v8su)__b);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
539{
540  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
541                                             (__v8si)_mm256_or_epi32(__A, __B),
542                                             (__v8si)__W);
543}
544
545static __inline__ __m256i __DEFAULT_FN_ATTRS256
546_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
547{
548  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
549}
550
551static __inline__ __m128i __DEFAULT_FN_ATTRS128
552_mm_or_epi32(__m128i __a, __m128i __b)
553{
554  return (__m128i)((__v4su)__a | (__v4su)__b);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
559{
560  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
561                                             (__v4si)_mm_or_epi32(__A, __B),
562                                             (__v4si)__W);
563}
564
565static __inline__ __m128i __DEFAULT_FN_ATTRS128
566_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
567{
568  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
569}
570
571static __inline__ __m256i __DEFAULT_FN_ATTRS256
572_mm256_xor_epi32(__m256i __a, __m256i __b)
573{
574  return (__m256i)((__v8su)__a ^ (__v8su)__b);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
578_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
579{
580  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
581                                             (__v8si)_mm256_xor_epi32(__A, __B),
582                                             (__v8si)__W);
583}
584
585static __inline__ __m256i __DEFAULT_FN_ATTRS256
586_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
587{
588  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
589}
590
591static __inline__ __m128i __DEFAULT_FN_ATTRS128
592_mm_xor_epi32(__m128i __a, __m128i __b)
593{
594  return (__m128i)((__v4su)__a ^ (__v4su)__b);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
599{
600  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
601                                             (__v4si)_mm_xor_epi32(__A, __B),
602                                             (__v4si)__W);
603}
604
605static __inline__ __m128i __DEFAULT_FN_ATTRS128
606_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
607{
608  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
609}
610
611static __inline__ __m256i __DEFAULT_FN_ATTRS256
612_mm256_and_epi64(__m256i __a, __m256i __b)
613{
614  return (__m256i)((__v4du)__a & (__v4du)__b);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
618_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
619{
620  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
621                                             (__v4di)_mm256_and_epi64(__A, __B),
622                                             (__v4di)__W);
623}
624
625static __inline__ __m256i __DEFAULT_FN_ATTRS256
626_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
627{
628  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
629}
630
631static __inline__ __m128i __DEFAULT_FN_ATTRS128
632_mm_and_epi64(__m128i __a, __m128i __b)
633{
634  return (__m128i)((__v2du)__a & (__v2du)__b);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
639{
640  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
641                                             (__v2di)_mm_and_epi64(__A, __B),
642                                             (__v2di)__W);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS128
646_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
647{
648  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
649}
650
651static __inline__ __m256i __DEFAULT_FN_ATTRS256
652_mm256_andnot_epi64(__m256i __A, __m256i __B)
653{
654  return (__m256i)(~(__v4du)__A & (__v4du)__B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
658_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
659{
660  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
661                                          (__v4di)_mm256_andnot_epi64(__A, __B),
662                                          (__v4di)__W);
663}
664
665static __inline__ __m256i __DEFAULT_FN_ATTRS256
666_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
667{
668  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
669                                           __U, __A, __B);
670}
671
672static __inline__ __m128i __DEFAULT_FN_ATTRS128
673_mm_andnot_epi64(__m128i __A, __m128i __B)
674{
675  return (__m128i)(~(__v2du)__A & (__v2du)__B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
679_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
680{
681  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
682                                             (__v2di)_mm_andnot_epi64(__A, __B),
683                                             (__v2di)__W);
684}
685
686static __inline__ __m128i __DEFAULT_FN_ATTRS128
687_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
688{
689  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
690}
691
692static __inline__ __m256i __DEFAULT_FN_ATTRS256
693_mm256_or_epi64(__m256i __a, __m256i __b)
694{
695  return (__m256i)((__v4du)__a | (__v4du)__b);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
699_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
700{
701  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
702                                             (__v4di)_mm256_or_epi64(__A, __B),
703                                             (__v4di)__W);
704}
705
706static __inline__ __m256i __DEFAULT_FN_ATTRS256
707_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
708{
709  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_or_epi64(__m128i __a, __m128i __b)
714{
715  return (__m128i)((__v2du)__a | (__v2du)__b);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
719_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
720{
721  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
722                                             (__v2di)_mm_or_epi64(__A, __B),
723                                             (__v2di)__W);
724}
725
726static __inline__ __m128i __DEFAULT_FN_ATTRS128
727_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
728{
729  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
730}
731
732static __inline__ __m256i __DEFAULT_FN_ATTRS256
733_mm256_xor_epi64(__m256i __a, __m256i __b)
734{
735  return (__m256i)((__v4du)__a ^ (__v4du)__b);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
739_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
740{
741  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
742                                             (__v4di)_mm256_xor_epi64(__A, __B),
743                                             (__v4di)__W);
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
748{
749  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
750}
751
752static __inline__ __m128i __DEFAULT_FN_ATTRS128
753_mm_xor_epi64(__m128i __a, __m128i __b)
754{
755  return (__m128i)((__v2du)__a ^ (__v2du)__b);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
759_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
760        __m128i __B)
761{
762  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
763                                             (__v2di)_mm_xor_epi64(__A, __B),
764                                             (__v2di)__W);
765}
766
767static __inline__ __m128i __DEFAULT_FN_ATTRS128
768_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
769{
770  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
771}
772
/* Compare the 32-bit integer lanes of the 128-bit vectors a and b using
   predicate p (a _MM_CMPINT_* constant) and yield the result as an __mmask8.
   The epi32 forms use the cmpd builtin (signed) and the epu32 forms the ucmpd
   builtin (unsigned). The _mask_ forms forward m as the builtin's mask
   operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
792
/* Compare the 32-bit integer lanes of the 256-bit vectors a and b using
   predicate p (a _MM_CMPINT_* constant) and yield the result as an __mmask8.
   The epi32 forms use the cmpd builtin (signed) and the epu32 forms the ucmpd
   builtin (unsigned). The _mask_ forms forward m as the builtin's mask
   operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
812
/* Compare the 64-bit integer lanes of the 128-bit vectors a and b using
   predicate p (converted to int) and yield the result as an __mmask8.
   The epi64 forms use the cmpq builtin (signed) and the epu64 forms the ucmpq
   builtin (unsigned). The _mask_ forms forward m as the builtin's mask
   operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
832
/* Compare the 64-bit integer lanes of the 256-bit vectors a and b using
   predicate p (converted to int) and yield the result as an __mmask8.
   The epi64 forms use the cmpq builtin (signed) and the epu64 forms the ucmpq
   builtin (unsigned). The _mask_ forms forward m as the builtin's mask
   operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
852
/* Compare the floating-point lanes of the 256-bit vectors a and b using
   predicate p (converted to int) and yield the result as an __mmask8.
   The ps forms operate on packed floats (cmpps builtin) and the pd forms on
   packed doubles (cmppd builtin). The _mask_ forms forward m as the builtin's
   mask operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm256_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
872
/* Compare the floating-point lanes of the 128-bit vectors a and b using
   predicate p (converted to int) and yield the result as an __mmask8.
   The ps forms operate on packed floats (cmpps builtin) and the pd forms on
   packed doubles (cmppd builtin). The _mask_ forms forward m as the builtin's
   mask operand; the plain forms pass an all-ones mask.
   Each expansion is wrapped in parentheses so the leading cast cannot be
   separated from the call by operators in the surrounding expression
   (e.g. sizeof). */
#define _mm_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
892
893static __inline__ __m128d __DEFAULT_FN_ATTRS128
894_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
895{
896  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
897                    __builtin_ia32_vfmaddpd ((__v2df) __A,
898                                             (__v2df) __B,
899                                             (__v2df) __C),
900                    (__v2df) __A);
901}
902
903static __inline__ __m128d __DEFAULT_FN_ATTRS128
904_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
905{
906  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
907                    __builtin_ia32_vfmaddpd ((__v2df) __A,
908                                             (__v2df) __B,
909                                             (__v2df) __C),
910                    (__v2df) __C);
911}
912
913static __inline__ __m128d __DEFAULT_FN_ATTRS128
914_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
915{
916  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
917                    __builtin_ia32_vfmaddpd ((__v2df) __A,
918                                             (__v2df) __B,
919                                             (__v2df) __C),
920                    (__v2df)_mm_setzero_pd());
921}
922
923static __inline__ __m128d __DEFAULT_FN_ATTRS128
924_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
925{
926  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
927                    __builtin_ia32_vfmaddpd ((__v2df) __A,
928                                             (__v2df) __B,
929                                             -(__v2df) __C),
930                    (__v2df) __A);
931}
932
933static __inline__ __m128d __DEFAULT_FN_ATTRS128
934_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
935{
936  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
937                    __builtin_ia32_vfmaddpd ((__v2df) __A,
938                                             (__v2df) __B,
939                                             -(__v2df) __C),
940                    (__v2df)_mm_setzero_pd());
941}
942
943static __inline__ __m128d __DEFAULT_FN_ATTRS128
944_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
945{
946  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
947                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
948                                             (__v2df) __B,
949                                             (__v2df) __C),
950                    (__v2df) __C);
951}
952
953static __inline__ __m128d __DEFAULT_FN_ATTRS128
954_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
955{
956  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
957                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
958                                             (__v2df) __B,
959                                             (__v2df) __C),
960                    (__v2df)_mm_setzero_pd());
961}
962
963static __inline__ __m128d __DEFAULT_FN_ATTRS128
964_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
965{
966  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
967                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
968                                             (__v2df) __B,
969                                             -(__v2df) __C),
970                    (__v2df)_mm_setzero_pd());
971}
972
973static __inline__ __m256d __DEFAULT_FN_ATTRS256
974_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
975{
976  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
977                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
978                                                (__v4df) __B,
979                                                (__v4df) __C),
980                    (__v4df) __A);
981}
982
983static __inline__ __m256d __DEFAULT_FN_ATTRS256
984_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
985{
986  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
987                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
988                                                (__v4df) __B,
989                                                (__v4df) __C),
990                    (__v4df) __C);
991}
992
993static __inline__ __m256d __DEFAULT_FN_ATTRS256
994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
995{
996  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
997                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
998                                                (__v4df) __B,
999                                                (__v4df) __C),
1000                    (__v4df)_mm256_setzero_pd());
1001}
1002
1003static __inline__ __m256d __DEFAULT_FN_ATTRS256
1004_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1005{
1006  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1007                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1008                                                (__v4df) __B,
1009                                                -(__v4df) __C),
1010                    (__v4df) __A);
1011}
1012
1013static __inline__ __m256d __DEFAULT_FN_ATTRS256
1014_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1015{
1016  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1017                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1018                                                (__v4df) __B,
1019                                                -(__v4df) __C),
1020                    (__v4df)_mm256_setzero_pd());
1021}
1022
1023static __inline__ __m256d __DEFAULT_FN_ATTRS256
1024_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1025{
1026  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1027                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1028                                                (__v4df) __B,
1029                                                (__v4df) __C),
1030                    (__v4df) __C);
1031}
1032
1033static __inline__ __m256d __DEFAULT_FN_ATTRS256
1034_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1035{
1036  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1037                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1038                                                (__v4df) __B,
1039                                                (__v4df) __C),
1040                    (__v4df)_mm256_setzero_pd());
1041}
1042
1043static __inline__ __m256d __DEFAULT_FN_ATTRS256
1044_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1045{
1046  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1047                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1048                                                (__v4df) __B,
1049                                                -(__v4df) __C),
1050                    (__v4df)_mm256_setzero_pd());
1051}
1052
1053static __inline__ __m128 __DEFAULT_FN_ATTRS128
1054_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1055{
1056  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1057                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1058                                             (__v4sf) __B,
1059                                             (__v4sf) __C),
1060                    (__v4sf) __A);
1061}
1062
1063static __inline__ __m128 __DEFAULT_FN_ATTRS128
1064_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1065{
1066  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1067                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1068                                             (__v4sf) __B,
1069                                             (__v4sf) __C),
1070                    (__v4sf) __C);
1071}
1072
1073static __inline__ __m128 __DEFAULT_FN_ATTRS128
1074_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1075{
1076  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1077                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1078                                             (__v4sf) __B,
1079                                             (__v4sf) __C),
1080                    (__v4sf)_mm_setzero_ps());
1081}
1082
1083static __inline__ __m128 __DEFAULT_FN_ATTRS128
1084_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1085{
1086  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1087                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1088                                             (__v4sf) __B,
1089                                             -(__v4sf) __C),
1090                    (__v4sf) __A);
1091}
1092
1093static __inline__ __m128 __DEFAULT_FN_ATTRS128
1094_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1095{
1096  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1097                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1098                                             (__v4sf) __B,
1099                                             -(__v4sf) __C),
1100                    (__v4sf)_mm_setzero_ps());
1101}
1102
1103static __inline__ __m128 __DEFAULT_FN_ATTRS128
1104_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1105{
1106  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1107                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1108                                             (__v4sf) __B,
1109                                             (__v4sf) __C),
1110                    (__v4sf) __C);
1111}
1112
1113static __inline__ __m128 __DEFAULT_FN_ATTRS128
1114_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1115{
1116  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1117                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1118                                             (__v4sf) __B,
1119                                             (__v4sf) __C),
1120                    (__v4sf)_mm_setzero_ps());
1121}
1122
1123static __inline__ __m128 __DEFAULT_FN_ATTRS128
1124_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1125{
1126  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1127                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1128                                             (__v4sf) __B,
1129                                             -(__v4sf) __C),
1130                    (__v4sf)_mm_setzero_ps());
1131}
1132
1133static __inline__ __m256 __DEFAULT_FN_ATTRS256
1134_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1135{
1136  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1137                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1138                                                (__v8sf) __B,
1139                                                (__v8sf) __C),
1140                    (__v8sf) __A);
1141}
1142
1143static __inline__ __m256 __DEFAULT_FN_ATTRS256
1144_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1145{
1146  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1147                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1148                                                (__v8sf) __B,
1149                                                (__v8sf) __C),
1150                    (__v8sf) __C);
1151}
1152
1153static __inline__ __m256 __DEFAULT_FN_ATTRS256
1154_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1155{
1156  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1157                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1158                                                (__v8sf) __B,
1159                                                (__v8sf) __C),
1160                    (__v8sf)_mm256_setzero_ps());
1161}
1162
1163static __inline__ __m256 __DEFAULT_FN_ATTRS256
1164_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1165{
1166  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1167                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1168                                                (__v8sf) __B,
1169                                                -(__v8sf) __C),
1170                    (__v8sf) __A);
1171}
1172
1173static __inline__ __m256 __DEFAULT_FN_ATTRS256
1174_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1175{
1176  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1177                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1178                                                (__v8sf) __B,
1179                                                -(__v8sf) __C),
1180                    (__v8sf)_mm256_setzero_ps());
1181}
1182
1183static __inline__ __m256 __DEFAULT_FN_ATTRS256
1184_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1185{
1186  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1187                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1188                                                (__v8sf) __B,
1189                                                (__v8sf) __C),
1190                    (__v8sf) __C);
1191}
1192
1193static __inline__ __m256 __DEFAULT_FN_ATTRS256
1194_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1195{
1196  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1197                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1198                                                (__v8sf) __B,
1199                                                (__v8sf) __C),
1200                    (__v8sf)_mm256_setzero_ps());
1201}
1202
1203static __inline__ __m256 __DEFAULT_FN_ATTRS256
1204_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1205{
1206  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1207                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1208                                                (__v8sf) __B,
1209                                                -(__v8sf) __C),
1210                    (__v8sf)_mm256_setzero_ps());
1211}
1212
1213static __inline__ __m128d __DEFAULT_FN_ATTRS128
1214_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1215{
1216  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1217                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1218                                                (__v2df) __B,
1219                                                (__v2df) __C),
1220                    (__v2df) __A);
1221}
1222
1223static __inline__ __m128d __DEFAULT_FN_ATTRS128
1224_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1225{
1226  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1227                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1228                                                (__v2df) __B,
1229                                                (__v2df) __C),
1230                    (__v2df) __C);
1231}
1232
1233static __inline__ __m128d __DEFAULT_FN_ATTRS128
1234_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1235{
1236  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1237                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1238                                                (__v2df) __B,
1239                                                (__v2df) __C),
1240                    (__v2df)_mm_setzero_pd());
1241}
1242
1243static __inline__ __m128d __DEFAULT_FN_ATTRS128
1244_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1245{
1246  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1247                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1248                                                (__v2df) __B,
1249                                                -(__v2df) __C),
1250                    (__v2df) __A);
1251}
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1255{
1256  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1257                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1258                                                (__v2df) __B,
1259                                                -(__v2df) __C),
1260                    (__v2df)_mm_setzero_pd());
1261}
1262
1263static __inline__ __m256d __DEFAULT_FN_ATTRS256
1264_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1265{
1266  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1267                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1268                                                   (__v4df) __B,
1269                                                   (__v4df) __C),
1270                    (__v4df) __A);
1271}
1272
1273static __inline__ __m256d __DEFAULT_FN_ATTRS256
1274_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1275{
1276  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1277                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1278                                                   (__v4df) __B,
1279                                                   (__v4df) __C),
1280                    (__v4df) __C);
1281}
1282
1283static __inline__ __m256d __DEFAULT_FN_ATTRS256
1284_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1285{
1286  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1287                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1288                                                   (__v4df) __B,
1289                                                   (__v4df) __C),
1290                    (__v4df)_mm256_setzero_pd());
1291}
1292
1293static __inline__ __m256d __DEFAULT_FN_ATTRS256
1294_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1295{
1296  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1297                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1298                                                   (__v4df) __B,
1299                                                   -(__v4df) __C),
1300                    (__v4df) __A);
1301}
1302
1303static __inline__ __m256d __DEFAULT_FN_ATTRS256
1304_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1305{
1306  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1307                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1308                                                   (__v4df) __B,
1309                                                   -(__v4df) __C),
1310                    (__v4df)_mm256_setzero_pd());
1311}
1312
1313static __inline__ __m128 __DEFAULT_FN_ATTRS128
1314_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1315{
1316  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1317                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1318                                                (__v4sf) __B,
1319                                                (__v4sf) __C),
1320                    (__v4sf) __A);
1321}
1322
1323static __inline__ __m128 __DEFAULT_FN_ATTRS128
1324_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1325{
1326  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1327                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1328                                                (__v4sf) __B,
1329                                                (__v4sf) __C),
1330                    (__v4sf) __C);
1331}
1332
1333static __inline__ __m128 __DEFAULT_FN_ATTRS128
1334_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1335{
1336  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1337                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1338                                                (__v4sf) __B,
1339                                                (__v4sf) __C),
1340                    (__v4sf)_mm_setzero_ps());
1341}
1342
1343static __inline__ __m128 __DEFAULT_FN_ATTRS128
1344_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1345{
1346  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1347                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1348                                                (__v4sf) __B,
1349                                                -(__v4sf) __C),
1350                    (__v4sf) __A);
1351}
1352
1353static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1355{
1356  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1358                                                (__v4sf) __B,
1359                                                -(__v4sf) __C),
1360                    (__v4sf)_mm_setzero_ps());
1361}
1362
1363static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1365                         __m256 __C)
1366{
1367  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1368                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1369                                                   (__v8sf) __B,
1370                                                   (__v8sf) __C),
1371                    (__v8sf) __A);
1372}
1373
1374static __inline__ __m256 __DEFAULT_FN_ATTRS256
1375_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1376{
1377  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1378                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1379                                                   (__v8sf) __B,
1380                                                   (__v8sf) __C),
1381                    (__v8sf) __C);
1382}
1383
1384static __inline__ __m256 __DEFAULT_FN_ATTRS256
1385_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1386{
1387  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1388                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1389                                                   (__v8sf) __B,
1390                                                   (__v8sf) __C),
1391                    (__v8sf)_mm256_setzero_ps());
1392}
1393
1394static __inline__ __m256 __DEFAULT_FN_ATTRS256
1395_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1396{
1397  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1398                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1399                                                   (__v8sf) __B,
1400                                                   -(__v8sf) __C),
1401                    (__v8sf) __A);
1402}
1403
1404static __inline__ __m256 __DEFAULT_FN_ATTRS256
1405_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1406{
1407  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1408                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1409                                                   (__v8sf) __B,
1410                                                   -(__v8sf) __C),
1411                    (__v8sf)_mm256_setzero_ps());
1412}
1413
1414static __inline__ __m128d __DEFAULT_FN_ATTRS128
1415_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1416{
1417  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1418                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1419                                             (__v2df) __B,
1420                                             -(__v2df) __C),
1421                    (__v2df) __C);
1422}
1423
1424static __inline__ __m256d __DEFAULT_FN_ATTRS256
1425_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1426{
1427  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1428                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1429                                                (__v4df) __B,
1430                                                -(__v4df) __C),
1431                    (__v4df) __C);
1432}
1433
1434static __inline__ __m128 __DEFAULT_FN_ATTRS128
1435_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1436{
1437  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1438                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1439                                             (__v4sf) __B,
1440                                             -(__v4sf) __C),
1441                    (__v4sf) __C);
1442}
1443
1444static __inline__ __m256 __DEFAULT_FN_ATTRS256
1445_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1446{
1447  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1448                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1449                                                (__v8sf) __B,
1450                                                -(__v8sf) __C),
1451                    (__v8sf) __C);
1452}
1453
1454static __inline__ __m128d __DEFAULT_FN_ATTRS128
1455_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1456{
1457  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1458                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1459                                                (__v2df) __B,
1460                                                -(__v2df) __C),
1461                    (__v2df) __C);
1462}
1463
1464static __inline__ __m256d __DEFAULT_FN_ATTRS256
1465_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1466{
1467  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1468                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1469                                                   (__v4df) __B,
1470                                                   -(__v4df) __C),
1471                    (__v4df) __C);
1472}
1473
1474static __inline__ __m128 __DEFAULT_FN_ATTRS128
1475_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1476{
1477  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1478                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1479                                                (__v4sf) __B,
1480                                                -(__v4sf) __C),
1481                    (__v4sf) __C);
1482}
1483
1484static __inline__ __m256 __DEFAULT_FN_ATTRS256
1485_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1486{
1487  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1488                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1489                                                   (__v8sf) __B,
1490                                                   -(__v8sf) __C),
1491                    (__v8sf) __C);
1492}
1493
1494static __inline__ __m128d __DEFAULT_FN_ATTRS128
1495_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1496{
1497  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1498                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1499                                             -(__v2df) __B,
1500                                             (__v2df) __C),
1501                    (__v2df) __A);
1502}
1503
1504static __inline__ __m256d __DEFAULT_FN_ATTRS256
1505_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1506{
1507  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1508                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1509                                                -(__v4df) __B,
1510                                                (__v4df) __C),
1511                    (__v4df) __A);
1512}
1513
1514static __inline__ __m128 __DEFAULT_FN_ATTRS128
1515_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1516{
1517  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1518                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1519                                             -(__v4sf) __B,
1520                                             (__v4sf) __C),
1521                    (__v4sf) __A);
1522}
1523
1524static __inline__ __m256 __DEFAULT_FN_ATTRS256
1525_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1526{
1527  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1528                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1529                                                -(__v8sf) __B,
1530                                                (__v8sf) __C),
1531                    (__v8sf) __A);
1532}
1533
1534static __inline__ __m128d __DEFAULT_FN_ATTRS128
1535_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1536{
1537  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1538                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1539                                             -(__v2df) __B,
1540                                             -(__v2df) __C),
1541                    (__v2df) __A);
1542}
1543
1544static __inline__ __m128d __DEFAULT_FN_ATTRS128
1545_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1546{
1547  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1548                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1549                                             -(__v2df) __B,
1550                                             -(__v2df) __C),
1551                    (__v2df) __C);
1552}
1553
1554static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1556{
1557  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1558                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1559                                                -(__v4df) __B,
1560                                                -(__v4df) __C),
1561                    (__v4df) __A);
1562}
1563
1564static __inline__ __m256d __DEFAULT_FN_ATTRS256
1565_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1566{
1567  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1568                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1569                                                -(__v4df) __B,
1570                                                -(__v4df) __C),
1571                    (__v4df) __C);
1572}
1573
1574static __inline__ __m128 __DEFAULT_FN_ATTRS128
1575_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1576{
1577  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1578                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1579                                             -(__v4sf) __B,
1580                                             -(__v4sf) __C),
1581                    (__v4sf) __A);
1582}
1583
1584static __inline__ __m128 __DEFAULT_FN_ATTRS128
1585_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1586{
1587  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1588                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1589                                             -(__v4sf) __B,
1590                                             -(__v4sf) __C),
1591                    (__v4sf) __C);
1592}
1593
1594static __inline__ __m256 __DEFAULT_FN_ATTRS256
1595_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1596{
1597  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1598                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1599                                                -(__v8sf) __B,
1600                                                -(__v8sf) __C),
1601                    (__v8sf) __A);
1602}
1603
1604static __inline__ __m256 __DEFAULT_FN_ATTRS256
1605_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1606{
1607  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1608                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1609                                                -(__v8sf) __B,
1610                                                -(__v8sf) __C),
1611                    (__v8sf) __C);
1612}
1613
1614static __inline__ __m128d __DEFAULT_FN_ATTRS128
1615_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1616  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1617                                              (__v2df)_mm_add_pd(__A, __B),
1618                                              (__v2df)__W);
1619}
1620
1621static __inline__ __m128d __DEFAULT_FN_ATTRS128
1622_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1623  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1624                                              (__v2df)_mm_add_pd(__A, __B),
1625                                              (__v2df)_mm_setzero_pd());
1626}
1627
1628static __inline__ __m256d __DEFAULT_FN_ATTRS256
1629_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1630  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1631                                              (__v4df)_mm256_add_pd(__A, __B),
1632                                              (__v4df)__W);
1633}
1634
1635static __inline__ __m256d __DEFAULT_FN_ATTRS256
1636_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1637  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1638                                              (__v4df)_mm256_add_pd(__A, __B),
1639                                              (__v4df)_mm256_setzero_pd());
1640}
1641
1642static __inline__ __m128 __DEFAULT_FN_ATTRS128
1643_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1644  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1645                                             (__v4sf)_mm_add_ps(__A, __B),
1646                                             (__v4sf)__W);
1647}
1648
1649static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1651  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1652                                             (__v4sf)_mm_add_ps(__A, __B),
1653                                             (__v4sf)_mm_setzero_ps());
1654}
1655
1656static __inline__ __m256 __DEFAULT_FN_ATTRS256
1657_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1658  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1659                                             (__v8sf)_mm256_add_ps(__A, __B),
1660                                             (__v8sf)__W);
1661}
1662
1663static __inline__ __m256 __DEFAULT_FN_ATTRS256
1664_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1665  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1666                                             (__v8sf)_mm256_add_ps(__A, __B),
1667                                             (__v8sf)_mm256_setzero_ps());
1668}
1669
1670static __inline__ __m128i __DEFAULT_FN_ATTRS128
1671_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1672  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1673                (__v4si) __W,
1674                (__v4si) __A);
1675}
1676
1677static __inline__ __m256i __DEFAULT_FN_ATTRS256
1678_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1679  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1680                (__v8si) __W,
1681                (__v8si) __A);
1682}
1683
1684static __inline__ __m128d __DEFAULT_FN_ATTRS128
1685_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1686  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1687                 (__v2df) __W,
1688                 (__v2df) __A);
1689}
1690
1691static __inline__ __m256d __DEFAULT_FN_ATTRS256
1692_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1693  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1694                 (__v4df) __W,
1695                 (__v4df) __A);
1696}
1697
1698static __inline__ __m128 __DEFAULT_FN_ATTRS128
1699_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1700  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1701                (__v4sf) __W,
1702                (__v4sf) __A);
1703}
1704
1705static __inline__ __m256 __DEFAULT_FN_ATTRS256
1706_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1707  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1708                (__v8sf) __W,
1709                (__v8sf) __A);
1710}
1711
1712static __inline__ __m128i __DEFAULT_FN_ATTRS128
1713_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1714  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1715                (__v2di) __W,
1716                (__v2di) __A);
1717}
1718
1719static __inline__ __m256i __DEFAULT_FN_ATTRS256
1720_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1721  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1722                (__v4di) __W,
1723                (__v4di) __A);
1724}
1725
1726static __inline__ __m128d __DEFAULT_FN_ATTRS128
1727_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1728  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1729                  (__v2df) __W,
1730                  (__mmask8) __U);
1731}
1732
1733static __inline__ __m128d __DEFAULT_FN_ATTRS128
1734_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1735  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1736                  (__v2df)
1737                  _mm_setzero_pd (),
1738                  (__mmask8) __U);
1739}
1740
1741static __inline__ __m256d __DEFAULT_FN_ATTRS256
1742_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1743  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1744                  (__v4df) __W,
1745                  (__mmask8) __U);
1746}
1747
1748static __inline__ __m256d __DEFAULT_FN_ATTRS256
1749_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1750  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1751                  (__v4df)
1752                  _mm256_setzero_pd (),
1753                  (__mmask8) __U);
1754}
1755
1756static __inline__ __m128i __DEFAULT_FN_ATTRS128
1757_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1758  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1759                  (__v2di) __W,
1760                  (__mmask8) __U);
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1765  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1766                  (__v2di)
1767                  _mm_setzero_si128 (),
1768                  (__mmask8) __U);
1769}
1770
1771static __inline__ __m256i __DEFAULT_FN_ATTRS256
1772_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1773  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1774                  (__v4di) __W,
1775                  (__mmask8) __U);
1776}
1777
1778static __inline__ __m256i __DEFAULT_FN_ATTRS256
1779_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1780  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1781                  (__v4di)
1782                  _mm256_setzero_si256 (),
1783                  (__mmask8) __U);
1784}
1785
1786static __inline__ __m128 __DEFAULT_FN_ATTRS128
1787_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1788  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1789                 (__v4sf) __W,
1790                 (__mmask8) __U);
1791}
1792
1793static __inline__ __m128 __DEFAULT_FN_ATTRS128
1794_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1795  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1796                 (__v4sf)
1797                 _mm_setzero_ps (),
1798                 (__mmask8) __U);
1799}
1800
1801static __inline__ __m256 __DEFAULT_FN_ATTRS256
1802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1803  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1804                 (__v8sf) __W,
1805                 (__mmask8) __U);
1806}
1807
1808static __inline__ __m256 __DEFAULT_FN_ATTRS256
1809_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1810  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1811                 (__v8sf)
1812                 _mm256_setzero_ps (),
1813                 (__mmask8) __U);
1814}
1815
1816static __inline__ __m128i __DEFAULT_FN_ATTRS128
1817_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1818  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1819                  (__v4si) __W,
1820                  (__mmask8) __U);
1821}
1822
1823static __inline__ __m128i __DEFAULT_FN_ATTRS128
1824_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1825  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1826                  (__v4si)
1827                  _mm_setzero_si128 (),
1828                  (__mmask8) __U);
1829}
1830
1831static __inline__ __m256i __DEFAULT_FN_ATTRS256
1832_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1833  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1834                  (__v8si) __W,
1835                  (__mmask8) __U);
1836}
1837
1838static __inline__ __m256i __DEFAULT_FN_ATTRS256
1839_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1840  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1841                  (__v8si)
1842                  _mm256_setzero_si256 (),
1843                  (__mmask8) __U);
1844}
1845
1846static __inline__ void __DEFAULT_FN_ATTRS128
1847_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1848  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1849            (__v2df) __A,
1850            (__mmask8) __U);
1851}
1852
1853static __inline__ void __DEFAULT_FN_ATTRS256
1854_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1855  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1856            (__v4df) __A,
1857            (__mmask8) __U);
1858}
1859
1860static __inline__ void __DEFAULT_FN_ATTRS128
1861_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1862  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1863            (__v2di) __A,
1864            (__mmask8) __U);
1865}
1866
1867static __inline__ void __DEFAULT_FN_ATTRS256
1868_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1869  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1870            (__v4di) __A,
1871            (__mmask8) __U);
1872}
1873
1874static __inline__ void __DEFAULT_FN_ATTRS128
1875_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1876  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1877            (__v4sf) __A,
1878            (__mmask8) __U);
1879}
1880
1881static __inline__ void __DEFAULT_FN_ATTRS256
1882_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1883  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1884            (__v8sf) __A,
1885            (__mmask8) __U);
1886}
1887
1888static __inline__ void __DEFAULT_FN_ATTRS128
1889_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1890  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1891            (__v4si) __A,
1892            (__mmask8) __U);
1893}
1894
1895static __inline__ void __DEFAULT_FN_ATTRS256
1896_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1897  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1898            (__v8si) __A,
1899            (__mmask8) __U);
1900}
1901
1902static __inline__ __m128d __DEFAULT_FN_ATTRS128
1903_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1904  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1905                                              (__v2df)_mm_cvtepi32_pd(__A),
1906                                              (__v2df)__W);
1907}
1908
1909static __inline__ __m128d __DEFAULT_FN_ATTRS128
1910_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1911  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1912                                              (__v2df)_mm_cvtepi32_pd(__A),
1913                                              (__v2df)_mm_setzero_pd());
1914}
1915
1916static __inline__ __m256d __DEFAULT_FN_ATTRS256
1917_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1918  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1919                                              (__v4df)_mm256_cvtepi32_pd(__A),
1920                                              (__v4df)__W);
1921}
1922
1923static __inline__ __m256d __DEFAULT_FN_ATTRS256
1924_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1925  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1926                                              (__v4df)_mm256_cvtepi32_pd(__A),
1927                                              (__v4df)_mm256_setzero_pd());
1928}
1929
1930static __inline__ __m128 __DEFAULT_FN_ATTRS128
1931_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1932  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1933                                             (__v4sf)_mm_cvtepi32_ps(__A),
1934                                             (__v4sf)__W);
1935}
1936
1937static __inline__ __m128 __DEFAULT_FN_ATTRS128
1938_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1939  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1940                                             (__v4sf)_mm_cvtepi32_ps(__A),
1941                                             (__v4sf)_mm_setzero_ps());
1942}
1943
1944static __inline__ __m256 __DEFAULT_FN_ATTRS256
1945_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1946  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1947                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1948                                             (__v8sf)__W);
1949}
1950
1951static __inline__ __m256 __DEFAULT_FN_ATTRS256
1952_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1953  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1954                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1955                                             (__v8sf)_mm256_setzero_ps());
1956}
1957
1958static __inline__ __m128i __DEFAULT_FN_ATTRS128
1959_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1960  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1961                (__v4si) __W,
1962                (__mmask8) __U);
1963}
1964
1965static __inline__ __m128i __DEFAULT_FN_ATTRS128
1966_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1967  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1968                (__v4si)
1969                _mm_setzero_si128 (),
1970                (__mmask8) __U);
1971}
1972
1973static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1975  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1976                                             (__v4si)_mm256_cvtpd_epi32(__A),
1977                                             (__v4si)__W);
1978}
1979
1980static __inline__ __m128i __DEFAULT_FN_ATTRS256
1981_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1982  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1983                                             (__v4si)_mm256_cvtpd_epi32(__A),
1984                                             (__v4si)_mm_setzero_si128());
1985}
1986
1987static __inline__ __m128 __DEFAULT_FN_ATTRS128
1988_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1989  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1990            (__v4sf) __W,
1991            (__mmask8) __U);
1992}
1993
1994static __inline__ __m128 __DEFAULT_FN_ATTRS128
1995_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1996  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1997            (__v4sf)
1998            _mm_setzero_ps (),
1999            (__mmask8) __U);
2000}
2001
2002static __inline__ __m128 __DEFAULT_FN_ATTRS256
2003_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2004  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2005                                             (__v4sf)_mm256_cvtpd_ps(__A),
2006                                             (__v4sf)__W);
2007}
2008
2009static __inline__ __m128 __DEFAULT_FN_ATTRS256
2010_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2011  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2012                                             (__v4sf)_mm256_cvtpd_ps(__A),
2013                                             (__v4sf)_mm_setzero_ps());
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_cvtpd_epu32 (__m128d __A) {
2018  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2019                 (__v4si)
2020                 _mm_setzero_si128 (),
2021                 (__mmask8) -1);
2022}
2023
2024static __inline__ __m128i __DEFAULT_FN_ATTRS128
2025_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2026  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2027                 (__v4si) __W,
2028                 (__mmask8) __U);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS128
2032_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2033  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2034                 (__v4si)
2035                 _mm_setzero_si128 (),
2036                 (__mmask8) __U);
2037}
2038
2039static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040_mm256_cvtpd_epu32 (__m256d __A) {
2041  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2042                 (__v4si)
2043                 _mm_setzero_si128 (),
2044                 (__mmask8) -1);
2045}
2046
2047static __inline__ __m128i __DEFAULT_FN_ATTRS256
2048_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2049  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2050                 (__v4si) __W,
2051                 (__mmask8) __U);
2052}
2053
2054static __inline__ __m128i __DEFAULT_FN_ATTRS256
2055_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2056  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2057                 (__v4si)
2058                 _mm_setzero_si128 (),
2059                 (__mmask8) __U);
2060}
2061
2062static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2064  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2065                                             (__v4si)_mm_cvtps_epi32(__A),
2066                                             (__v4si)__W);
2067}
2068
2069static __inline__ __m128i __DEFAULT_FN_ATTRS128
2070_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2071  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2072                                             (__v4si)_mm_cvtps_epi32(__A),
2073                                             (__v4si)_mm_setzero_si128());
2074}
2075
2076static __inline__ __m256i __DEFAULT_FN_ATTRS256
2077_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2078  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2079                                             (__v8si)_mm256_cvtps_epi32(__A),
2080                                             (__v8si)__W);
2081}
2082
2083static __inline__ __m256i __DEFAULT_FN_ATTRS256
2084_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2085  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2086                                             (__v8si)_mm256_cvtps_epi32(__A),
2087                                             (__v8si)_mm256_setzero_si256());
2088}
2089
2090static __inline__ __m128d __DEFAULT_FN_ATTRS128
2091_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2092  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2093                                              (__v2df)_mm_cvtps_pd(__A),
2094                                              (__v2df)__W);
2095}
2096
2097static __inline__ __m128d __DEFAULT_FN_ATTRS128
2098_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2099  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2100                                              (__v2df)_mm_cvtps_pd(__A),
2101                                              (__v2df)_mm_setzero_pd());
2102}
2103
2104static __inline__ __m256d __DEFAULT_FN_ATTRS256
2105_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2106  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2107                                              (__v4df)_mm256_cvtps_pd(__A),
2108                                              (__v4df)__W);
2109}
2110
2111static __inline__ __m256d __DEFAULT_FN_ATTRS256
2112_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2113  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2114                                              (__v4df)_mm256_cvtps_pd(__A),
2115                                              (__v4df)_mm256_setzero_pd());
2116}
2117
2118static __inline__ __m128i __DEFAULT_FN_ATTRS128
2119_mm_cvtps_epu32 (__m128 __A) {
2120  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2121                 (__v4si)
2122                 _mm_setzero_si128 (),
2123                 (__mmask8) -1);
2124}
2125
2126static __inline__ __m128i __DEFAULT_FN_ATTRS128
2127_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2128  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2129                 (__v4si) __W,
2130                 (__mmask8) __U);
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2135  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2136                 (__v4si)
2137                 _mm_setzero_si128 (),
2138                 (__mmask8) __U);
2139}
2140
2141static __inline__ __m256i __DEFAULT_FN_ATTRS256
2142_mm256_cvtps_epu32 (__m256 __A) {
2143  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2144                 (__v8si)
2145                 _mm256_setzero_si256 (),
2146                 (__mmask8) -1);
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2151  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2152                 (__v8si) __W,
2153                 (__mmask8) __U);
2154}
2155
2156static __inline__ __m256i __DEFAULT_FN_ATTRS256
2157_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2158  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2159                 (__v8si)
2160                 _mm256_setzero_si256 (),
2161                 (__mmask8) __U);
2162}
2163
2164static __inline__ __m128i __DEFAULT_FN_ATTRS128
2165_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2166  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2167                 (__v4si) __W,
2168                 (__mmask8) __U);
2169}
2170
2171static __inline__ __m128i __DEFAULT_FN_ATTRS128
2172_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2173  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2174                 (__v4si)
2175                 _mm_setzero_si128 (),
2176                 (__mmask8) __U);
2177}
2178
2179static __inline__ __m128i __DEFAULT_FN_ATTRS256
2180_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2181  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2182                                             (__v4si)_mm256_cvttpd_epi32(__A),
2183                                             (__v4si)__W);
2184}
2185
2186static __inline__ __m128i __DEFAULT_FN_ATTRS256
2187_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2188  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2189                                             (__v4si)_mm256_cvttpd_epi32(__A),
2190                                             (__v4si)_mm_setzero_si128());
2191}
2192
2193static __inline__ __m128i __DEFAULT_FN_ATTRS128
2194_mm_cvttpd_epu32 (__m128d __A) {
2195  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2196                  (__v4si)
2197                  _mm_setzero_si128 (),
2198                  (__mmask8) -1);
2199}
2200
2201static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2203  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2204                  (__v4si) __W,
2205                  (__mmask8) __U);
2206}
2207
2208static __inline__ __m128i __DEFAULT_FN_ATTRS128
2209_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2210  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2211                  (__v4si)
2212                  _mm_setzero_si128 (),
2213                  (__mmask8) __U);
2214}
2215
2216static __inline__ __m128i __DEFAULT_FN_ATTRS256
2217_mm256_cvttpd_epu32 (__m256d __A) {
2218  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2219                  (__v4si)
2220                  _mm_setzero_si128 (),
2221                  (__mmask8) -1);
2222}
2223
2224static __inline__ __m128i __DEFAULT_FN_ATTRS256
2225_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2226  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2227                  (__v4si) __W,
2228                  (__mmask8) __U);
2229}
2230
2231static __inline__ __m128i __DEFAULT_FN_ATTRS256
2232_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2233  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2234                  (__v4si)
2235                  _mm_setzero_si128 (),
2236                  (__mmask8) __U);
2237}
2238
2239static __inline__ __m128i __DEFAULT_FN_ATTRS128
2240_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2241  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2242                                             (__v4si)_mm_cvttps_epi32(__A),
2243                                             (__v4si)__W);
2244}
2245
2246static __inline__ __m128i __DEFAULT_FN_ATTRS128
2247_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2248  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2249                                             (__v4si)_mm_cvttps_epi32(__A),
2250                                             (__v4si)_mm_setzero_si128());
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2255  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2256                                             (__v8si)_mm256_cvttps_epi32(__A),
2257                                             (__v8si)__W);
2258}
2259
2260static __inline__ __m256i __DEFAULT_FN_ATTRS256
2261_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2262  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2263                                             (__v8si)_mm256_cvttps_epi32(__A),
2264                                             (__v8si)_mm256_setzero_si256());
2265}
2266
2267static __inline__ __m128i __DEFAULT_FN_ATTRS128
2268_mm_cvttps_epu32 (__m128 __A) {
2269  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2270                  (__v4si)
2271                  _mm_setzero_si128 (),
2272                  (__mmask8) -1);
2273}
2274
2275static __inline__ __m128i __DEFAULT_FN_ATTRS128
2276_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2277  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2278                  (__v4si) __W,
2279                  (__mmask8) __U);
2280}
2281
2282static __inline__ __m128i __DEFAULT_FN_ATTRS128
2283_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2284  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2285                  (__v4si)
2286                  _mm_setzero_si128 (),
2287                  (__mmask8) __U);
2288}
2289
2290static __inline__ __m256i __DEFAULT_FN_ATTRS256
2291_mm256_cvttps_epu32 (__m256 __A) {
2292  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2293                  (__v8si)
2294                  _mm256_setzero_si256 (),
2295                  (__mmask8) -1);
2296}
2297
2298static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2300  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2301                  (__v8si) __W,
2302                  (__mmask8) __U);
2303}
2304
2305static __inline__ __m256i __DEFAULT_FN_ATTRS256
2306_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2307  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2308                  (__v8si)
2309                  _mm256_setzero_si256 (),
2310                  (__mmask8) __U);
2311}
2312
2313static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314_mm_cvtepu32_pd (__m128i __A) {
2315  return (__m128d) __builtin_convertvector(
2316      __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2321  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2322                                              (__v2df)_mm_cvtepu32_pd(__A),
2323                                              (__v2df)__W);
2324}
2325
2326static __inline__ __m128d __DEFAULT_FN_ATTRS128
2327_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2328  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2329                                              (__v2df)_mm_cvtepu32_pd(__A),
2330                                              (__v2df)_mm_setzero_pd());
2331}
2332
2333static __inline__ __m256d __DEFAULT_FN_ATTRS256
2334_mm256_cvtepu32_pd (__m128i __A) {
2335  return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2336}
2337
2338static __inline__ __m256d __DEFAULT_FN_ATTRS256
2339_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2340  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2341                                              (__v4df)_mm256_cvtepu32_pd(__A),
2342                                              (__v4df)__W);
2343}
2344
2345static __inline__ __m256d __DEFAULT_FN_ATTRS256
2346_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2347  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2348                                              (__v4df)_mm256_cvtepu32_pd(__A),
2349                                              (__v4df)_mm256_setzero_pd());
2350}
2351
2352static __inline__ __m128 __DEFAULT_FN_ATTRS128
2353_mm_cvtepu32_ps (__m128i __A) {
2354  return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2355}
2356
2357static __inline__ __m128 __DEFAULT_FN_ATTRS128
2358_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2359  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2360                                             (__v4sf)_mm_cvtepu32_ps(__A),
2361                                             (__v4sf)__W);
2362}
2363
2364static __inline__ __m128 __DEFAULT_FN_ATTRS128
2365_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2366  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2367                                             (__v4sf)_mm_cvtepu32_ps(__A),
2368                                             (__v4sf)_mm_setzero_ps());
2369}
2370
2371static __inline__ __m256 __DEFAULT_FN_ATTRS256
2372_mm256_cvtepu32_ps (__m256i __A) {
2373  return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2374}
2375
2376static __inline__ __m256 __DEFAULT_FN_ATTRS256
2377_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2378  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2379                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2380                                             (__v8sf)__W);
2381}
2382
2383static __inline__ __m256 __DEFAULT_FN_ATTRS256
2384_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2385  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2386                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2387                                             (__v8sf)_mm256_setzero_ps());
2388}
2389
2390static __inline__ __m128d __DEFAULT_FN_ATTRS128
2391_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2392  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2393                                              (__v2df)_mm_div_pd(__A, __B),
2394                                              (__v2df)__W);
2395}
2396
2397static __inline__ __m128d __DEFAULT_FN_ATTRS128
2398_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2399  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2400                                              (__v2df)_mm_div_pd(__A, __B),
2401                                              (__v2df)_mm_setzero_pd());
2402}
2403
2404static __inline__ __m256d __DEFAULT_FN_ATTRS256
2405_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2406  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2407                                              (__v4df)_mm256_div_pd(__A, __B),
2408                                              (__v4df)__W);
2409}
2410
2411static __inline__ __m256d __DEFAULT_FN_ATTRS256
2412_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2413  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2414                                              (__v4df)_mm256_div_pd(__A, __B),
2415                                              (__v4df)_mm256_setzero_pd());
2416}
2417
2418static __inline__ __m128 __DEFAULT_FN_ATTRS128
2419_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2420  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2421                                             (__v4sf)_mm_div_ps(__A, __B),
2422                                             (__v4sf)__W);
2423}
2424
2425static __inline__ __m128 __DEFAULT_FN_ATTRS128
2426_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2427  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2428                                             (__v4sf)_mm_div_ps(__A, __B),
2429                                             (__v4sf)_mm_setzero_ps());
2430}
2431
2432static __inline__ __m256 __DEFAULT_FN_ATTRS256
2433_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2434  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2435                                             (__v8sf)_mm256_div_ps(__A, __B),
2436                                             (__v8sf)__W);
2437}
2438
2439static __inline__ __m256 __DEFAULT_FN_ATTRS256
2440_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2441  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2442                                             (__v8sf)_mm256_div_ps(__A, __B),
2443                                             (__v8sf)_mm256_setzero_ps());
2444}
2445
2446static __inline__ __m128d __DEFAULT_FN_ATTRS128
2447_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2448  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2449                (__v2df) __W,
2450                (__mmask8) __U);
2451}
2452
2453static __inline__ __m128d __DEFAULT_FN_ATTRS128
2454_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2455  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2456                 (__v2df)
2457                 _mm_setzero_pd (),
2458                 (__mmask8) __U);
2459}
2460
2461static __inline__ __m256d __DEFAULT_FN_ATTRS256
2462_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2463  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2464                (__v4df) __W,
2465                (__mmask8) __U);
2466}
2467
2468static __inline__ __m256d __DEFAULT_FN_ATTRS256
2469_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2470  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2471                 (__v4df)
2472                 _mm256_setzero_pd (),
2473                 (__mmask8) __U);
2474}
2475
2476static __inline__ __m128i __DEFAULT_FN_ATTRS128
2477_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2478  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2479                (__v2di) __W,
2480                (__mmask8) __U);
2481}
2482
2483static __inline__ __m128i __DEFAULT_FN_ATTRS128
2484_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2485  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2486                 (__v2di)
2487                 _mm_setzero_si128 (),
2488                 (__mmask8) __U);
2489}
2490
2491static __inline__ __m256i __DEFAULT_FN_ATTRS256
2492_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2493  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2494                (__v4di) __W,
2495                (__mmask8) __U);
2496}
2497
2498static __inline__ __m256i __DEFAULT_FN_ATTRS256
2499_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2500  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2501                 (__v4di)
2502                 _mm256_setzero_si256 (),
2503                 (__mmask8) __U);
2504}
2505
2506static __inline__ __m128d __DEFAULT_FN_ATTRS128
2507_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2508  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2509              (__v2df) __W,
2510              (__mmask8)
2511              __U);
2512}
2513
2514static __inline__ __m128d __DEFAULT_FN_ATTRS128
2515_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2516  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2517               (__v2df)
2518               _mm_setzero_pd (),
2519               (__mmask8)
2520               __U);
2521}
2522
2523static __inline__ __m256d __DEFAULT_FN_ATTRS256
2524_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2525  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2526              (__v4df) __W,
2527              (__mmask8)
2528              __U);
2529}
2530
2531static __inline__ __m256d __DEFAULT_FN_ATTRS256
2532_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2533  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2534               (__v4df)
2535               _mm256_setzero_pd (),
2536               (__mmask8)
2537               __U);
2538}
2539
2540static __inline__ __m128i __DEFAULT_FN_ATTRS128
2541_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2542  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2543              (__v2di) __W,
2544              (__mmask8)
2545              __U);
2546}
2547
2548static __inline__ __m128i __DEFAULT_FN_ATTRS128
2549_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2550  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2551               (__v2di)
2552               _mm_setzero_si128 (),
2553               (__mmask8)
2554               __U);
2555}
2556
2557static __inline__ __m256i __DEFAULT_FN_ATTRS256
2558_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2559             void const *__P) {
2560  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2561              (__v4di) __W,
2562              (__mmask8)
2563              __U);
2564}
2565
2566static __inline__ __m256i __DEFAULT_FN_ATTRS256
2567_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2568  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2569               (__v4di)
2570               _mm256_setzero_si256 (),
2571               (__mmask8)
2572               __U);
2573}
2574
2575static __inline__ __m128 __DEFAULT_FN_ATTRS128
2576_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2577  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2578                   (__v4sf) __W,
2579                   (__mmask8) __U);
2580}
2581
2582static __inline__ __m128 __DEFAULT_FN_ATTRS128
2583_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2584  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2585              (__v4sf)
2586              _mm_setzero_ps (),
2587              (__mmask8)
2588              __U);
2589}
2590
2591static __inline__ __m256 __DEFAULT_FN_ATTRS256
2592_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2593  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2594                   (__v8sf) __W,
2595                   (__mmask8) __U);
2596}
2597
2598static __inline__ __m256 __DEFAULT_FN_ATTRS256
2599_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2600  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2601              (__v8sf)
2602              _mm256_setzero_ps (),
2603              (__mmask8)
2604              __U);
2605}
2606
2607static __inline__ __m128i __DEFAULT_FN_ATTRS128
2608_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2609  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2610              (__v4si) __W,
2611              (__mmask8)
2612              __U);
2613}
2614
2615static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2617  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2618               (__v4si)
2619               _mm_setzero_si128 (),
2620               (__mmask8)     __U);
2621}
2622
2623static __inline__ __m256i __DEFAULT_FN_ATTRS256
2624_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2625             void const *__P) {
2626  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2627              (__v8si) __W,
2628              (__mmask8)
2629              __U);
2630}
2631
2632static __inline__ __m256i __DEFAULT_FN_ATTRS256
2633_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2634  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2635               (__v8si)
2636               _mm256_setzero_si256 (),
2637               (__mmask8)
2638               __U);
2639}
2640
2641static __inline__ __m128 __DEFAULT_FN_ATTRS128
2642_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2643  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2644               (__v4sf) __W,
2645               (__mmask8) __U);
2646}
2647
2648static __inline__ __m128 __DEFAULT_FN_ATTRS128
2649_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2650  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2651                (__v4sf)
2652                _mm_setzero_ps (),
2653                (__mmask8) __U);
2654}
2655
2656static __inline__ __m256 __DEFAULT_FN_ATTRS256
2657_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2658  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2659               (__v8sf) __W,
2660               (__mmask8) __U);
2661}
2662
2663static __inline__ __m256 __DEFAULT_FN_ATTRS256
2664_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2665  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2666                (__v8sf)
2667                _mm256_setzero_ps (),
2668                (__mmask8) __U);
2669}
2670
2671static __inline__ __m128i __DEFAULT_FN_ATTRS128
2672_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2673  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2674                (__v4si) __W,
2675                (__mmask8) __U);
2676}
2677
2678static __inline__ __m128i __DEFAULT_FN_ATTRS128
2679_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2680  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2681                 (__v4si)
2682                 _mm_setzero_si128 (),
2683                 (__mmask8) __U);
2684}
2685
2686static __inline__ __m256i __DEFAULT_FN_ATTRS256
2687_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2688  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2689                (__v8si) __W,
2690                (__mmask8) __U);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2695  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2696                 (__v8si)
2697                 _mm256_setzero_si256 (),
2698                 (__mmask8) __U);
2699}
2700
2701static __inline__ __m128d __DEFAULT_FN_ATTRS128
2702_mm_getexp_pd (__m128d __A) {
2703  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2704                (__v2df)
2705                _mm_setzero_pd (),
2706                (__mmask8) -1);
2707}
2708
2709static __inline__ __m128d __DEFAULT_FN_ATTRS128
2710_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2711  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2712                (__v2df) __W,
2713                (__mmask8) __U);
2714}
2715
2716static __inline__ __m128d __DEFAULT_FN_ATTRS128
2717_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2718  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2719                (__v2df)
2720                _mm_setzero_pd (),
2721                (__mmask8) __U);
2722}
2723
2724static __inline__ __m256d __DEFAULT_FN_ATTRS256
2725_mm256_getexp_pd (__m256d __A) {
2726  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2727                (__v4df)
2728                _mm256_setzero_pd (),
2729                (__mmask8) -1);
2730}
2731
2732static __inline__ __m256d __DEFAULT_FN_ATTRS256
2733_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2734  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2735                (__v4df) __W,
2736                (__mmask8) __U);
2737}
2738
2739static __inline__ __m256d __DEFAULT_FN_ATTRS256
2740_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2741  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2742                (__v4df)
2743                _mm256_setzero_pd (),
2744                (__mmask8) __U);
2745}
2746
2747static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748_mm_getexp_ps (__m128 __A) {
2749  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2750               (__v4sf)
2751               _mm_setzero_ps (),
2752               (__mmask8) -1);
2753}
2754
2755static __inline__ __m128 __DEFAULT_FN_ATTRS128
2756_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2757  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2758               (__v4sf) __W,
2759               (__mmask8) __U);
2760}
2761
2762static __inline__ __m128 __DEFAULT_FN_ATTRS128
2763_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2764  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2765               (__v4sf)
2766               _mm_setzero_ps (),
2767               (__mmask8) __U);
2768}
2769
2770static __inline__ __m256 __DEFAULT_FN_ATTRS256
2771_mm256_getexp_ps (__m256 __A) {
2772  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2773               (__v8sf)
2774               _mm256_setzero_ps (),
2775               (__mmask8) -1);
2776}
2777
2778static __inline__ __m256 __DEFAULT_FN_ATTRS256
2779_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2780  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2781               (__v8sf) __W,
2782               (__mmask8) __U);
2783}
2784
2785static __inline__ __m256 __DEFAULT_FN_ATTRS256
2786_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2787  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2788               (__v8sf)
2789               _mm256_setzero_ps (),
2790               (__mmask8) __U);
2791}
2792
2793static __inline__ __m128d __DEFAULT_FN_ATTRS128
2794_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2795  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2796                                              (__v2df)_mm_max_pd(__A, __B),
2797                                              (__v2df)__W);
2798}
2799
2800static __inline__ __m128d __DEFAULT_FN_ATTRS128
2801_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2802  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2803                                              (__v2df)_mm_max_pd(__A, __B),
2804                                              (__v2df)_mm_setzero_pd());
2805}
2806
2807static __inline__ __m256d __DEFAULT_FN_ATTRS256
2808_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2809  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2810                                              (__v4df)_mm256_max_pd(__A, __B),
2811                                              (__v4df)__W);
2812}
2813
2814static __inline__ __m256d __DEFAULT_FN_ATTRS256
2815_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2816  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2817                                              (__v4df)_mm256_max_pd(__A, __B),
2818                                              (__v4df)_mm256_setzero_pd());
2819}
2820
2821static __inline__ __m128 __DEFAULT_FN_ATTRS128
2822_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2823  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2824                                             (__v4sf)_mm_max_ps(__A, __B),
2825                                             (__v4sf)__W);
2826}
2827
2828static __inline__ __m128 __DEFAULT_FN_ATTRS128
2829_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2830  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2831                                             (__v4sf)_mm_max_ps(__A, __B),
2832                                             (__v4sf)_mm_setzero_ps());
2833}
2834
2835static __inline__ __m256 __DEFAULT_FN_ATTRS256
2836_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2837  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2838                                             (__v8sf)_mm256_max_ps(__A, __B),
2839                                             (__v8sf)__W);
2840}
2841
2842static __inline__ __m256 __DEFAULT_FN_ATTRS256
2843_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2844  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2845                                             (__v8sf)_mm256_max_ps(__A, __B),
2846                                             (__v8sf)_mm256_setzero_ps());
2847}
2848
2849static __inline__ __m128d __DEFAULT_FN_ATTRS128
2850_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2851  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2852                                              (__v2df)_mm_min_pd(__A, __B),
2853                                              (__v2df)__W);
2854}
2855
2856static __inline__ __m128d __DEFAULT_FN_ATTRS128
2857_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2858  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2859                                              (__v2df)_mm_min_pd(__A, __B),
2860                                              (__v2df)_mm_setzero_pd());
2861}
2862
2863static __inline__ __m256d __DEFAULT_FN_ATTRS256
2864_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2865  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2866                                              (__v4df)_mm256_min_pd(__A, __B),
2867                                              (__v4df)__W);
2868}
2869
2870static __inline__ __m256d __DEFAULT_FN_ATTRS256
2871_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2872  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2873                                              (__v4df)_mm256_min_pd(__A, __B),
2874                                              (__v4df)_mm256_setzero_pd());
2875}
2876
2877static __inline__ __m128 __DEFAULT_FN_ATTRS128
2878_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2879  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2880                                             (__v4sf)_mm_min_ps(__A, __B),
2881                                             (__v4sf)__W);
2882}
2883
2884static __inline__ __m128 __DEFAULT_FN_ATTRS128
2885_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2886  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2887                                             (__v4sf)_mm_min_ps(__A, __B),
2888                                             (__v4sf)_mm_setzero_ps());
2889}
2890
2891static __inline__ __m256 __DEFAULT_FN_ATTRS256
2892_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2893  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2894                                             (__v8sf)_mm256_min_ps(__A, __B),
2895                                             (__v8sf)__W);
2896}
2897
2898static __inline__ __m256 __DEFAULT_FN_ATTRS256
2899_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2900  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2901                                             (__v8sf)_mm256_min_ps(__A, __B),
2902                                             (__v8sf)_mm256_setzero_ps());
2903}
2904
2905static __inline__ __m128d __DEFAULT_FN_ATTRS128
2906_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2907  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2908                                              (__v2df)_mm_mul_pd(__A, __B),
2909                                              (__v2df)__W);
2910}
2911
2912static __inline__ __m128d __DEFAULT_FN_ATTRS128
2913_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2914  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2915                                              (__v2df)_mm_mul_pd(__A, __B),
2916                                              (__v2df)_mm_setzero_pd());
2917}
2918
2919static __inline__ __m256d __DEFAULT_FN_ATTRS256
2920_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2921  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2922                                              (__v4df)_mm256_mul_pd(__A, __B),
2923                                              (__v4df)__W);
2924}
2925
2926static __inline__ __m256d __DEFAULT_FN_ATTRS256
2927_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2928  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2929                                              (__v4df)_mm256_mul_pd(__A, __B),
2930                                              (__v4df)_mm256_setzero_pd());
2931}
2932
2933static __inline__ __m128 __DEFAULT_FN_ATTRS128
2934_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2935  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2936                                             (__v4sf)_mm_mul_ps(__A, __B),
2937                                             (__v4sf)__W);
2938}
2939
2940static __inline__ __m128 __DEFAULT_FN_ATTRS128
2941_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2942  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2943                                             (__v4sf)_mm_mul_ps(__A, __B),
2944                                             (__v4sf)_mm_setzero_ps());
2945}
2946
2947static __inline__ __m256 __DEFAULT_FN_ATTRS256
2948_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2949  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2950                                             (__v8sf)_mm256_mul_ps(__A, __B),
2951                                             (__v8sf)__W);
2952}
2953
2954static __inline__ __m256 __DEFAULT_FN_ATTRS256
2955_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2956  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2957                                             (__v8sf)_mm256_mul_ps(__A, __B),
2958                                             (__v8sf)_mm256_setzero_ps());
2959}
2960
2961static __inline__ __m128i __DEFAULT_FN_ATTRS128
2962_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2963  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2964                                             (__v4si)_mm_abs_epi32(__A),
2965                                             (__v4si)__W);
2966}
2967
2968static __inline__ __m128i __DEFAULT_FN_ATTRS128
2969_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2970  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2971                                             (__v4si)_mm_abs_epi32(__A),
2972                                             (__v4si)_mm_setzero_si128());
2973}
2974
2975static __inline__ __m256i __DEFAULT_FN_ATTRS256
2976_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2977  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2978                                             (__v8si)_mm256_abs_epi32(__A),
2979                                             (__v8si)__W);
2980}
2981
2982static __inline__ __m256i __DEFAULT_FN_ATTRS256
2983_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2984  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2985                                             (__v8si)_mm256_abs_epi32(__A),
2986                                             (__v8si)_mm256_setzero_si256());
2987}
2988
2989static __inline__ __m128i __DEFAULT_FN_ATTRS128
2990_mm_abs_epi64 (__m128i __A) {
2991  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2992}
2993
2994static __inline__ __m128i __DEFAULT_FN_ATTRS128
2995_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2996  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2997                                             (__v2di)_mm_abs_epi64(__A),
2998                                             (__v2di)__W);
2999}
3000
3001static __inline__ __m128i __DEFAULT_FN_ATTRS128
3002_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3003  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3004                                             (__v2di)_mm_abs_epi64(__A),
3005                                             (__v2di)_mm_setzero_si128());
3006}
3007
3008static __inline__ __m256i __DEFAULT_FN_ATTRS256
3009_mm256_abs_epi64 (__m256i __A) {
3010  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3011}
3012
3013static __inline__ __m256i __DEFAULT_FN_ATTRS256
3014_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3015  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3016                                             (__v4di)_mm256_abs_epi64(__A),
3017                                             (__v4di)__W);
3018}
3019
3020static __inline__ __m256i __DEFAULT_FN_ATTRS256
3021_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3022  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3023                                             (__v4di)_mm256_abs_epi64(__A),
3024                                             (__v4di)_mm256_setzero_si256());
3025}
3026
3027static __inline__ __m128i __DEFAULT_FN_ATTRS128
3028_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3029  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3030                                             (__v4si)_mm_max_epi32(__A, __B),
3031                                             (__v4si)_mm_setzero_si128());
3032}
3033
3034static __inline__ __m128i __DEFAULT_FN_ATTRS128
3035_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3036  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3037                                             (__v4si)_mm_max_epi32(__A, __B),
3038                                             (__v4si)__W);
3039}
3040
3041static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3043  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3044                                             (__v8si)_mm256_max_epi32(__A, __B),
3045                                             (__v8si)_mm256_setzero_si256());
3046}
3047
3048static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3050  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3051                                             (__v8si)_mm256_max_epi32(__A, __B),
3052                                             (__v8si)__W);
3053}
3054
3055static __inline__ __m128i __DEFAULT_FN_ATTRS128
3056_mm_max_epi64 (__m128i __A, __m128i __B) {
3057  return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3058}
3059
3060static __inline__ __m128i __DEFAULT_FN_ATTRS128
3061_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3062  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3063                                             (__v2di)_mm_max_epi64(__A, __B),
3064                                             (__v2di)_mm_setzero_si128());
3065}
3066
3067static __inline__ __m128i __DEFAULT_FN_ATTRS128
3068_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3069  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3070                                             (__v2di)_mm_max_epi64(__A, __B),
3071                                             (__v2di)__W);
3072}
3073
3074static __inline__ __m256i __DEFAULT_FN_ATTRS256
3075_mm256_max_epi64 (__m256i __A, __m256i __B) {
3076  return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3077}
3078
3079static __inline__ __m256i __DEFAULT_FN_ATTRS256
3080_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3081  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3082                                             (__v4di)_mm256_max_epi64(__A, __B),
3083                                             (__v4di)_mm256_setzero_si256());
3084}
3085
3086static __inline__ __m256i __DEFAULT_FN_ATTRS256
3087_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3088  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3089                                             (__v4di)_mm256_max_epi64(__A, __B),
3090                                             (__v4di)__W);
3091}
3092
3093static __inline__ __m128i __DEFAULT_FN_ATTRS128
3094_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3095  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3096                                             (__v4si)_mm_max_epu32(__A, __B),
3097                                             (__v4si)_mm_setzero_si128());
3098}
3099
3100static __inline__ __m128i __DEFAULT_FN_ATTRS128
3101_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3102  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3103                                             (__v4si)_mm_max_epu32(__A, __B),
3104                                             (__v4si)__W);
3105}
3106
3107static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3109  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3110                                             (__v8si)_mm256_max_epu32(__A, __B),
3111                                             (__v8si)_mm256_setzero_si256());
3112}
3113
3114static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3116  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3117                                             (__v8si)_mm256_max_epu32(__A, __B),
3118                                             (__v8si)__W);
3119}
3120
3121static __inline__ __m128i __DEFAULT_FN_ATTRS128
3122_mm_max_epu64 (__m128i __A, __m128i __B) {
3123  return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3124}
3125
3126static __inline__ __m128i __DEFAULT_FN_ATTRS128
3127_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3128  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3129                                             (__v2di)_mm_max_epu64(__A, __B),
3130                                             (__v2di)_mm_setzero_si128());
3131}
3132
3133static __inline__ __m128i __DEFAULT_FN_ATTRS128
3134_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3135  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3136                                             (__v2di)_mm_max_epu64(__A, __B),
3137                                             (__v2di)__W);
3138}
3139
3140static __inline__ __m256i __DEFAULT_FN_ATTRS256
3141_mm256_max_epu64 (__m256i __A, __m256i __B) {
3142  return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3143}
3144
3145static __inline__ __m256i __DEFAULT_FN_ATTRS256
3146_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3147  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3148                                             (__v4di)_mm256_max_epu64(__A, __B),
3149                                             (__v4di)_mm256_setzero_si256());
3150}
3151
3152static __inline__ __m256i __DEFAULT_FN_ATTRS256
3153_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3154  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3155                                             (__v4di)_mm256_max_epu64(__A, __B),
3156                                             (__v4di)__W);
3157}
3158
3159static __inline__ __m128i __DEFAULT_FN_ATTRS128
3160_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3161  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3162                                             (__v4si)_mm_min_epi32(__A, __B),
3163                                             (__v4si)_mm_setzero_si128());
3164}
3165
3166static __inline__ __m128i __DEFAULT_FN_ATTRS128
3167_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3168  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3169                                             (__v4si)_mm_min_epi32(__A, __B),
3170                                             (__v4si)__W);
3171}
3172
3173static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3175  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3176                                             (__v8si)_mm256_min_epi32(__A, __B),
3177                                             (__v8si)_mm256_setzero_si256());
3178}
3179
3180static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3182  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3183                                             (__v8si)_mm256_min_epi32(__A, __B),
3184                                             (__v8si)__W);
3185}
3186
3187static __inline__ __m128i __DEFAULT_FN_ATTRS128
3188_mm_min_epi64 (__m128i __A, __m128i __B) {
3189  return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3190}
3191
3192static __inline__ __m128i __DEFAULT_FN_ATTRS128
3193_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3194  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3195                                             (__v2di)_mm_min_epi64(__A, __B),
3196                                             (__v2di)__W);
3197}
3198
3199static __inline__ __m128i __DEFAULT_FN_ATTRS128
3200_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3201  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3202                                             (__v2di)_mm_min_epi64(__A, __B),
3203                                             (__v2di)_mm_setzero_si128());
3204}
3205
3206static __inline__ __m256i __DEFAULT_FN_ATTRS256
3207_mm256_min_epi64 (__m256i __A, __m256i __B) {
3208  return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3209}
3210
3211static __inline__ __m256i __DEFAULT_FN_ATTRS256
3212_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3213  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3214                                             (__v4di)_mm256_min_epi64(__A, __B),
3215                                             (__v4di)__W);
3216}
3217
3218static __inline__ __m256i __DEFAULT_FN_ATTRS256
3219_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3220  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3221                                             (__v4di)_mm256_min_epi64(__A, __B),
3222                                             (__v4di)_mm256_setzero_si256());
3223}
3224
3225static __inline__ __m128i __DEFAULT_FN_ATTRS128
3226_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3227  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3228                                             (__v4si)_mm_min_epu32(__A, __B),
3229                                             (__v4si)_mm_setzero_si128());
3230}
3231
3232static __inline__ __m128i __DEFAULT_FN_ATTRS128
3233_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3234  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3235                                             (__v4si)_mm_min_epu32(__A, __B),
3236                                             (__v4si)__W);
3237}
3238
3239static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3241  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3242                                             (__v8si)_mm256_min_epu32(__A, __B),
3243                                             (__v8si)_mm256_setzero_si256());
3244}
3245
3246static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3248  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3249                                             (__v8si)_mm256_min_epu32(__A, __B),
3250                                             (__v8si)__W);
3251}
3252
3253static __inline__ __m128i __DEFAULT_FN_ATTRS128
3254_mm_min_epu64 (__m128i __A, __m128i __B) {
3255  return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3256}
3257
3258static __inline__ __m128i __DEFAULT_FN_ATTRS128
3259_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3260  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3261                                             (__v2di)_mm_min_epu64(__A, __B),
3262                                             (__v2di)__W);
3263}
3264
3265static __inline__ __m128i __DEFAULT_FN_ATTRS128
3266_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3267  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3268                                             (__v2di)_mm_min_epu64(__A, __B),
3269                                             (__v2di)_mm_setzero_si128());
3270}
3271
3272static __inline__ __m256i __DEFAULT_FN_ATTRS256
3273_mm256_min_epu64 (__m256i __A, __m256i __B) {
3274  return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3275}
3276
3277static __inline__ __m256i __DEFAULT_FN_ATTRS256
3278_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3279  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3280                                             (__v4di)_mm256_min_epu64(__A, __B),
3281                                             (__v4di)__W);
3282}
3283
3284static __inline__ __m256i __DEFAULT_FN_ATTRS256
3285_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3286  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3287                                             (__v4di)_mm256_min_epu64(__A, __B),
3288                                             (__v4di)_mm256_setzero_si256());
3289}
3290
/* Calls the rndscalepd_128 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and an all-ones mask.  The whole expansion is wrapped
 * in parentheses so the leading cast cannot be split from the call by a
 * postfix operator written after the macro invocation. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
3296
3297
/* Calls the rndscalepd_128 mask builtin on A with immediate imm, pass-through
 * vector W and mask U.  The whole expansion is wrapped in parentheses so the
 * leading cast cannot be split from the call by a postfix operator written
 * after the macro invocation. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
3303
3304
/* Calls the rndscalepd_128 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and mask U.  The whole expansion is wrapped in
 * parentheses so the leading cast cannot be split from the call by a postfix
 * operator written after the macro invocation. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3310
3311
/* Calls the rndscalepd_256 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and an all-ones mask.  The whole expansion is wrapped
 * in parentheses so the leading cast cannot be split from the call by a
 * postfix operator written after the macro invocation. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
3317
3318
/* Calls the rndscalepd_256 mask builtin on A with immediate imm, pass-through
 * vector W and mask U.  The whole expansion is wrapped in parentheses so the
 * leading cast cannot be split from the call by a postfix operator written
 * after the macro invocation. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
3324
3325
/* Calls the rndscalepd_256 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and mask U.  The whole expansion is wrapped in
 * parentheses so the leading cast cannot be split from the call by a postfix
 * operator written after the macro invocation. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3331
/* Calls the rndscaleps_128 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and an all-ones mask.  The whole expansion is wrapped
 * in parentheses so the leading cast cannot be split from the call by a
 * postfix operator written after the macro invocation. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), \
                                              (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
3336
3337
/* Calls the rndscaleps_128 mask builtin on A with immediate imm, pass-through
 * vector W and mask U.  The whole expansion is wrapped in parentheses so the
 * leading cast cannot be split from the call by a postfix operator written
 * after the macro invocation. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), \
                                              (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
3342
3343
/* Calls the rndscaleps_128 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and mask U.  The whole expansion is wrapped in
 * parentheses so the leading cast cannot be split from the call by a postfix
 * operator written after the macro invocation. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), \
                                              (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3348
/* Calls the rndscaleps_256 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and an all-ones mask.  The whole expansion is wrapped
 * in parentheses so the leading cast cannot be split from the call by a
 * postfix operator written after the macro invocation. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), \
                                              (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
3353
/* Calls the rndscaleps_256 mask builtin on A with immediate imm, pass-through
 * vector W and mask U.  The whole expansion is wrapped in parentheses so the
 * leading cast cannot be split from the call by a postfix operator written
 * after the macro invocation. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), \
                                              (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
3358
3359
/* Calls the rndscaleps_256 mask builtin on A with immediate imm, an all-zero
 * pass-through vector and mask U.  The whole expansion is wrapped in
 * parentheses so the leading cast cannot be split from the call by a postfix
 * operator written after the macro invocation. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), \
                                              (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3364
3365static __inline__ __m128d __DEFAULT_FN_ATTRS128
3366_mm_scalef_pd (__m128d __A, __m128d __B) {
3367  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3368                (__v2df) __B,
3369                (__v2df)
3370                _mm_setzero_pd (),
3371                (__mmask8) -1);
3372}
3373
3374static __inline__ __m128d __DEFAULT_FN_ATTRS128
3375_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3376        __m128d __B) {
3377  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3378                (__v2df) __B,
3379                (__v2df) __W,
3380                (__mmask8) __U);
3381}
3382
3383static __inline__ __m128d __DEFAULT_FN_ATTRS128
3384_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3385  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3386                (__v2df) __B,
3387                (__v2df)
3388                _mm_setzero_pd (),
3389                (__mmask8) __U);
3390}
3391
3392static __inline__ __m256d __DEFAULT_FN_ATTRS256
3393_mm256_scalef_pd (__m256d __A, __m256d __B) {
3394  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3395                (__v4df) __B,
3396                (__v4df)
3397                _mm256_setzero_pd (),
3398                (__mmask8) -1);
3399}
3400
3401static __inline__ __m256d __DEFAULT_FN_ATTRS256
3402_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3403           __m256d __B) {
3404  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3405                (__v4df) __B,
3406                (__v4df) __W,
3407                (__mmask8) __U);
3408}
3409
3410static __inline__ __m256d __DEFAULT_FN_ATTRS256
3411_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3412  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3413                (__v4df) __B,
3414                (__v4df)
3415                _mm256_setzero_pd (),
3416                (__mmask8) __U);
3417}
3418
3419static __inline__ __m128 __DEFAULT_FN_ATTRS128
3420_mm_scalef_ps (__m128 __A, __m128 __B) {
3421  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3422               (__v4sf) __B,
3423               (__v4sf)
3424               _mm_setzero_ps (),
3425               (__mmask8) -1);
3426}
3427
3428static __inline__ __m128 __DEFAULT_FN_ATTRS128
3429_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3430  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3431               (__v4sf) __B,
3432               (__v4sf) __W,
3433               (__mmask8) __U);
3434}
3435
3436static __inline__ __m128 __DEFAULT_FN_ATTRS128
3437_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3438  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3439               (__v4sf) __B,
3440               (__v4sf)
3441               _mm_setzero_ps (),
3442               (__mmask8) __U);
3443}
3444
3445static __inline__ __m256 __DEFAULT_FN_ATTRS256
3446_mm256_scalef_ps (__m256 __A, __m256 __B) {
3447  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3448               (__v8sf) __B,
3449               (__v8sf)
3450               _mm256_setzero_ps (),
3451               (__mmask8) -1);
3452}
3453
3454static __inline__ __m256 __DEFAULT_FN_ATTRS256
3455_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3456           __m256 __B) {
3457  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3458               (__v8sf) __B,
3459               (__v8sf) __W,
3460               (__mmask8) __U);
3461}
3462
3463static __inline__ __m256 __DEFAULT_FN_ATTRS256
3464_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3465  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3466               (__v8sf) __B,
3467               (__v8sf)
3468               _mm256_setzero_ps (),
3469               (__mmask8) __U);
3470}
3471
/* Forwards addr, an all-ones mask, index (as __v2di), v1 (as __v2df) and
 * scale to the scatterdiv2df builtin. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3476
/* Forwards addr, mask, index (as __v2di), v1 (as __v2df) and scale to the
 * scatterdiv2df builtin. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3481
/* Forwards addr, an all-ones mask, index (as __v2di), v1 (as __v2di) and
 * scale to the scatterdiv2di builtin. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3486
/* Forwards addr, mask, index (as __v2di), v1 (as __v2di) and scale to the
 * scatterdiv2di builtin. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3491
/* Forwards addr, an all-ones mask, index (as __v4di), v1 (as __v4df) and
 * scale to the scatterdiv4df builtin. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3496
/* Forwards addr, mask, index (as __v4di), v1 (as __v4df) and scale to the
 * scatterdiv4df builtin. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3501
/* Forwards addr, an all-ones mask, index (as __v4di), v1 (as __v4di) and
 * scale to the scatterdiv4di builtin. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3506
/* Forwards addr, mask, index (as __v4di), v1 (as __v4di) and scale to the
 * scatterdiv4di builtin. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3511
/* Forwards addr, an all-ones mask, index (as __v2di), v1 (as __v4sf) and
 * scale to the scatterdiv4sf builtin. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3516
/* Forwards addr, mask, index (as __v2di), v1 (as __v4sf) and scale to the
 * scatterdiv4sf builtin. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3521
/* Forwards addr, an all-ones mask, index (as __v2di), v1 (as __v4si) and
 * scale to the scatterdiv4si builtin. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3526
/* Forwards addr, mask, index (as __v2di), v1 (as __v4si) and scale to the
 * scatterdiv4si builtin. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3531
/* Forwards addr, an all-ones mask, index (as __v4di), v1 (as __v4sf) and
 * scale to the scatterdiv8sf builtin. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3536
/* Forwards addr, mask, index (as __v4di), v1 (as __v4sf) and scale to the
 * scatterdiv8sf builtin. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3541
/* Forwards addr, an all-ones mask, index (as __v4di), v1 (as __v4si) and
 * scale to the scatterdiv8si builtin. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3546
/* Forwards addr, mask, index (as __v4di), v1 (as __v4si) and scale to the
 * scatterdiv8si builtin. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3551
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v2df) and
 * scale to the scattersiv2df builtin. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3556
/* Forwards addr, mask, index (as __v4si), v1 (as __v2df) and scale to the
 * scattersiv2df builtin. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3561
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v2di) and
 * scale to the scattersiv2di builtin. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3566
/* Forwards addr, mask, index (as __v4si), v1 (as __v2di) and scale to the
 * scattersiv2di builtin. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3571
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v4df) and
 * scale to the scattersiv4df builtin. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3576
/* Forwards addr, mask, index (as __v4si), v1 (as __v4df) and scale to the
 * scattersiv4df builtin. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3581
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v4di) and
 * scale to the scattersiv4di builtin. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3586
/* Forwards addr, mask, index (as __v4si), v1 (as __v4di) and scale to the
 * scattersiv4di builtin. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3591
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v4sf) and
 * scale to the scattersiv4sf builtin. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3596
/* Forwards addr, mask, index (as __v4si), v1 (as __v4sf) and scale to the
 * scattersiv4sf builtin. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3601
/* Forwards addr, an all-ones mask, index (as __v4si), v1 (as __v4si) and
 * scale to the scattersiv4si builtin. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3606
/* Forwards addr, mask, index (as __v4si), v1 (as __v4si) and scale to the
 * scattersiv4si builtin. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3611
/* Forwards addr, an all-ones mask, index (as __v8si), v1 (as __v8sf) and
 * scale to the scattersiv8sf builtin. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
3616
/* Forwards addr, mask, index (as __v8si), v1 (as __v8sf) and scale to the
 * scattersiv8sf builtin. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
3621
/* Forwards addr, an all-ones mask, index (as __v8si), v1 (as __v8si) and
 * scale to the scattersiv8si builtin. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
3626
/* Forwards addr, mask, index (as __v8si), v1 (as __v8si) and scale to the
 * scattersiv8si builtin. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
3631
3632  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3633  _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3634    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3635                                                (__v2df)_mm_sqrt_pd(__A),
3636                                                (__v2df)__W);
3637  }
3638
3639  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3640  _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3641    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3642                                                (__v2df)_mm_sqrt_pd(__A),
3643                                                (__v2df)_mm_setzero_pd());
3644  }
3645
3646  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3647  _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3648    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3649                                                (__v4df)_mm256_sqrt_pd(__A),
3650                                                (__v4df)__W);
3651  }
3652
3653  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3654  _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3655    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3656                                                (__v4df)_mm256_sqrt_pd(__A),
3657                                                (__v4df)_mm256_setzero_pd());
3658  }
3659
3660  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3661  _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3662    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3663                                               (__v4sf)_mm_sqrt_ps(__A),
3664                                               (__v4sf)__W);
3665  }
3666
3667  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3668  _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3669    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3670                                               (__v4sf)_mm_sqrt_ps(__A),
3671                                               (__v4sf)_mm_setzero_ps());
3672  }
3673
3674  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3675  _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3676    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3677                                               (__v8sf)_mm256_sqrt_ps(__A),
3678                                               (__v8sf)__W);
3679  }
3680
3681  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3682  _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3683    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3684                                               (__v8sf)_mm256_sqrt_ps(__A),
3685                                               (__v8sf)_mm256_setzero_ps());
3686  }
3687
3688  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3689  _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3690    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3691                                                (__v2df)_mm_sub_pd(__A, __B),
3692                                                (__v2df)__W);
3693  }
3694
3695  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3696  _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3697    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3698                                                (__v2df)_mm_sub_pd(__A, __B),
3699                                                (__v2df)_mm_setzero_pd());
3700  }
3701
3702  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3703  _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3704    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3705                                                (__v4df)_mm256_sub_pd(__A, __B),
3706                                                (__v4df)__W);
3707  }
3708
3709  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3710  _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3711    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3712                                                (__v4df)_mm256_sub_pd(__A, __B),
3713                                                (__v4df)_mm256_setzero_pd());
3714  }
3715
3716  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3717  _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3718    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3719                                               (__v4sf)_mm_sub_ps(__A, __B),
3720                                               (__v4sf)__W);
3721  }
3722
3723  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3724  _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3725    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3726                                               (__v4sf)_mm_sub_ps(__A, __B),
3727                                               (__v4sf)_mm_setzero_ps());
3728  }
3729
3730  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3731  _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3732    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3733                                               (__v8sf)_mm256_sub_ps(__A, __B),
3734                                               (__v8sf)__W);
3735  }
3736
3737  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3738  _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3739    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3740                                               (__v8sf)_mm256_sub_ps(__A, __B),
3741                                               (__v8sf)_mm256_setzero_ps());
3742  }
3743
3744  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3745  _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3746    return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3747                                                  (__v4si)__B);
3748  }
3749
3750  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751  _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3752                              __m128i __B) {
3753    return (__m128i)__builtin_ia32_selectd_128(__U,
3754                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3755                                    (__v4si)__A);
3756  }
3757
3758  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759  _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3760                               __m128i __B) {
3761    return (__m128i)__builtin_ia32_selectd_128(__U,
3762                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3763                                    (__v4si)__I);
3764  }
3765
3766  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3767  _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3768                               __m128i __B) {
3769    return (__m128i)__builtin_ia32_selectd_128(__U,
3770                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3771                                    (__v4si)_mm_setzero_si128());
3772  }
3773
3774  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3775  _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3776    return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3777                                                  (__v8si) __B);
3778  }
3779
3780  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781  _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3782                                 __m256i __B) {
3783    return (__m256i)__builtin_ia32_selectd_256(__U,
3784                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3785                                 (__v8si)__A);
3786  }
3787
3788  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789  _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3790                                  __m256i __B) {
3791    return (__m256i)__builtin_ia32_selectd_256(__U,
3792                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3793                                 (__v8si)__I);
3794  }
3795
3796  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3797  _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3798                                  __m256i __B) {
3799    return (__m256i)__builtin_ia32_selectd_256(__U,
3800                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3801                                 (__v8si)_mm256_setzero_si256());
3802  }
3803
3804  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3805  _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3806    return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3807                                                   (__v2df)__B);
3808  }
3809
3810  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811  _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3812    return (__m128d)__builtin_ia32_selectpd_128(__U,
3813                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3814                                       (__v2df)__A);
3815  }
3816
3817  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3818  _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3819    return (__m128d)__builtin_ia32_selectpd_128(__U,
3820                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3821                                       (__v2df)(__m128d)__I);
3822  }
3823
3824  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825  _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3826    return (__m128d)__builtin_ia32_selectpd_128(__U,
3827                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3828                                       (__v2df)_mm_setzero_pd());
3829  }
3830
3831  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3832  _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3833    return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3834                                                   (__v4df)__B);
3835  }
3836
3837  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838  _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3839                              __m256d __B) {
3840    return (__m256d)__builtin_ia32_selectpd_256(__U,
3841                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3842                                    (__v4df)__A);
3843  }
3844
3845  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846  _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3847                               __m256d __B) {
3848    return (__m256d)__builtin_ia32_selectpd_256(__U,
3849                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3850                                    (__v4df)(__m256d)__I);
3851  }
3852
3853  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3854  _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3855                               __m256d __B) {
3856    return (__m256d)__builtin_ia32_selectpd_256(__U,
3857                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3858                                    (__v4df)_mm256_setzero_pd());
3859  }
3860
3861  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3862  _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3863    return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3864                                                  (__v4sf)__B);
3865  }
3866
3867  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868  _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3869    return (__m128)__builtin_ia32_selectps_128(__U,
3870                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3871                                       (__v4sf)__A);
3872  }
3873
3874  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3875  _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3876    return (__m128)__builtin_ia32_selectps_128(__U,
3877                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3878                                       (__v4sf)(__m128)__I);
3879  }
3880
3881  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882  _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3883    return (__m128)__builtin_ia32_selectps_128(__U,
3884                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3885                                       (__v4sf)_mm_setzero_ps());
3886  }
3887
3888  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3889  _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3890    return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3891                                                  (__v8sf) __B);
3892  }
3893
3894  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895  _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3896    return (__m256)__builtin_ia32_selectps_256(__U,
3897                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3898                                    (__v8sf)__A);
3899  }
3900
3901  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3902  _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3903                               __m256 __B) {
3904    return (__m256)__builtin_ia32_selectps_256(__U,
3905                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3906                                    (__v8sf)(__m256)__I);
3907  }
3908
3909  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910  _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3911                               __m256 __B) {
3912    return (__m256)__builtin_ia32_selectps_256(__U,
3913                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3914                                    (__v8sf)_mm256_setzero_ps());
3915  }
3916
3917  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3918  _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3919    return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3920                                                  (__v2di)__B);
3921  }
3922
3923  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924  _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3925                              __m128i __B) {
3926    return (__m128i)__builtin_ia32_selectq_128(__U,
3927                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3928                                    (__v2di)__A);
3929  }
3930
3931  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932  _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3933                               __m128i __B) {
3934    return (__m128i)__builtin_ia32_selectq_128(__U,
3935                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3936                                    (__v2di)__I);
3937  }
3938
3939  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3940  _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3941                               __m128i __B) {
3942    return (__m128i)__builtin_ia32_selectq_128(__U,
3943                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3944                                    (__v2di)_mm_setzero_si128());
3945  }
3946
3947
3948  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3949  _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3950    return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3951                                                  (__v4di) __B);
3952  }
3953
3954  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955  _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3956                                 __m256i __B) {
3957    return (__m256i)__builtin_ia32_selectq_256(__U,
3958                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3959                                 (__v4di)__A);
3960  }
3961
3962  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963  _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3964                                  __m256i __B) {
3965    return (__m256i)__builtin_ia32_selectq_256(__U,
3966                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3967                                 (__v4di)__I);
3968  }
3969
3970  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3971  _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3972                                  __m256i __B) {
3973    return (__m256i)__builtin_ia32_selectq_256(__U,
3974                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3975                                 (__v4di)_mm256_setzero_si256());
3976  }
3977
3978  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3979  _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3980  {
3981    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3982                                               (__v4si)_mm_cvtepi8_epi32(__A),
3983                                               (__v4si)__W);
3984  }
3985
3986  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3987  _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3988  {
3989    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3990                                               (__v4si)_mm_cvtepi8_epi32(__A),
3991                                               (__v4si)_mm_setzero_si128());
3992  }
3993
3994  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3995  _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3996  {
3997    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3998                                               (__v8si)_mm256_cvtepi8_epi32(__A),
3999                                               (__v8si)__W);
4000  }
4001
4002  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4003  _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4004  {
4005    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4006                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4007                                               (__v8si)_mm256_setzero_si256());
4008  }
4009
4010  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4011  _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4012  {
4013    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4014                                               (__v2di)_mm_cvtepi8_epi64(__A),
4015                                               (__v2di)__W);
4016  }
4017
4018  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4019  _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4020  {
4021    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4022                                               (__v2di)_mm_cvtepi8_epi64(__A),
4023                                               (__v2di)_mm_setzero_si128());
4024  }
4025
4026  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4027  _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4028  {
4029    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4030                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4031                                               (__v4di)__W);
4032  }
4033
4034  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4035  _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4036  {
4037    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4038                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4039                                               (__v4di)_mm256_setzero_si256());
4040  }
4041
4042  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4043  _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4044  {
4045    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4046                                               (__v2di)_mm_cvtepi32_epi64(__X),
4047                                               (__v2di)__W);
4048  }
4049
4050  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4051  _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4052  {
4053    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4054                                               (__v2di)_mm_cvtepi32_epi64(__X),
4055                                               (__v2di)_mm_setzero_si128());
4056  }
4057
4058  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4059  _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4060  {
4061    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4062                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4063                                               (__v4di)__W);
4064  }
4065
4066  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4067  _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4068  {
4069    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4070                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4071                                               (__v4di)_mm256_setzero_si256());
4072  }
4073
4074  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4075  _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4076  {
4077    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4078                                               (__v4si)_mm_cvtepi16_epi32(__A),
4079                                               (__v4si)__W);
4080  }
4081
4082  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4083  _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4084  {
4085    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4086                                               (__v4si)_mm_cvtepi16_epi32(__A),
4087                                               (__v4si)_mm_setzero_si128());
4088  }
4089
4090  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4091  _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4092  {
4093    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4094                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4095                                               (__v8si)__W);
4096  }
4097
4098  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4099  _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4100  {
4101    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4102                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4103                                               (__v8si)_mm256_setzero_si256());
4104  }
4105
4106  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4107  _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4108  {
4109    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4110                                               (__v2di)_mm_cvtepi16_epi64(__A),
4111                                               (__v2di)__W);
4112  }
4113
4114  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4115  _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4116  {
4117    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4118                                               (__v2di)_mm_cvtepi16_epi64(__A),
4119                                               (__v2di)_mm_setzero_si128());
4120  }
4121
4122  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4123  _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4124  {
4125    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4126                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4127                                               (__v4di)__W);
4128  }
4129
4130  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4131  _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4132  {
4133    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4134                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4135                                               (__v4di)_mm256_setzero_si256());
4136  }
4137
4138
4139  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4140  _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4141  {
4142    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4143                                               (__v4si)_mm_cvtepu8_epi32(__A),
4144                                               (__v4si)__W);
4145  }
4146
4147  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4148  _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4149  {
4150    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4151                                               (__v4si)_mm_cvtepu8_epi32(__A),
4152                                               (__v4si)_mm_setzero_si128());
4153  }
4154
4155  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4156  _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4157  {
4158    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4159                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4160                                               (__v8si)__W);
4161  }
4162
4163  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4164  _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4165  {
4166    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4167                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4168                                               (__v8si)_mm256_setzero_si256());
4169  }
4170
4171  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4172  _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4173  {
4174    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4175                                               (__v2di)_mm_cvtepu8_epi64(__A),
4176                                               (__v2di)__W);
4177  }
4178
4179  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4180  _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4181  {
4182    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4183                                               (__v2di)_mm_cvtepu8_epi64(__A),
4184                                               (__v2di)_mm_setzero_si128());
4185  }
4186
4187  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4188  _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4189  {
4190    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4191                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4192                                               (__v4di)__W);
4193  }
4194
4195  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4196  _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4197  {
4198    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4199                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4200                                               (__v4di)_mm256_setzero_si256());
4201  }
4202
4203  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4204  _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4205  {
4206    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4207                                               (__v2di)_mm_cvtepu32_epi64(__X),
4208                                               (__v2di)__W);
4209  }
4210
4211  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4212  _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4213  {
4214    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4215                                               (__v2di)_mm_cvtepu32_epi64(__X),
4216                                               (__v2di)_mm_setzero_si128());
4217  }
4218
4219  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4220  _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4221  {
4222    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4223                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4224                                               (__v4di)__W);
4225  }
4226
4227  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4228  _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4229  {
4230    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4231                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4232                                               (__v4di)_mm256_setzero_si256());
4233  }
4234
4235  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4236  _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4237  {
4238    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4239                                               (__v4si)_mm_cvtepu16_epi32(__A),
4240                                               (__v4si)__W);
4241  }
4242
4243  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4244  _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4245  {
4246    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4247                                               (__v4si)_mm_cvtepu16_epi32(__A),
4248                                               (__v4si)_mm_setzero_si128());
4249  }
4250
4251  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4252  _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4253  {
4254    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4255                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4256                                               (__v8si)__W);
4257  }
4258
4259  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4260  _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4261  {
4262    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4263                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4264                                               (__v8si)_mm256_setzero_si256());
4265  }
4266
4267  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4268  _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4269  {
4270    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4271                                               (__v2di)_mm_cvtepu16_epi64(__A),
4272                                               (__v2di)__W);
4273  }
4274
4275  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4276  _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4277  {
4278    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4279                                               (__v2di)_mm_cvtepu16_epi64(__A),
4280                                               (__v2di)_mm_setzero_si128());
4281  }
4282
4283  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4284  _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4285  {
4286    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4287                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4288                                               (__v4di)__W);
4289  }
4290
4291  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4292  _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4293  {
4294    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4295                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4296                                               (__v4di)_mm256_setzero_si256());
4297  }
4298
4299
/* Expands to __builtin_ia32_prold128 applied to (a) viewed as __v4si and (b)
 * cast to int, with the result cast to __m128i. The whole expansion is
 * parenthesized: without that, a postfix operator written after the macro
 * use (e.g. a subscript) would bind to the builtin call before the cast. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4302
/* Masked form of _mm_rol_epi32: selects, per (u), between
 * _mm_rol_epi32((a), (b)) and the pass-through operand (w). The whole
 * expansion is parenthesized so the leading cast applies to the complete
 * select call even when a postfix operator follows the macro use. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4307
/* Zero-masked _mm_rol_epi32: the rotate result and the vector from
   _mm_setzero_si128() go to __builtin_ia32_selectd_128, keyed by mask u. The
   expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4312
/* Expands to __builtin_ia32_prold256 applied to a (as __v8si) and b (as int),
   cast to __m256i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4315
/* Merge-masked _mm256_rol_epi32: the rotate result and w go to
   __builtin_ia32_selectd_256, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4320
/* Zero-masked _mm256_rol_epi32: the rotate result and the vector from
   _mm256_setzero_si256() go to __builtin_ia32_selectd_256, keyed by mask u.
   The expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4325
/* Expands to __builtin_ia32_prolq128 applied to a (as __v2di) and b (as int),
   cast to __m128i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4328
/* Merge-masked _mm_rol_epi64: the rotate result and w go to
   __builtin_ia32_selectq_128, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4333
/* Zero-masked _mm_rol_epi64: the rotate result and the vector from
   _mm_setzero_si128() go to __builtin_ia32_selectq_128, keyed by mask u. The
   expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4338
/* Expands to __builtin_ia32_prolq256 applied to a (as __v4di) and b (as int),
   cast to __m256i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4341
/* Merge-masked _mm256_rol_epi64: the rotate result and w go to
   __builtin_ia32_selectq_256, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4346
/* Zero-masked _mm256_rol_epi64: the rotate result and the vector from
   _mm256_setzero_si256() go to __builtin_ia32_selectq_256, keyed by mask u.
   The expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4351
4352static __inline__ __m128i __DEFAULT_FN_ATTRS128
4353_mm_rolv_epi32 (__m128i __A, __m128i __B)
4354{
4355  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4356}
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4360{
4361  return (__m128i)__builtin_ia32_selectd_128(__U,
4362                                             (__v4si)_mm_rolv_epi32(__A, __B),
4363                                             (__v4si)__W);
4364}
4365
4366static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4368{
4369  return (__m128i)__builtin_ia32_selectd_128(__U,
4370                                             (__v4si)_mm_rolv_epi32(__A, __B),
4371                                             (__v4si)_mm_setzero_si128());
4372}
4373
4374static __inline__ __m256i __DEFAULT_FN_ATTRS256
4375_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4376{
4377  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4382{
4383  return (__m256i)__builtin_ia32_selectd_256(__U,
4384                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4385                                            (__v8si)__W);
4386}
4387
4388static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4390{
4391  return (__m256i)__builtin_ia32_selectd_256(__U,
4392                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4393                                            (__v8si)_mm256_setzero_si256());
4394}
4395
4396static __inline__ __m128i __DEFAULT_FN_ATTRS128
4397_mm_rolv_epi64 (__m128i __A, __m128i __B)
4398{
4399  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4404{
4405  return (__m128i)__builtin_ia32_selectq_128(__U,
4406                                             (__v2di)_mm_rolv_epi64(__A, __B),
4407                                             (__v2di)__W);
4408}
4409
4410static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4412{
4413  return (__m128i)__builtin_ia32_selectq_128(__U,
4414                                             (__v2di)_mm_rolv_epi64(__A, __B),
4415                                             (__v2di)_mm_setzero_si128());
4416}
4417
4418static __inline__ __m256i __DEFAULT_FN_ATTRS256
4419_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4420{
4421  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4426{
4427  return (__m256i)__builtin_ia32_selectq_256(__U,
4428                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4429                                            (__v4di)__W);
4430}
4431
4432static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4434{
4435  return (__m256i)__builtin_ia32_selectq_256(__U,
4436                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4437                                            (__v4di)_mm256_setzero_si256());
4438}
4439
/* Expands to __builtin_ia32_prord128 applied to a (as __v4si) and b (as int),
   cast to __m128i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4442
/* Merge-masked _mm_ror_epi32: the rotate result and w go to
   __builtin_ia32_selectd_128, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4447
/* Zero-masked _mm_ror_epi32: the rotate result and the vector from
   _mm_setzero_si128() go to __builtin_ia32_selectd_128, keyed by mask u. The
   expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4452
/* Expands to __builtin_ia32_prord256 applied to a (as __v8si) and b (as int),
   cast to __m256i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4455
/* Merge-masked _mm256_ror_epi32: the rotate result and w go to
   __builtin_ia32_selectd_256, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4460
/* Zero-masked _mm256_ror_epi32: the rotate result and the vector from
   _mm256_setzero_si256() go to __builtin_ia32_selectd_256, keyed by mask u.
   The expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4465
/* Expands to __builtin_ia32_prorq128 applied to a (as __v2di) and b (as int),
   cast to __m128i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4468
/* Merge-masked _mm_ror_epi64: the rotate result and w go to
   __builtin_ia32_selectq_128, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4473
/* Zero-masked _mm_ror_epi64: the rotate result and the vector from
   _mm_setzero_si128() go to __builtin_ia32_selectq_128, keyed by mask u. The
   expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4478
/* Expands to __builtin_ia32_prorq256 applied to a (as __v4di) and b (as int),
   cast to __m256i. The expansion is wrapped in parentheses so that postfix
   operators at the use site apply to the cast result, not to the builtin
   call. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4481
/* Merge-masked _mm256_ror_epi64: the rotate result and w go to
   __builtin_ia32_selectq_256, keyed by mask u. The expansion is wrapped in
   parentheses so it behaves as a single operand wherever it is used. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4486
/* Zero-masked _mm256_ror_epi64: the rotate result and the vector from
   _mm256_setzero_si256() go to __builtin_ia32_selectq_256, keyed by mask u.
   The expansion is wrapped in parentheses so it behaves as a single operand
   wherever it is used. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4491
4492static __inline__ __m128i __DEFAULT_FN_ATTRS128
4493_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4494{
4495  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4496                                             (__v4si)_mm_sll_epi32(__A, __B),
4497                                             (__v4si)__W);
4498}
4499
4500static __inline__ __m128i __DEFAULT_FN_ATTRS128
4501_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4502{
4503  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4504                                             (__v4si)_mm_sll_epi32(__A, __B),
4505                                             (__v4si)_mm_setzero_si128());
4506}
4507
4508static __inline__ __m256i __DEFAULT_FN_ATTRS256
4509_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4510{
4511  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4512                                             (__v8si)_mm256_sll_epi32(__A, __B),
4513                                             (__v8si)__W);
4514}
4515
4516static __inline__ __m256i __DEFAULT_FN_ATTRS256
4517_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4518{
4519  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4520                                             (__v8si)_mm256_sll_epi32(__A, __B),
4521                                             (__v8si)_mm256_setzero_si256());
4522}
4523
4524static __inline__ __m128i __DEFAULT_FN_ATTRS128
4525_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4526{
4527  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4528                                             (__v4si)_mm_slli_epi32(__A, __B),
4529                                             (__v4si)__W);
4530}
4531
4532static __inline__ __m128i __DEFAULT_FN_ATTRS128
4533_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4534{
4535  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4536                                             (__v4si)_mm_slli_epi32(__A, __B),
4537                                             (__v4si)_mm_setzero_si128());
4538}
4539
4540static __inline__ __m256i __DEFAULT_FN_ATTRS256
4541_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4542{
4543  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4544                                             (__v8si)_mm256_slli_epi32(__A, __B),
4545                                             (__v8si)__W);
4546}
4547
4548static __inline__ __m256i __DEFAULT_FN_ATTRS256
4549_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4550{
4551  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4552                                             (__v8si)_mm256_slli_epi32(__A, __B),
4553                                             (__v8si)_mm256_setzero_si256());
4554}
4555
4556static __inline__ __m128i __DEFAULT_FN_ATTRS128
4557_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4558{
4559  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4560                                             (__v2di)_mm_sll_epi64(__A, __B),
4561                                             (__v2di)__W);
4562}
4563
4564static __inline__ __m128i __DEFAULT_FN_ATTRS128
4565_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4566{
4567  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4568                                             (__v2di)_mm_sll_epi64(__A, __B),
4569                                             (__v2di)_mm_setzero_si128());
4570}
4571
4572static __inline__ __m256i __DEFAULT_FN_ATTRS256
4573_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4574{
4575  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4576                                             (__v4di)_mm256_sll_epi64(__A, __B),
4577                                             (__v4di)__W);
4578}
4579
4580static __inline__ __m256i __DEFAULT_FN_ATTRS256
4581_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4582{
4583  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4584                                             (__v4di)_mm256_sll_epi64(__A, __B),
4585                                             (__v4di)_mm256_setzero_si256());
4586}
4587
4588static __inline__ __m128i __DEFAULT_FN_ATTRS128
4589_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4590{
4591  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592                                             (__v2di)_mm_slli_epi64(__A, __B),
4593                                             (__v2di)__W);
4594}
4595
4596static __inline__ __m128i __DEFAULT_FN_ATTRS128
4597_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4598{
4599  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4600                                             (__v2di)_mm_slli_epi64(__A, __B),
4601                                             (__v2di)_mm_setzero_si128());
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS256
4605_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4606{
4607  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608                                             (__v4di)_mm256_slli_epi64(__A, __B),
4609                                             (__v4di)__W);
4610}
4611
4612static __inline__ __m256i __DEFAULT_FN_ATTRS256
4613_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4614{
4615  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4616                                             (__v4di)_mm256_slli_epi64(__A, __B),
4617                                             (__v4di)_mm256_setzero_si256());
4618}
4619
4620static __inline__ __m128i __DEFAULT_FN_ATTRS128
4621_mm_rorv_epi32 (__m128i __A, __m128i __B)
4622{
4623  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4628{
4629  return (__m128i)__builtin_ia32_selectd_128(__U,
4630                                             (__v4si)_mm_rorv_epi32(__A, __B),
4631                                             (__v4si)__W);
4632}
4633
4634static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4636{
4637  return (__m128i)__builtin_ia32_selectd_128(__U,
4638                                             (__v4si)_mm_rorv_epi32(__A, __B),
4639                                             (__v4si)_mm_setzero_si128());
4640}
4641
4642static __inline__ __m256i __DEFAULT_FN_ATTRS256
4643_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4644{
4645  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4650{
4651  return (__m256i)__builtin_ia32_selectd_256(__U,
4652                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4653                                            (__v8si)__W);
4654}
4655
4656static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4658{
4659  return (__m256i)__builtin_ia32_selectd_256(__U,
4660                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4661                                            (__v8si)_mm256_setzero_si256());
4662}
4663
4664static __inline__ __m128i __DEFAULT_FN_ATTRS128
4665_mm_rorv_epi64 (__m128i __A, __m128i __B)
4666{
4667  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4672{
4673  return (__m128i)__builtin_ia32_selectq_128(__U,
4674                                             (__v2di)_mm_rorv_epi64(__A, __B),
4675                                             (__v2di)__W);
4676}
4677
4678static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4680{
4681  return (__m128i)__builtin_ia32_selectq_128(__U,
4682                                             (__v2di)_mm_rorv_epi64(__A, __B),
4683                                             (__v2di)_mm_setzero_si128());
4684}
4685
4686static __inline__ __m256i __DEFAULT_FN_ATTRS256
4687_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4688{
4689  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4694{
4695  return (__m256i)__builtin_ia32_selectq_256(__U,
4696                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4697                                            (__v4di)__W);
4698}
4699
4700static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4702{
4703  return (__m256i)__builtin_ia32_selectq_256(__U,
4704                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4705                                            (__v4di)_mm256_setzero_si256());
4706}
4707
4708static __inline__ __m128i __DEFAULT_FN_ATTRS128
4709_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4710{
4711  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4712                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4713                                             (__v2di)__W);
4714}
4715
4716static __inline__ __m128i __DEFAULT_FN_ATTRS128
4717_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4718{
4719  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4720                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4721                                             (__v2di)_mm_setzero_si128());
4722}
4723
4724static __inline__ __m256i __DEFAULT_FN_ATTRS256
4725_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4726{
4727  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4728                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4729                                            (__v4di)__W);
4730}
4731
4732static __inline__ __m256i __DEFAULT_FN_ATTRS256
4733_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4734{
4735  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4736                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4737                                            (__v4di)_mm256_setzero_si256());
4738}
4739
4740static __inline__ __m128i __DEFAULT_FN_ATTRS128
4741_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4742{
4743  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4744                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4745                                             (__v4si)__W);
4746}
4747
4748static __inline__ __m128i __DEFAULT_FN_ATTRS128
4749_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4750{
4751  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4752                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4753                                             (__v4si)_mm_setzero_si128());
4754}
4755
4756static __inline__ __m256i __DEFAULT_FN_ATTRS256
4757_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4758{
4759  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4760                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4761                                            (__v8si)__W);
4762}
4763
4764static __inline__ __m256i __DEFAULT_FN_ATTRS256
4765_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4766{
4767  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4768                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4769                                            (__v8si)_mm256_setzero_si256());
4770}
4771
4772static __inline__ __m128i __DEFAULT_FN_ATTRS128
4773_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4774{
4775  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4776                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4777                                             (__v2di)__W);
4778}
4779
4780static __inline__ __m128i __DEFAULT_FN_ATTRS128
4781_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4782{
4783  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4784                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4785                                             (__v2di)_mm_setzero_si128());
4786}
4787
4788static __inline__ __m256i __DEFAULT_FN_ATTRS256
4789_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4790{
4791  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4792                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4793                                            (__v4di)__W);
4794}
4795
4796static __inline__ __m256i __DEFAULT_FN_ATTRS256
4797_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4798{
4799  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4800                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4801                                            (__v4di)_mm256_setzero_si256());
4802}
4803
4804static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4806{
4807  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4808                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4809                                            (__v4si)__W);
4810}
4811
4812static __inline__ __m128i __DEFAULT_FN_ATTRS128
4813_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4814{
4815  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4816                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4817                                            (__v4si)_mm_setzero_si128());
4818}
4819
4820static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4822{
4823  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4824                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4825                                            (__v8si)__W);
4826}
4827
4828static __inline__ __m256i __DEFAULT_FN_ATTRS256
4829_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4830{
4831  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4832                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4833                                            (__v8si)_mm256_setzero_si256());
4834}
4835
4836static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4838{
4839  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4840                                             (__v4si)_mm_srl_epi32(__A, __B),
4841                                             (__v4si)__W);
4842}
4843
4844static __inline__ __m128i __DEFAULT_FN_ATTRS128
4845_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4846{
4847  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4848                                             (__v4si)_mm_srl_epi32(__A, __B),
4849                                             (__v4si)_mm_setzero_si128());
4850}
4851
4852static __inline__ __m256i __DEFAULT_FN_ATTRS256
4853_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4854{
4855  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4856                                             (__v8si)_mm256_srl_epi32(__A, __B),
4857                                             (__v8si)__W);
4858}
4859
4860static __inline__ __m256i __DEFAULT_FN_ATTRS256
4861_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4862{
4863  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4864                                             (__v8si)_mm256_srl_epi32(__A, __B),
4865                                             (__v8si)_mm256_setzero_si256());
4866}
4867
4868static __inline__ __m128i __DEFAULT_FN_ATTRS128
4869_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4870{
4871  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4872                                             (__v4si)_mm_srli_epi32(__A, __B),
4873                                             (__v4si)__W);
4874}
4875
4876static __inline__ __m128i __DEFAULT_FN_ATTRS128
4877_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4878{
4879  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4880                                             (__v4si)_mm_srli_epi32(__A, __B),
4881                                             (__v4si)_mm_setzero_si128());
4882}
4883
4884static __inline__ __m256i __DEFAULT_FN_ATTRS256
4885_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4886{
4887  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4888                                             (__v8si)_mm256_srli_epi32(__A, __B),
4889                                             (__v8si)__W);
4890}
4891
4892static __inline__ __m256i __DEFAULT_FN_ATTRS256
4893_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4894{
4895  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4896                                             (__v8si)_mm256_srli_epi32(__A, __B),
4897                                             (__v8si)_mm256_setzero_si256());
4898}
4899
4900static __inline__ __m128i __DEFAULT_FN_ATTRS128
4901_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4902{
4903  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4904                                             (__v2di)_mm_srl_epi64(__A, __B),
4905                                             (__v2di)__W);
4906}
4907
4908static __inline__ __m128i __DEFAULT_FN_ATTRS128
4909_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4910{
4911  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4912                                             (__v2di)_mm_srl_epi64(__A, __B),
4913                                             (__v2di)_mm_setzero_si128());
4914}
4915
4916static __inline__ __m256i __DEFAULT_FN_ATTRS256
4917_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4918{
4919  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4920                                             (__v4di)_mm256_srl_epi64(__A, __B),
4921                                             (__v4di)__W);
4922}
4923
4924static __inline__ __m256i __DEFAULT_FN_ATTRS256
4925_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4926{
4927  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4928                                             (__v4di)_mm256_srl_epi64(__A, __B),
4929                                             (__v4di)_mm256_setzero_si256());
4930}
4931
4932static __inline__ __m128i __DEFAULT_FN_ATTRS128
4933_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4934{
4935  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4936                                             (__v2di)_mm_srli_epi64(__A, __B),
4937                                             (__v2di)__W);
4938}
4939
4940static __inline__ __m128i __DEFAULT_FN_ATTRS128
4941_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
4942{
4943  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4944                                             (__v2di)_mm_srli_epi64(__A, __B),
4945                                             (__v2di)_mm_setzero_si128());
4946}
4947
4948static __inline__ __m256i __DEFAULT_FN_ATTRS256
4949_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4950{
4951  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4952                                             (__v4di)_mm256_srli_epi64(__A, __B),
4953                                             (__v4di)__W);
4954}
4955
4956static __inline__ __m256i __DEFAULT_FN_ATTRS256
4957_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
4958{
4959  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4960                                             (__v4di)_mm256_srli_epi64(__A, __B),
4961                                             (__v4di)_mm256_setzero_si256());
4962}
4963
4964static __inline__ __m128i __DEFAULT_FN_ATTRS128
4965_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4966{
4967  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4968                                            (__v4si)_mm_srav_epi32(__X, __Y),
4969                                            (__v4si)__W);
4970}
4971
4972static __inline__ __m128i __DEFAULT_FN_ATTRS128
4973_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4974{
4975  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4976                                            (__v4si)_mm_srav_epi32(__X, __Y),
4977                                            (__v4si)_mm_setzero_si128());
4978}
4979
4980static __inline__ __m256i __DEFAULT_FN_ATTRS256
4981_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4982{
4983  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4984                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4985                                            (__v8si)__W);
4986}
4987
4988static __inline__ __m256i __DEFAULT_FN_ATTRS256
4989_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4990{
4991  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4992                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4993                                            (__v8si)_mm256_setzero_si256());
4994}
4995
4996static __inline__ __m128i __DEFAULT_FN_ATTRS128
4997_mm_srav_epi64(__m128i __X, __m128i __Y)
4998{
4999  return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5004{
5005  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5006                                             (__v2di)_mm_srav_epi64(__X, __Y),
5007                                             (__v2di)__W);
5008}
5009
5010static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5012{
5013  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5014                                             (__v2di)_mm_srav_epi64(__X, __Y),
5015                                             (__v2di)_mm_setzero_si128());
5016}
5017
5018static __inline__ __m256i __DEFAULT_FN_ATTRS256
5019_mm256_srav_epi64(__m256i __X, __m256i __Y)
5020{
5021  return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5026{
5027  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5028                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5029                                             (__v4di)__W);
5030}
5031
5032static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5034{
5035  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5036                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5037                                             (__v4di)_mm256_setzero_si256());
5038}
5039
5040static __inline__ __m128i __DEFAULT_FN_ATTRS128
5041_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5042{
5043  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5044                 (__v4si) __A,
5045                 (__v4si) __W);
5046}
5047
5048static __inline__ __m128i __DEFAULT_FN_ATTRS128
5049_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5050{
5051  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5052                 (__v4si) __A,
5053                 (__v4si) _mm_setzero_si128 ());
5054}
5055
5056
5057static __inline__ __m256i __DEFAULT_FN_ATTRS256
5058_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5059{
5060  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5061                 (__v8si) __A,
5062                 (__v8si) __W);
5063}
5064
5065static __inline__ __m256i __DEFAULT_FN_ATTRS256
5066_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5067{
5068  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5069                 (__v8si) __A,
5070                 (__v8si) _mm256_setzero_si256 ());
5071}
5072
5073static __inline __m128i __DEFAULT_FN_ATTRS128
5074_mm_load_epi32 (void const *__P)
5075{
5076  return *(const __m128i *) __P;
5077}
5078
5079static __inline__ __m128i __DEFAULT_FN_ATTRS128
5080_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5081{
5082  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5083              (__v4si) __W,
5084              (__mmask8)
5085              __U);
5086}
5087
5088static __inline__ __m128i __DEFAULT_FN_ATTRS128
5089_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5090{
5091  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5092              (__v4si)
5093              _mm_setzero_si128 (),
5094              (__mmask8)
5095              __U);
5096}
5097
5098static __inline __m256i __DEFAULT_FN_ATTRS256
5099_mm256_load_epi32 (void const *__P)
5100{
5101  return *(const __m256i *) __P;
5102}
5103
5104static __inline__ __m256i __DEFAULT_FN_ATTRS256
5105_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5106{
5107  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5108              (__v8si) __W,
5109              (__mmask8)
5110              __U);
5111}
5112
5113static __inline__ __m256i __DEFAULT_FN_ATTRS256
5114_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5115{
5116  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5117              (__v8si)
5118              _mm256_setzero_si256 (),
5119              (__mmask8)
5120              __U);
5121}
5122
5123static __inline void __DEFAULT_FN_ATTRS128
5124_mm_store_epi32 (void *__P, __m128i __A)
5125{
5126  *(__m128i *) __P = __A;
5127}
5128
5129static __inline__ void __DEFAULT_FN_ATTRS128
5130_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5131{
5132  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5133          (__v4si) __A,
5134          (__mmask8) __U);
5135}
5136
5137static __inline void __DEFAULT_FN_ATTRS256
5138_mm256_store_epi32 (void *__P, __m256i __A)
5139{
5140  *(__m256i *) __P = __A;
5141}
5142
5143static __inline__ void __DEFAULT_FN_ATTRS256
5144_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5145{
5146  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5147          (__v8si) __A,
5148          (__mmask8) __U);
5149}
5150
5151static __inline__ __m128i __DEFAULT_FN_ATTRS128
5152_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5153{
5154  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5155                 (__v2di) __A,
5156                 (__v2di) __W);
5157}
5158
5159static __inline__ __m128i __DEFAULT_FN_ATTRS128
5160_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5161{
5162  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5163                 (__v2di) __A,
5164                 (__v2di) _mm_setzero_si128 ());
5165}
5166
5167static __inline__ __m256i __DEFAULT_FN_ATTRS256
5168_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5169{
5170  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5171                 (__v4di) __A,
5172                 (__v4di) __W);
5173}
5174
5175static __inline__ __m256i __DEFAULT_FN_ATTRS256
5176_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5177{
5178  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5179                 (__v4di) __A,
5180                 (__v4di) _mm256_setzero_si256 ());
5181}
5182
5183static __inline __m128i __DEFAULT_FN_ATTRS128
5184_mm_load_epi64 (void const *__P)
5185{
5186  return *(const __m128i *) __P;
5187}
5188
5189static __inline__ __m128i __DEFAULT_FN_ATTRS128
5190_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5191{
5192  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5193              (__v2di) __W,
5194              (__mmask8)
5195              __U);
5196}
5197
5198static __inline__ __m128i __DEFAULT_FN_ATTRS128
5199_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5200{
5201  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5202              (__v2di)
5203              _mm_setzero_si128 (),
5204              (__mmask8)
5205              __U);
5206}
5207
5208static __inline __m256i __DEFAULT_FN_ATTRS256
5209_mm256_load_epi64 (void const *__P)
5210{
5211  return *(const __m256i *) __P;
5212}
5213
5214static __inline__ __m256i __DEFAULT_FN_ATTRS256
5215_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5216{
5217  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5218              (__v4di) __W,
5219              (__mmask8)
5220              __U);
5221}
5222
5223static __inline__ __m256i __DEFAULT_FN_ATTRS256
5224_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5225{
5226  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5227              (__v4di)
5228              _mm256_setzero_si256 (),
5229              (__mmask8)
5230              __U);
5231}
5232
5233static __inline void __DEFAULT_FN_ATTRS128
5234_mm_store_epi64 (void *__P, __m128i __A)
5235{
5236  *(__m128i *) __P = __A;
5237}
5238
5239static __inline__ void __DEFAULT_FN_ATTRS128
5240_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5241{
5242  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5243          (__v2di) __A,
5244          (__mmask8) __U);
5245}
5246
5247static __inline void __DEFAULT_FN_ATTRS256
5248_mm256_store_epi64 (void *__P, __m256i __A)
5249{
5250  *(__m256i *) __P = __A;
5251}
5252
5253static __inline__ void __DEFAULT_FN_ATTRS256
5254_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5255{
5256  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5257          (__v4di) __A,
5258          (__mmask8) __U);
5259}
5260
5261static __inline__ __m128d __DEFAULT_FN_ATTRS128
5262_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5263{
5264  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5265                                              (__v2df)_mm_movedup_pd(__A),
5266                                              (__v2df)__W);
5267}
5268
5269static __inline__ __m128d __DEFAULT_FN_ATTRS128
5270_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5271{
5272  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5273                                              (__v2df)_mm_movedup_pd(__A),
5274                                              (__v2df)_mm_setzero_pd());
5275}
5276
5277static __inline__ __m256d __DEFAULT_FN_ATTRS256
5278_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5279{
5280  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5281                                              (__v4df)_mm256_movedup_pd(__A),
5282                                              (__v4df)__W);
5283}
5284
5285static __inline__ __m256d __DEFAULT_FN_ATTRS256
5286_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5287{
5288  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5289                                              (__v4df)_mm256_movedup_pd(__A),
5290                                              (__v4df)_mm256_setzero_pd());
5291}
5292
5293static __inline__ __m128i __DEFAULT_FN_ATTRS128
5294_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5295{
5296   return (__m128i)__builtin_ia32_selectd_128(__M,
5297                                              (__v4si) _mm_set1_epi32(__A),
5298                                              (__v4si)__O);
5299}
5300
5301static __inline__ __m128i __DEFAULT_FN_ATTRS128
5302_mm_maskz_set1_epi32( __mmask8 __M, int __A)
5303{
5304   return (__m128i)__builtin_ia32_selectd_128(__M,
5305                                              (__v4si) _mm_set1_epi32(__A),
5306                                              (__v4si)_mm_setzero_si128());
5307}
5308
5309static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5311{
5312   return (__m256i)__builtin_ia32_selectd_256(__M,
5313                                              (__v8si) _mm256_set1_epi32(__A),
5314                                              (__v8si)__O);
5315}
5316
5317static __inline__ __m256i __DEFAULT_FN_ATTRS256
5318_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5319{
5320   return (__m256i)__builtin_ia32_selectd_256(__M,
5321                                              (__v8si) _mm256_set1_epi32(__A),
5322                                              (__v8si)_mm256_setzero_si256());
5323}
5324
5325
5326static __inline__ __m128i __DEFAULT_FN_ATTRS128
5327_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5328{
5329  return (__m128i) __builtin_ia32_selectq_128(__M,
5330                                              (__v2di) _mm_set1_epi64x(__A),
5331                                              (__v2di) __O);
5332}
5333
5334static __inline__ __m128i __DEFAULT_FN_ATTRS128
5335_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5336{
5337  return (__m128i) __builtin_ia32_selectq_128(__M,
5338                                              (__v2di) _mm_set1_epi64x(__A),
5339                                              (__v2di) _mm_setzero_si128());
5340}
5341
5342static __inline__ __m256i __DEFAULT_FN_ATTRS256
5343_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5344{
5345  return (__m256i) __builtin_ia32_selectq_256(__M,
5346                                              (__v4di) _mm256_set1_epi64x(__A),
5347                                              (__v4di) __O) ;
5348}
5349
5350static __inline__ __m256i __DEFAULT_FN_ATTRS256
5351_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5352{
5353   return (__m256i) __builtin_ia32_selectq_256(__M,
5354                                               (__v4di) _mm256_set1_epi64x(__A),
5355                                               (__v4di) _mm256_setzero_si256());
5356}
5357
/* Unmasked fixupimm (128-bit, double): forwards A, B, C and the immediate to
   the fixupimmpd128_mask builtin with an all-ones mask.  The expansion is
   fully parenthesized so that a postfix operator applied at the use site
   binds to the __m128d result rather than to the builtin call. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)-1))
5363
/* Masked fixupimm (128-bit, double): forwards A, B, C, the immediate and
   mask U to the fixupimmpd128_mask builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m128d result rather than to the builtin call. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5369
/* Zero-masking fixupimm (128-bit, double): forwards A, B, C, the immediate
   and mask U to the fixupimmpd128_maskz builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m128d result rather than to the builtin call. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5375
/* Unmasked fixupimm (256-bit, double): forwards A, B, C and the immediate to
   the fixupimmpd256_mask builtin with an all-ones mask.  The expansion is
   fully parenthesized so that a postfix operator applied at the use site
   binds to the __m256d result rather than to the builtin call. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)-1))
5381
/* Masked fixupimm (256-bit, double): forwards A, B, C, the immediate and
   mask U to the fixupimmpd256_mask builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m256d result rather than to the builtin call. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5387
/* Zero-masking fixupimm (256-bit, double): forwards A, B, C, the immediate
   and mask U to the fixupimmpd256_maskz builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m256d result rather than to the builtin call. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5393
/* Unmasked fixupimm (128-bit, float): forwards A, B, C and the immediate to
   the fixupimmps128_mask builtin with an all-ones mask.  The expansion is
   fully parenthesized so that a postfix operator applied at the use site
   binds to the __m128 result rather than to the builtin call. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (int)(imm), (__mmask8)-1))
5399
/* Masked fixupimm (128-bit, float): forwards A, B, C, the immediate and
   mask U to the fixupimmps128_mask builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m128 result rather than to the builtin call. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (int)(imm), (__mmask8)(U)))
5405
/* Zero-masking fixupimm (128-bit, float): forwards A, B, C, the immediate
   and mask U to the fixupimmps128_maskz builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m128 result rather than to the builtin call. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5411
/* Unmasked fixupimm (256-bit, float): forwards A, B, C and the immediate to
   the fixupimmps256_mask builtin with an all-ones mask.  The expansion is
   fully parenthesized so that a postfix operator applied at the use site
   binds to the __m256 result rather than to the builtin call. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (int)(imm), (__mmask8)-1))
5417
/* Masked fixupimm (256-bit, float): forwards A, B, C, the immediate and
   mask U to the fixupimmps256_mask builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m256 result rather than to the builtin call. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (int)(imm), (__mmask8)(U)))
5423
/* Zero-masking fixupimm (256-bit, float): forwards A, B, C, the immediate
   and mask U to the fixupimmps256_maskz builtin.  The expansion is fully
   parenthesized so that a postfix operator applied at the use site binds to
   the __m256 result rather than to the builtin call. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5429
5430static __inline__ __m128d __DEFAULT_FN_ATTRS128
5431_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5432{
5433  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5434               (__v2df) __W,
5435               (__mmask8) __U);
5436}
5437
5438static __inline__ __m128d __DEFAULT_FN_ATTRS128
5439_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5440{
5441  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5442               (__v2df)
5443               _mm_setzero_pd (),
5444               (__mmask8) __U);
5445}
5446
5447static __inline__ __m256d __DEFAULT_FN_ATTRS256
5448_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5449{
5450  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5451               (__v4df) __W,
5452               (__mmask8) __U);
5453}
5454
5455static __inline__ __m256d __DEFAULT_FN_ATTRS256
5456_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5457{
5458  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5459               (__v4df)
5460               _mm256_setzero_pd (),
5461               (__mmask8) __U);
5462}
5463
5464static __inline__ __m128 __DEFAULT_FN_ATTRS128
5465_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5466{
5467  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5468              (__v4sf) __W,
5469              (__mmask8) __U);
5470}
5471
5472static __inline__ __m128 __DEFAULT_FN_ATTRS128
5473_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5474{
5475  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5476              (__v4sf)
5477              _mm_setzero_ps (),
5478              (__mmask8) __U);
5479}
5480
5481static __inline__ __m256 __DEFAULT_FN_ATTRS256
5482_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5483{
5484  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5485              (__v8sf) __W,
5486              (__mmask8) __U);
5487}
5488
5489static __inline__ __m256 __DEFAULT_FN_ATTRS256
5490_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5491{
5492  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5493              (__v8sf)
5494              _mm256_setzero_ps (),
5495              (__mmask8) __U);
5496}
5497
5498static __inline __m128i __DEFAULT_FN_ATTRS128
5499_mm_loadu_epi64 (void const *__P)
5500{
5501  struct __loadu_epi64 {
5502    __m128i_u __v;
5503  } __attribute__((__packed__, __may_alias__));
5504  return ((const struct __loadu_epi64*)__P)->__v;
5505}
5506
5507static __inline__ __m128i __DEFAULT_FN_ATTRS128
5508_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5509{
5510  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5511                 (__v2di) __W,
5512                 (__mmask8) __U);
5513}
5514
5515static __inline__ __m128i __DEFAULT_FN_ATTRS128
5516_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5517{
5518  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5519                 (__v2di)
5520                 _mm_setzero_si128 (),
5521                 (__mmask8) __U);
5522}
5523
5524static __inline __m256i __DEFAULT_FN_ATTRS256
5525_mm256_loadu_epi64 (void const *__P)
5526{
5527  struct __loadu_epi64 {
5528    __m256i_u __v;
5529  } __attribute__((__packed__, __may_alias__));
5530  return ((const struct __loadu_epi64*)__P)->__v;
5531}
5532
5533static __inline__ __m256i __DEFAULT_FN_ATTRS256
5534_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5535{
5536  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5537                 (__v4di) __W,
5538                 (__mmask8) __U);
5539}
5540
5541static __inline__ __m256i __DEFAULT_FN_ATTRS256
5542_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5543{
5544  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5545                 (__v4di)
5546                 _mm256_setzero_si256 (),
5547                 (__mmask8) __U);
5548}
5549
5550static __inline __m128i __DEFAULT_FN_ATTRS128
5551_mm_loadu_epi32 (void const *__P)
5552{
5553  struct __loadu_epi32 {
5554    __m128i_u __v;
5555  } __attribute__((__packed__, __may_alias__));
5556  return ((const struct __loadu_epi32*)__P)->__v;
5557}
5558
5559static __inline__ __m128i __DEFAULT_FN_ATTRS128
5560_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5561{
5562  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5563                 (__v4si) __W,
5564                 (__mmask8) __U);
5565}
5566
5567static __inline__ __m128i __DEFAULT_FN_ATTRS128
5568_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5569{
5570  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5571                 (__v4si)
5572                 _mm_setzero_si128 (),
5573                 (__mmask8) __U);
5574}
5575
5576static __inline __m256i __DEFAULT_FN_ATTRS256
5577_mm256_loadu_epi32 (void const *__P)
5578{
5579  struct __loadu_epi32 {
5580    __m256i_u __v;
5581  } __attribute__((__packed__, __may_alias__));
5582  return ((const struct __loadu_epi32*)__P)->__v;
5583}
5584
5585static __inline__ __m256i __DEFAULT_FN_ATTRS256
5586_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5587{
5588  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5589                 (__v8si) __W,
5590                 (__mmask8) __U);
5591}
5592
5593static __inline__ __m256i __DEFAULT_FN_ATTRS256
5594_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5595{
5596  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5597                 (__v8si)
5598                 _mm256_setzero_si256 (),
5599                 (__mmask8) __U);
5600}
5601
5602static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5604{
5605  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5606               (__v2df) __W,
5607               (__mmask8) __U);
5608}
5609
5610static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5612{
5613  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5614               (__v2df)
5615               _mm_setzero_pd (),
5616               (__mmask8) __U);
5617}
5618
5619static __inline__ __m256d __DEFAULT_FN_ATTRS256
5620_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5621{
5622  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5623               (__v4df) __W,
5624               (__mmask8) __U);
5625}
5626
5627static __inline__ __m256d __DEFAULT_FN_ATTRS256
5628_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5629{
5630  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5631               (__v4df)
5632               _mm256_setzero_pd (),
5633               (__mmask8) __U);
5634}
5635
5636static __inline__ __m128 __DEFAULT_FN_ATTRS128
5637_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5638{
5639  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5640              (__v4sf) __W,
5641              (__mmask8) __U);
5642}
5643
5644static __inline__ __m128 __DEFAULT_FN_ATTRS128
5645_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5646{
5647  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5648              (__v4sf)
5649              _mm_setzero_ps (),
5650              (__mmask8) __U);
5651}
5652
5653static __inline__ __m256 __DEFAULT_FN_ATTRS256
5654_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5655{
5656  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5657              (__v8sf) __W,
5658              (__mmask8) __U);
5659}
5660
5661static __inline__ __m256 __DEFAULT_FN_ATTRS256
5662_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5663{
5664  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5665              (__v8sf)
5666              _mm256_setzero_ps (),
5667              (__mmask8) __U);
5668}
5669
5670static __inline__ void __DEFAULT_FN_ATTRS128
5671_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5672{
5673  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5674           (__v2df) __A,
5675           (__mmask8) __U);
5676}
5677
5678static __inline__ void __DEFAULT_FN_ATTRS256
5679_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5680{
5681  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5682           (__v4df) __A,
5683           (__mmask8) __U);
5684}
5685
5686static __inline__ void __DEFAULT_FN_ATTRS128
5687_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5688{
5689  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5690           (__v4sf) __A,
5691           (__mmask8) __U);
5692}
5693
5694static __inline__ void __DEFAULT_FN_ATTRS256
5695_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5696{
5697  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5698           (__v8sf) __A,
5699           (__mmask8) __U);
5700}
5701
5702static __inline void __DEFAULT_FN_ATTRS128
5703_mm_storeu_epi64 (void *__P, __m128i __A)
5704{
5705  struct __storeu_epi64 {
5706    __m128i_u __v;
5707  } __attribute__((__packed__, __may_alias__));
5708  ((struct __storeu_epi64*)__P)->__v = __A;
5709}
5710
5711static __inline__ void __DEFAULT_FN_ATTRS128
5712_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5713{
5714  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5715             (__v2di) __A,
5716             (__mmask8) __U);
5717}
5718
5719static __inline void __DEFAULT_FN_ATTRS256
5720_mm256_storeu_epi64 (void *__P, __m256i __A)
5721{
5722  struct __storeu_epi64 {
5723    __m256i_u __v;
5724  } __attribute__((__packed__, __may_alias__));
5725  ((struct __storeu_epi64*)__P)->__v = __A;
5726}
5727
5728static __inline__ void __DEFAULT_FN_ATTRS256
5729_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5730{
5731  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5732             (__v4di) __A,
5733             (__mmask8) __U);
5734}
5735
5736static __inline void __DEFAULT_FN_ATTRS128
5737_mm_storeu_epi32 (void *__P, __m128i __A)
5738{
5739  struct __storeu_epi32 {
5740    __m128i_u __v;
5741  } __attribute__((__packed__, __may_alias__));
5742  ((struct __storeu_epi32*)__P)->__v = __A;
5743}
5744
5745static __inline__ void __DEFAULT_FN_ATTRS128
5746_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5747{
5748  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5749             (__v4si) __A,
5750             (__mmask8) __U);
5751}
5752
5753static __inline void __DEFAULT_FN_ATTRS256
5754_mm256_storeu_epi32 (void *__P, __m256i __A)
5755{
5756  struct __storeu_epi32 {
5757    __m256i_u __v;
5758  } __attribute__((__packed__, __may_alias__));
5759  ((struct __storeu_epi32*)__P)->__v = __A;
5760}
5761
5762static __inline__ void __DEFAULT_FN_ATTRS256
5763_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5764{
5765  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5766             (__v8si) __A,
5767             (__mmask8) __U);
5768}
5769
5770static __inline__ void __DEFAULT_FN_ATTRS128
5771_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5772{
5773  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5774           (__v2df) __A,
5775           (__mmask8) __U);
5776}
5777
5778static __inline__ void __DEFAULT_FN_ATTRS256
5779_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5780{
5781  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5782           (__v4df) __A,
5783           (__mmask8) __U);
5784}
5785
5786static __inline__ void __DEFAULT_FN_ATTRS128
5787_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5788{
5789  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5790           (__v4sf) __A,
5791           (__mmask8) __U);
5792}
5793
5794static __inline__ void __DEFAULT_FN_ATTRS256
5795_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5796{
5797  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5798           (__v8sf) __A,
5799           (__mmask8) __U);
5800}
5801
5802
5803static __inline__ __m128d __DEFAULT_FN_ATTRS128
5804_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5805{
5806  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5807                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5808                                              (__v2df)__W);
5809}
5810
5811static __inline__ __m128d __DEFAULT_FN_ATTRS128
5812_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5813{
5814  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5815                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5816                                              (__v2df)_mm_setzero_pd());
5817}
5818
5819static __inline__ __m256d __DEFAULT_FN_ATTRS256
5820_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5821{
5822  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5823                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5824                                           (__v4df)__W);
5825}
5826
5827static __inline__ __m256d __DEFAULT_FN_ATTRS256
5828_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5829{
5830  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5831                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5832                                           (__v4df)_mm256_setzero_pd());
5833}
5834
5835static __inline__ __m128 __DEFAULT_FN_ATTRS128
5836_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5837{
5838  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5839                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5840                                             (__v4sf)__W);
5841}
5842
5843static __inline__ __m128 __DEFAULT_FN_ATTRS128
5844_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5845{
5846  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5847                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5848                                             (__v4sf)_mm_setzero_ps());
5849}
5850
5851static __inline__ __m256 __DEFAULT_FN_ATTRS256
5852_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5853{
5854  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5855                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5856                                           (__v8sf)__W);
5857}
5858
5859static __inline__ __m256 __DEFAULT_FN_ATTRS256
5860_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5861{
5862  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5863                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5864                                           (__v8sf)_mm256_setzero_ps());
5865}
5866
5867static __inline__ __m128d __DEFAULT_FN_ATTRS128
5868_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5869{
5870  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5871                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5872                                              (__v2df)__W);
5873}
5874
5875static __inline__ __m128d __DEFAULT_FN_ATTRS128
5876_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5877{
5878  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5879                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5880                                              (__v2df)_mm_setzero_pd());
5881}
5882
5883static __inline__ __m256d __DEFAULT_FN_ATTRS256
5884_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5885{
5886  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5887                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5888                                           (__v4df)__W);
5889}
5890
5891static __inline__ __m256d __DEFAULT_FN_ATTRS256
5892_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5893{
5894  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5895                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5896                                           (__v4df)_mm256_setzero_pd());
5897}
5898
5899static __inline__ __m128 __DEFAULT_FN_ATTRS128
5900_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5901{
5902  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5903                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5904                                             (__v4sf)__W);
5905}
5906
5907static __inline__ __m128 __DEFAULT_FN_ATTRS128
5908_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5909{
5910  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5911                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5912                                             (__v4sf)_mm_setzero_ps());
5913}
5914
5915static __inline__ __m256 __DEFAULT_FN_ATTRS256
5916_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5917{
5918  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5919                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5920                                           (__v8sf)__W);
5921}
5922
5923static __inline__ __m256 __DEFAULT_FN_ATTRS256
5924_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5925{
5926  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5927                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5928                                           (__v8sf)_mm256_setzero_ps());
5929}
5930
5931static __inline__ __m128d __DEFAULT_FN_ATTRS128
5932_mm_rcp14_pd (__m128d __A)
5933{
5934  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5935                (__v2df)
5936                _mm_setzero_pd (),
5937                (__mmask8) -1);
5938}
5939
5940static __inline__ __m128d __DEFAULT_FN_ATTRS128
5941_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5942{
5943  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5944                (__v2df) __W,
5945                (__mmask8) __U);
5946}
5947
5948static __inline__ __m128d __DEFAULT_FN_ATTRS128
5949_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5950{
5951  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5952                (__v2df)
5953                _mm_setzero_pd (),
5954                (__mmask8) __U);
5955}
5956
5957static __inline__ __m256d __DEFAULT_FN_ATTRS256
5958_mm256_rcp14_pd (__m256d __A)
5959{
5960  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5961                (__v4df)
5962                _mm256_setzero_pd (),
5963                (__mmask8) -1);
5964}
5965
5966static __inline__ __m256d __DEFAULT_FN_ATTRS256
5967_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5968{
5969  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5970                (__v4df) __W,
5971                (__mmask8) __U);
5972}
5973
5974static __inline__ __m256d __DEFAULT_FN_ATTRS256
5975_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5976{
5977  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5978                (__v4df)
5979                _mm256_setzero_pd (),
5980                (__mmask8) __U);
5981}
5982
5983static __inline__ __m128 __DEFAULT_FN_ATTRS128
5984_mm_rcp14_ps (__m128 __A)
5985{
5986  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5987               (__v4sf)
5988               _mm_setzero_ps (),
5989               (__mmask8) -1);
5990}
5991
5992static __inline__ __m128 __DEFAULT_FN_ATTRS128
5993_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5994{
5995  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5996               (__v4sf) __W,
5997               (__mmask8) __U);
5998}
5999
6000static __inline__ __m128 __DEFAULT_FN_ATTRS128
6001_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6002{
6003  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6004               (__v4sf)
6005               _mm_setzero_ps (),
6006               (__mmask8) __U);
6007}
6008
6009static __inline__ __m256 __DEFAULT_FN_ATTRS256
6010_mm256_rcp14_ps (__m256 __A)
6011{
6012  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6013               (__v8sf)
6014               _mm256_setzero_ps (),
6015               (__mmask8) -1);
6016}
6017
6018static __inline__ __m256 __DEFAULT_FN_ATTRS256
6019_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6020{
6021  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6022               (__v8sf) __W,
6023               (__mmask8) __U);
6024}
6025
6026static __inline__ __m256 __DEFAULT_FN_ATTRS256
6027_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6028{
6029  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6030               (__v8sf)
6031               _mm256_setzero_ps (),
6032               (__mmask8) __U);
6033}
6034
/* Merge-masked _mm_permute_pd: passes mask U, _mm_permute_pd(X, C) and W to
   the 128-bit double select builtin.  The whole expansion is parenthesized so
   a postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
6039
/* Zero-masked _mm_permute_pd: passes mask U, _mm_permute_pd(X, C) and a zero
   vector to the 128-bit double select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
6044
/* Merge-masked _mm256_permute_pd: passes mask U, _mm256_permute_pd(X, C) and
   W to the 256-bit double select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
6049
/* Zero-masked _mm256_permute_pd: passes mask U, _mm256_permute_pd(X, C) and a
   zero vector to the 256-bit double select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
6054
/* Merge-masked _mm_permute_ps: passes mask U, _mm_permute_ps(X, C) and W to
   the 128-bit float select builtin.  The whole expansion is parenthesized so
   a postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
6059
/* Zero-masked _mm_permute_ps: passes mask U, _mm_permute_ps(X, C) and a zero
   vector to the 128-bit float select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
6064
/* Merge-masked _mm256_permute_ps: passes mask U, _mm256_permute_ps(X, C) and
   W to the 256-bit float select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
6069
/* Zero-masked _mm256_permute_ps: passes mask U, _mm256_permute_ps(X, C) and a
   zero vector to the 256-bit float select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6074
6075static __inline__ __m128d __DEFAULT_FN_ATTRS128
6076_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6077{
6078  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6079                                            (__v2df)_mm_permutevar_pd(__A, __C),
6080                                            (__v2df)__W);
6081}
6082
6083static __inline__ __m128d __DEFAULT_FN_ATTRS128
6084_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6085{
6086  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6087                                            (__v2df)_mm_permutevar_pd(__A, __C),
6088                                            (__v2df)_mm_setzero_pd());
6089}
6090
6091static __inline__ __m256d __DEFAULT_FN_ATTRS256
6092_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6093{
6094  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6095                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6096                                         (__v4df)__W);
6097}
6098
6099static __inline__ __m256d __DEFAULT_FN_ATTRS256
6100_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6101{
6102  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6103                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6104                                         (__v4df)_mm256_setzero_pd());
6105}
6106
6107static __inline__ __m128 __DEFAULT_FN_ATTRS128
6108_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6109{
6110  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6111                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6112                                            (__v4sf)__W);
6113}
6114
6115static __inline__ __m128 __DEFAULT_FN_ATTRS128
6116_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6117{
6118  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6119                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6120                                            (__v4sf)_mm_setzero_ps());
6121}
6122
6123static __inline__ __m256 __DEFAULT_FN_ATTRS256
6124_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6125{
6126  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6127                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6128                                          (__v8sf)__W);
6129}
6130
6131static __inline__ __m256 __DEFAULT_FN_ATTRS256
6132_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6133{
6134  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6135                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6136                                          (__v8sf)_mm256_setzero_ps());
6137}
6138
6139static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6140_mm_test_epi32_mask (__m128i __A, __m128i __B)
6141{
6142  return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6143}
6144
6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6147{
6148  return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6149                                     _mm_setzero_si128());
6150}
6151
6152static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6153_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6154{
6155  return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6156                                   _mm256_setzero_si256());
6157}
6158
6159static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6160_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6161{
6162  return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6163                                        _mm256_setzero_si256());
6164}
6165
6166static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6167_mm_test_epi64_mask (__m128i __A, __m128i __B)
6168{
6169  return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6170}
6171
6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6174{
6175  return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6176                                     _mm_setzero_si128());
6177}
6178
6179static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6180_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6181{
6182  return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6183                                   _mm256_setzero_si256());
6184}
6185
6186static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6187_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6188{
6189  return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6190                                        _mm256_setzero_si256());
6191}
6192
6193static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6194_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6195{
6196  return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6197}
6198
6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6201{
6202  return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6203                                    _mm_setzero_si128());
6204}
6205
6206static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6207_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6208{
6209  return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6210                                  _mm256_setzero_si256());
6211}
6212
6213static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6214_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6215{
6216  return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6217                                       _mm256_setzero_si256());
6218}
6219
6220static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6221_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6222{
6223  return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6224}
6225
6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6228{
6229  return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6230                                    _mm_setzero_si128());
6231}
6232
6233static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6234_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6235{
6236  return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6237                                  _mm256_setzero_si256());
6238}
6239
6240static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6241_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6242{
6243  return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6244                                       _mm256_setzero_si256());
6245}
6246
6247static __inline__ __m128i __DEFAULT_FN_ATTRS128
6248_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6249{
6250  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6251                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6252                                           (__v4si)__W);
6253}
6254
6255static __inline__ __m128i __DEFAULT_FN_ATTRS128
6256_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6257{
6258  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6259                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6260                                           (__v4si)_mm_setzero_si128());
6261}
6262
6263static __inline__ __m256i __DEFAULT_FN_ATTRS256
6264_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6265{
6266  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6267                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6268                                        (__v8si)__W);
6269}
6270
6271static __inline__ __m256i __DEFAULT_FN_ATTRS256
6272_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6273{
6274  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6275                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6276                                        (__v8si)_mm256_setzero_si256());
6277}
6278
6279static __inline__ __m128i __DEFAULT_FN_ATTRS128
6280_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6281{
6282  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6283                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6284                                           (__v2di)__W);
6285}
6286
6287static __inline__ __m128i __DEFAULT_FN_ATTRS128
6288_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6289{
6290  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6291                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6292                                           (__v2di)_mm_setzero_si128());
6293}
6294
6295static __inline__ __m256i __DEFAULT_FN_ATTRS256
6296_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6297{
6298  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6299                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6300                                        (__v4di)__W);
6301}
6302
6303static __inline__ __m256i __DEFAULT_FN_ATTRS256
6304_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6305{
6306  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6307                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6308                                        (__v4di)_mm256_setzero_si256());
6309}
6310
6311static __inline__ __m128i __DEFAULT_FN_ATTRS128
6312_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6313{
6314  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6315                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6316                                           (__v4si)__W);
6317}
6318
6319static __inline__ __m128i __DEFAULT_FN_ATTRS128
6320_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6321{
6322  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6323                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6324                                           (__v4si)_mm_setzero_si128());
6325}
6326
6327static __inline__ __m256i __DEFAULT_FN_ATTRS256
6328_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6329{
6330  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6331                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6332                                        (__v8si)__W);
6333}
6334
6335static __inline__ __m256i __DEFAULT_FN_ATTRS256
6336_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6337{
6338  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6339                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6340                                        (__v8si)_mm256_setzero_si256());
6341}
6342
6343static __inline__ __m128i __DEFAULT_FN_ATTRS128
6344_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6345{
6346  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6347                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6348                                           (__v2di)__W);
6349}
6350
6351static __inline__ __m128i __DEFAULT_FN_ATTRS128
6352_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6353{
6354  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6355                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6356                                           (__v2di)_mm_setzero_si128());
6357}
6358
6359static __inline__ __m256i __DEFAULT_FN_ATTRS256
6360_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6361{
6362  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6363                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6364                                        (__v4di)__W);
6365}
6366
6367static __inline__ __m256i __DEFAULT_FN_ATTRS256
6368_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6369{
6370  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6371                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6372                                        (__v4di)_mm256_setzero_si256());
6373}
6374
6375static __inline__ __m128i __DEFAULT_FN_ATTRS128
6376_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6377{
6378  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6379                                             (__v4si)_mm_sra_epi32(__A, __B),
6380                                             (__v4si)__W);
6381}
6382
6383static __inline__ __m128i __DEFAULT_FN_ATTRS128
6384_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6385{
6386  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6387                                             (__v4si)_mm_sra_epi32(__A, __B),
6388                                             (__v4si)_mm_setzero_si128());
6389}
6390
6391static __inline__ __m256i __DEFAULT_FN_ATTRS256
6392_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6393{
6394  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6395                                             (__v8si)_mm256_sra_epi32(__A, __B),
6396                                             (__v8si)__W);
6397}
6398
6399static __inline__ __m256i __DEFAULT_FN_ATTRS256
6400_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6401{
6402  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6403                                             (__v8si)_mm256_sra_epi32(__A, __B),
6404                                             (__v8si)_mm256_setzero_si256());
6405}
6406
6407static __inline__ __m128i __DEFAULT_FN_ATTRS128
6408_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6409{
6410  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6411                                             (__v4si)_mm_srai_epi32(__A, __B),
6412                                             (__v4si)__W);
6413}
6414
6415static __inline__ __m128i __DEFAULT_FN_ATTRS128
6416_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6417{
6418  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6419                                             (__v4si)_mm_srai_epi32(__A, __B),
6420                                             (__v4si)_mm_setzero_si128());
6421}
6422
6423static __inline__ __m256i __DEFAULT_FN_ATTRS256
6424_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6425{
6426  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6427                                             (__v8si)_mm256_srai_epi32(__A, __B),
6428                                             (__v8si)__W);
6429}
6430
6431static __inline__ __m256i __DEFAULT_FN_ATTRS256
6432_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6433{
6434  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6435                                             (__v8si)_mm256_srai_epi32(__A, __B),
6436                                             (__v8si)_mm256_setzero_si256());
6437}
6438
6439static __inline__ __m128i __DEFAULT_FN_ATTRS128
6440_mm_sra_epi64(__m128i __A, __m128i __B)
6441{
6442  return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6443}
6444
6445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6447{
6448  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6449                                             (__v2di)_mm_sra_epi64(__A, __B), \
6450                                             (__v2di)__W);
6451}
6452
6453static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6455{
6456  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6457                                             (__v2di)_mm_sra_epi64(__A, __B), \
6458                                             (__v2di)_mm_setzero_si128());
6459}
6460
6461static __inline__ __m256i __DEFAULT_FN_ATTRS256
6462_mm256_sra_epi64(__m256i __A, __m128i __B)
6463{
6464  return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6465}
6466
6467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6469{
6470  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6471                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6472                                           (__v4di)__W);
6473}
6474
6475static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6477{
6478  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6479                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6480                                           (__v4di)_mm256_setzero_si256());
6481}
6482
6483static __inline__ __m128i __DEFAULT_FN_ATTRS128
6484_mm_srai_epi64(__m128i __A, int __imm)
6485{
6486  return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6487}
6488
6489static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6491{
6492  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6493                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6494                                           (__v2di)__W);
6495}
6496
6497static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6499{
6500  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6501                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6502                                           (__v2di)_mm_setzero_si128());
6503}
6504
6505static __inline__ __m256i __DEFAULT_FN_ATTRS256
6506_mm256_srai_epi64(__m256i __A, int __imm)
6507{
6508  return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6513{
6514  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6515                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6516                                        (__v4di)__W);
6517}
6518
6519static __inline__ __m256i __DEFAULT_FN_ATTRS256
6520_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6521{
6522  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6523                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6524                                        (__v4di)_mm256_setzero_si256());
6525}
6526
/* Unmasked form: forwards A, B, C and imm to the pternlogd128 mask builtin
   with an all-ones mask.  The whole expansion is parenthesized so a postfix
   operator at the use site cannot bind ahead of the leading cast. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6532
/* Masked form: forwards A, B, C and imm to the pternlogd128 mask builtin with
   U as the mask.  Note the argument order: the mask U follows A.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6538
/* Zero-masked form: forwards A, B, C and imm to the pternlogd128 maskz
   builtin with U as the mask.  The whole expansion is parenthesized so a
   postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6544
/* Unmasked form: forwards A, B, C and imm to the pternlogd256 mask builtin
   with an all-ones mask.  The whole expansion is parenthesized so a postfix
   operator at the use site cannot bind ahead of the leading cast. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6550
/* Masked form: forwards A, B, C and imm to the pternlogd256 mask builtin with
   U as the mask.  Note the argument order: the mask U follows A.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6556
/* Zero-masked form: forwards A, B, C and imm to the pternlogd256 maskz
   builtin with U as the mask.  The whole expansion is parenthesized so a
   postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6562
/* Unmasked form: forwards A, B, C and imm to the pternlogq128 mask builtin
   with an all-ones mask.  The whole expansion is parenthesized so a postfix
   operator at the use site cannot bind ahead of the leading cast. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6568
/* Masked form: forwards A, B, C and imm to the pternlogq128 mask builtin with
   U as the mask.  Note the argument order: the mask U follows A.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6574
/* Zero-masked form: forwards A, B, C and imm to the pternlogq128 maskz
   builtin with U as the mask.  The whole expansion is parenthesized so a
   postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6580
/* Unmasked form: forwards A, B, C and imm to the pternlogq256 mask builtin
   with an all-ones mask.  The whole expansion is parenthesized so a postfix
   operator at the use site cannot bind ahead of the leading cast. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6586
/* Masked form: forwards A, B, C and imm to the pternlogq256 mask builtin with
   U as the mask.  Note the argument order: the mask U follows A.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6592
/* Zero-masked form: forwards A, B, C and imm to the pternlogq256 maskz
   builtin with U as the mask.  The whole expansion is parenthesized so a
   postfix operator at the use site cannot bind ahead of the leading cast. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6598
6599
6600
/* Forwards A, B (as __v8sf) and imm to the shuf_f32x4_256 builtin.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
6604
/* Merge-masked _mm256_shuffle_f32x4: passes mask U, the shuffle result and W
   to the 256-bit float select builtin.  The whole expansion is parenthesized
   so a postfix operator at the use site cannot bind ahead of the leading
   cast. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))
6609
/* Zero-masked _mm256_shuffle_f32x4: passes mask U, the shuffle result and a
   zero vector to the 256-bit float select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
6614
/* Forwards A, B (as __v4df) and imm to the shuf_f64x2_256 builtin.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
6618
/* Merge-masked _mm256_shuffle_f64x2: passes mask U, the shuffle result and W
   to the 256-bit double select builtin.  The whole expansion is parenthesized
   so a postfix operator at the use site cannot bind ahead of the leading
   cast. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))
6623
/* Zero-masked _mm256_shuffle_f64x2: passes mask U, the shuffle result and a
   zero vector to the 256-bit double select builtin.  The whole expansion is
   parenthesized so a postfix operator at the use site cannot bind ahead of
   the leading cast. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
6628
/* Forwards A, B (as __v8si) and imm to the shuf_i32x4_256 builtin.  The whole
   expansion is parenthesized so a postfix operator at the use site cannot
   bind ahead of the leading cast. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
6632
/* Expands to the selectd_256 builtin given the mask U (cast to __mmask8), the
 * result of _mm256_shuffle_i32x4(A, B, imm) and W (cast to __v8si); the
 * result is cast to __m256i.  Fully parenthesized so that a postfix operator
 * applied to the macro's result cannot bind to the builtin call ahead of the
 * cast. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))
6637
/* Expands to the selectd_256 builtin given the mask U (cast to __mmask8), the
 * result of _mm256_shuffle_i32x4(A, B, imm) and a zero vector from
 * _mm256_setzero_si256(); the result is cast to __m256i.  Fully parenthesized
 * so that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
6642
/* Expands to the shuf_i64x2_256 builtin on A and B (each cast to __v4di) with
 * the immediate imm cast to int; the result is cast to __m256i.  Fully
 * parenthesized so that a postfix operator applied to the macro's result
 * cannot bind to the builtin call ahead of the cast. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
6646
/* Expands to the selectq_256 builtin given the mask U (cast to __mmask8), the
 * result of _mm256_shuffle_i64x2(A, B, imm) and W (cast to __v4di); the
 * result is cast to __m256i.  Fully parenthesized so that a postfix operator
 * applied to the macro's result cannot bind to the builtin call ahead of the
 * cast. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))
6651
6652
/* Expands to the selectq_256 builtin given the mask U (cast to __mmask8), the
 * result of _mm256_shuffle_i64x2(A, B, imm) and a zero vector from
 * _mm256_setzero_si256(); the result is cast to __m256i.  Fully parenthesized
 * so that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
6657
/* Expands to the selectpd_128 builtin given the mask U (cast to __mmask8),
 * the result of _mm_shuffle_pd(A, B, M) and W (cast to __v2df); the result is
 * cast to __m128d.  Fully parenthesized so that a postfix operator applied to
 * the macro's result cannot bind to the builtin call ahead of the cast. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
6662
/* Expands to the selectpd_128 builtin given the mask U (cast to __mmask8),
 * the result of _mm_shuffle_pd(A, B, M) and a zero vector from
 * _mm_setzero_pd(); the result is cast to __m128d.  Fully parenthesized so
 * that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
6667
/* Expands to the selectpd_256 builtin given the mask U (cast to __mmask8),
 * the result of _mm256_shuffle_pd(A, B, M) and W (cast to __v4df); the result
 * is cast to __m256d.  Fully parenthesized so that a postfix operator applied
 * to the macro's result cannot bind to the builtin call ahead of the cast. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
6672
/* Expands to the selectpd_256 builtin given the mask U (cast to __mmask8),
 * the result of _mm256_shuffle_pd(A, B, M) and a zero vector from
 * _mm256_setzero_pd(); the result is cast to __m256d.  Fully parenthesized so
 * that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
6677
/* Expands to the selectps_128 builtin given the mask U (cast to __mmask8),
 * the result of _mm_shuffle_ps(A, B, M) and W (cast to __v4sf); the result is
 * cast to __m128.  Fully parenthesized so that a postfix operator applied to
 * the macro's result cannot bind to the builtin call ahead of the cast. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
6682
/* Expands to the selectps_128 builtin given the mask U (cast to __mmask8),
 * the result of _mm_shuffle_ps(A, B, M) and a zero vector from
 * _mm_setzero_ps(); the result is cast to __m128.  Fully parenthesized so
 * that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
6687
/* Expands to the selectps_256 builtin given the mask U (cast to __mmask8),
 * the result of _mm256_shuffle_ps(A, B, M) and W (cast to __v8sf); the result
 * is cast to __m256.  Fully parenthesized so that a postfix operator applied
 * to the macro's result cannot bind to the builtin call ahead of the cast. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
6692
/* Expands to the selectps_256 builtin given the mask U (cast to __mmask8),
 * the result of _mm256_shuffle_ps(A, B, M) and a zero vector from
 * _mm256_setzero_ps(); the result is cast to __m256.  Fully parenthesized so
 * that a postfix operator applied to the macro's result cannot bind to the
 * builtin call ahead of the cast. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6697
6698static __inline__ __m128d __DEFAULT_FN_ATTRS128
6699_mm_rsqrt14_pd (__m128d __A)
6700{
6701  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6702                 (__v2df)
6703                 _mm_setzero_pd (),
6704                 (__mmask8) -1);
6705}
6706
6707static __inline__ __m128d __DEFAULT_FN_ATTRS128
6708_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6709{
6710  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6711                 (__v2df) __W,
6712                 (__mmask8) __U);
6713}
6714
6715static __inline__ __m128d __DEFAULT_FN_ATTRS128
6716_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6717{
6718  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6719                 (__v2df)
6720                 _mm_setzero_pd (),
6721                 (__mmask8) __U);
6722}
6723
6724static __inline__ __m256d __DEFAULT_FN_ATTRS256
6725_mm256_rsqrt14_pd (__m256d __A)
6726{
6727  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6728                 (__v4df)
6729                 _mm256_setzero_pd (),
6730                 (__mmask8) -1);
6731}
6732
6733static __inline__ __m256d __DEFAULT_FN_ATTRS256
6734_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6735{
6736  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6737                 (__v4df) __W,
6738                 (__mmask8) __U);
6739}
6740
6741static __inline__ __m256d __DEFAULT_FN_ATTRS256
6742_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6743{
6744  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6745                 (__v4df)
6746                 _mm256_setzero_pd (),
6747                 (__mmask8) __U);
6748}
6749
6750static __inline__ __m128 __DEFAULT_FN_ATTRS128
6751_mm_rsqrt14_ps (__m128 __A)
6752{
6753  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6754                (__v4sf)
6755                _mm_setzero_ps (),
6756                (__mmask8) -1);
6757}
6758
6759static __inline__ __m128 __DEFAULT_FN_ATTRS128
6760_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6761{
6762  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6763                (__v4sf) __W,
6764                (__mmask8) __U);
6765}
6766
6767static __inline__ __m128 __DEFAULT_FN_ATTRS128
6768_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6769{
6770  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6771                (__v4sf)
6772                _mm_setzero_ps (),
6773                (__mmask8) __U);
6774}
6775
6776static __inline__ __m256 __DEFAULT_FN_ATTRS256
6777_mm256_rsqrt14_ps (__m256 __A)
6778{
6779  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6780                (__v8sf)
6781                _mm256_setzero_ps (),
6782                (__mmask8) -1);
6783}
6784
6785static __inline__ __m256 __DEFAULT_FN_ATTRS256
6786_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6787{
6788  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6789                (__v8sf) __W,
6790                (__mmask8) __U);
6791}
6792
6793static __inline__ __m256 __DEFAULT_FN_ATTRS256
6794_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6795{
6796  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6797                (__v8sf)
6798                _mm256_setzero_ps (),
6799                (__mmask8) __U);
6800}
6801
6802static __inline__ __m256 __DEFAULT_FN_ATTRS256
6803_mm256_broadcast_f32x4(__m128 __A)
6804{
6805  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6806                                         0, 1, 2, 3, 0, 1, 2, 3);
6807}
6808
6809static __inline__ __m256 __DEFAULT_FN_ATTRS256
6810_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6811{
6812  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6813                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6814                                            (__v8sf)__O);
6815}
6816
6817static __inline__ __m256 __DEFAULT_FN_ATTRS256
6818_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6819{
6820  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6821                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6822                                            (__v8sf)_mm256_setzero_ps());
6823}
6824
6825static __inline__ __m256i __DEFAULT_FN_ATTRS256
6826_mm256_broadcast_i32x4(__m128i __A)
6827{
6828  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6829                                          0, 1, 2, 3, 0, 1, 2, 3);
6830}
6831
6832static __inline__ __m256i __DEFAULT_FN_ATTRS256
6833_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6834{
6835  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6836                                            (__v8si)_mm256_broadcast_i32x4(__A),
6837                                            (__v8si)__O);
6838}
6839
6840static __inline__ __m256i __DEFAULT_FN_ATTRS256
6841_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6842{
6843  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6844                                            (__v8si)_mm256_broadcast_i32x4(__A),
6845                                            (__v8si)_mm256_setzero_si256());
6846}
6847
6848static __inline__ __m256d __DEFAULT_FN_ATTRS256
6849_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6850{
6851  return (__m256d)__builtin_ia32_selectpd_256(__M,
6852                                              (__v4df) _mm256_broadcastsd_pd(__A),
6853                                              (__v4df) __O);
6854}
6855
6856static __inline__ __m256d __DEFAULT_FN_ATTRS256
6857_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6858{
6859  return (__m256d)__builtin_ia32_selectpd_256(__M,
6860                                              (__v4df) _mm256_broadcastsd_pd(__A),
6861                                              (__v4df) _mm256_setzero_pd());
6862}
6863
6864static __inline__ __m128 __DEFAULT_FN_ATTRS128
6865_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6866{
6867  return (__m128)__builtin_ia32_selectps_128(__M,
6868                                             (__v4sf) _mm_broadcastss_ps(__A),
6869                                             (__v4sf) __O);
6870}
6871
6872static __inline__ __m128 __DEFAULT_FN_ATTRS128
6873_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6874{
6875  return (__m128)__builtin_ia32_selectps_128(__M,
6876                                             (__v4sf) _mm_broadcastss_ps(__A),
6877                                             (__v4sf) _mm_setzero_ps());
6878}
6879
6880static __inline__ __m256 __DEFAULT_FN_ATTRS256
6881_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6882{
6883  return (__m256)__builtin_ia32_selectps_256(__M,
6884                                             (__v8sf) _mm256_broadcastss_ps(__A),
6885                                             (__v8sf) __O);
6886}
6887
6888static __inline__ __m256 __DEFAULT_FN_ATTRS256
6889_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6890{
6891  return (__m256)__builtin_ia32_selectps_256(__M,
6892                                             (__v8sf) _mm256_broadcastss_ps(__A),
6893                                             (__v8sf) _mm256_setzero_ps());
6894}
6895
6896static __inline__ __m128i __DEFAULT_FN_ATTRS128
6897_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6898{
6899  return (__m128i)__builtin_ia32_selectd_128(__M,
6900                                             (__v4si) _mm_broadcastd_epi32(__A),
6901                                             (__v4si) __O);
6902}
6903
6904static __inline__ __m128i __DEFAULT_FN_ATTRS128
6905_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6906{
6907  return (__m128i)__builtin_ia32_selectd_128(__M,
6908                                             (__v4si) _mm_broadcastd_epi32(__A),
6909                                             (__v4si) _mm_setzero_si128());
6910}
6911
6912static __inline__ __m256i __DEFAULT_FN_ATTRS256
6913_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6914{
6915  return (__m256i)__builtin_ia32_selectd_256(__M,
6916                                             (__v8si) _mm256_broadcastd_epi32(__A),
6917                                             (__v8si) __O);
6918}
6919
6920static __inline__ __m256i __DEFAULT_FN_ATTRS256
6921_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6922{
6923  return (__m256i)__builtin_ia32_selectd_256(__M,
6924                                             (__v8si) _mm256_broadcastd_epi32(__A),
6925                                             (__v8si) _mm256_setzero_si256());
6926}
6927
6928static __inline__ __m128i __DEFAULT_FN_ATTRS128
6929_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6930{
6931  return (__m128i)__builtin_ia32_selectq_128(__M,
6932                                             (__v2di) _mm_broadcastq_epi64(__A),
6933                                             (__v2di) __O);
6934}
6935
6936static __inline__ __m128i __DEFAULT_FN_ATTRS128
6937_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6938{
6939  return (__m128i)__builtin_ia32_selectq_128(__M,
6940                                             (__v2di) _mm_broadcastq_epi64(__A),
6941                                             (__v2di) _mm_setzero_si128());
6942}
6943
6944static __inline__ __m256i __DEFAULT_FN_ATTRS256
6945_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6946{
6947  return (__m256i)__builtin_ia32_selectq_256(__M,
6948                                             (__v4di) _mm256_broadcastq_epi64(__A),
6949                                             (__v4di) __O);
6950}
6951
6952static __inline__ __m256i __DEFAULT_FN_ATTRS256
6953_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6954{
6955  return (__m256i)__builtin_ia32_selectq_256(__M,
6956                                             (__v4di) _mm256_broadcastq_epi64(__A),
6957                                             (__v4di) _mm256_setzero_si256());
6958}
6959
6960static __inline__ __m128i __DEFAULT_FN_ATTRS128
6961_mm_cvtsepi32_epi8 (__m128i __A)
6962{
6963  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6964               (__v16qi)_mm_undefined_si128(),
6965               (__mmask8) -1);
6966}
6967
6968static __inline__ __m128i __DEFAULT_FN_ATTRS128
6969_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6970{
6971  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6972               (__v16qi) __O, __M);
6973}
6974
6975static __inline__ __m128i __DEFAULT_FN_ATTRS128
6976_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6977{
6978  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6979               (__v16qi) _mm_setzero_si128 (),
6980               __M);
6981}
6982
6983static __inline__ void __DEFAULT_FN_ATTRS128
6984_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6985{
6986  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6987}
6988
6989static __inline__ __m128i __DEFAULT_FN_ATTRS256
6990_mm256_cvtsepi32_epi8 (__m256i __A)
6991{
6992  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6993               (__v16qi)_mm_undefined_si128(),
6994               (__mmask8) -1);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS256
6998_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6999{
7000  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7001               (__v16qi) __O, __M);
7002}
7003
7004static __inline__ __m128i __DEFAULT_FN_ATTRS256
7005_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7006{
7007  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7008               (__v16qi) _mm_setzero_si128 (),
7009               __M);
7010}
7011
7012static __inline__ void __DEFAULT_FN_ATTRS256
7013_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7014{
7015  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7016}
7017
7018static __inline__ __m128i __DEFAULT_FN_ATTRS128
7019_mm_cvtsepi32_epi16 (__m128i __A)
7020{
7021  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7022               (__v8hi)_mm_setzero_si128 (),
7023               (__mmask8) -1);
7024}
7025
7026static __inline__ __m128i __DEFAULT_FN_ATTRS128
7027_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7028{
7029  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7030               (__v8hi)__O,
7031               __M);
7032}
7033
7034static __inline__ __m128i __DEFAULT_FN_ATTRS128
7035_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7036{
7037  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7038               (__v8hi) _mm_setzero_si128 (),
7039               __M);
7040}
7041
7042static __inline__ void __DEFAULT_FN_ATTRS128
7043_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7044{
7045  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7046}
7047
7048static __inline__ __m128i __DEFAULT_FN_ATTRS256
7049_mm256_cvtsepi32_epi16 (__m256i __A)
7050{
7051  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7052               (__v8hi)_mm_undefined_si128(),
7053               (__mmask8) -1);
7054}
7055
7056static __inline__ __m128i __DEFAULT_FN_ATTRS256
7057_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7058{
7059  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7060               (__v8hi) __O, __M);
7061}
7062
7063static __inline__ __m128i __DEFAULT_FN_ATTRS256
7064_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7065{
7066  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7067               (__v8hi) _mm_setzero_si128 (),
7068               __M);
7069}
7070
7071static __inline__ void __DEFAULT_FN_ATTRS256
7072_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7073{
7074  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7075}
7076
7077static __inline__ __m128i __DEFAULT_FN_ATTRS128
7078_mm_cvtsepi64_epi8 (__m128i __A)
7079{
7080  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7081               (__v16qi)_mm_undefined_si128(),
7082               (__mmask8) -1);
7083}
7084
7085static __inline__ __m128i __DEFAULT_FN_ATTRS128
7086_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7087{
7088  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7089               (__v16qi) __O, __M);
7090}
7091
7092static __inline__ __m128i __DEFAULT_FN_ATTRS128
7093_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7094{
7095  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7096               (__v16qi) _mm_setzero_si128 (),
7097               __M);
7098}
7099
7100static __inline__ void __DEFAULT_FN_ATTRS128
7101_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7102{
7103  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7104}
7105
7106static __inline__ __m128i __DEFAULT_FN_ATTRS256
7107_mm256_cvtsepi64_epi8 (__m256i __A)
7108{
7109  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7110               (__v16qi)_mm_undefined_si128(),
7111               (__mmask8) -1);
7112}
7113
7114static __inline__ __m128i __DEFAULT_FN_ATTRS256
7115_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7116{
7117  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7118               (__v16qi) __O, __M);
7119}
7120
7121static __inline__ __m128i __DEFAULT_FN_ATTRS256
7122_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7123{
7124  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7125               (__v16qi) _mm_setzero_si128 (),
7126               __M);
7127}
7128
7129static __inline__ void __DEFAULT_FN_ATTRS256
7130_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7131{
7132  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7133}
7134
7135static __inline__ __m128i __DEFAULT_FN_ATTRS128
7136_mm_cvtsepi64_epi32 (__m128i __A)
7137{
7138  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7139               (__v4si)_mm_undefined_si128(),
7140               (__mmask8) -1);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS128
7144_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7145{
7146  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7147               (__v4si) __O, __M);
7148}
7149
7150static __inline__ __m128i __DEFAULT_FN_ATTRS128
7151_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7152{
7153  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7154               (__v4si) _mm_setzero_si128 (),
7155               __M);
7156}
7157
7158static __inline__ void __DEFAULT_FN_ATTRS128
7159_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7160{
7161  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7162}
7163
7164static __inline__ __m128i __DEFAULT_FN_ATTRS256
7165_mm256_cvtsepi64_epi32 (__m256i __A)
7166{
7167  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7168               (__v4si)_mm_undefined_si128(),
7169               (__mmask8) -1);
7170}
7171
7172static __inline__ __m128i __DEFAULT_FN_ATTRS256
7173_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7174{
7175  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7176               (__v4si)__O,
7177               __M);
7178}
7179
7180static __inline__ __m128i __DEFAULT_FN_ATTRS256
7181_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7182{
7183  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7184               (__v4si) _mm_setzero_si128 (),
7185               __M);
7186}
7187
7188static __inline__ void __DEFAULT_FN_ATTRS256
7189_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7190{
7191  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7192}
7193
7194static __inline__ __m128i __DEFAULT_FN_ATTRS128
7195_mm_cvtsepi64_epi16 (__m128i __A)
7196{
7197  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7198               (__v8hi)_mm_undefined_si128(),
7199               (__mmask8) -1);
7200}
7201
7202static __inline__ __m128i __DEFAULT_FN_ATTRS128
7203_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7204{
7205  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7206               (__v8hi) __O, __M);
7207}
7208
7209static __inline__ __m128i __DEFAULT_FN_ATTRS128
7210_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7211{
7212  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7213               (__v8hi) _mm_setzero_si128 (),
7214               __M);
7215}
7216
7217static __inline__ void __DEFAULT_FN_ATTRS128
7218_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7219{
7220  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7221}
7222
7223static __inline__ __m128i __DEFAULT_FN_ATTRS256
7224_mm256_cvtsepi64_epi16 (__m256i __A)
7225{
7226  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7227               (__v8hi)_mm_undefined_si128(),
7228               (__mmask8) -1);
7229}
7230
7231static __inline__ __m128i __DEFAULT_FN_ATTRS256
7232_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7233{
7234  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7235               (__v8hi) __O, __M);
7236}
7237
7238static __inline__ __m128i __DEFAULT_FN_ATTRS256
7239_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7240{
7241  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7242               (__v8hi) _mm_setzero_si128 (),
7243               __M);
7244}
7245
7246static __inline__ void __DEFAULT_FN_ATTRS256
7247_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7248{
7249  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7250}
7251
7252static __inline__ __m128i __DEFAULT_FN_ATTRS128
7253_mm_cvtusepi32_epi8 (__m128i __A)
7254{
7255  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7256                (__v16qi)_mm_undefined_si128(),
7257                (__mmask8) -1);
7258}
7259
7260static __inline__ __m128i __DEFAULT_FN_ATTRS128
7261_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7262{
7263  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7264                (__v16qi) __O,
7265                __M);
7266}
7267
7268static __inline__ __m128i __DEFAULT_FN_ATTRS128
7269_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7270{
7271  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7272                (__v16qi) _mm_setzero_si128 (),
7273                __M);
7274}
7275
7276static __inline__ void __DEFAULT_FN_ATTRS128
7277_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7278{
7279  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7280}
7281
7282static __inline__ __m128i __DEFAULT_FN_ATTRS256
7283_mm256_cvtusepi32_epi8 (__m256i __A)
7284{
7285  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7286                (__v16qi)_mm_undefined_si128(),
7287                (__mmask8) -1);
7288}
7289
7290static __inline__ __m128i __DEFAULT_FN_ATTRS256
7291_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7292{
7293  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7294                (__v16qi) __O,
7295                __M);
7296}
7297
7298static __inline__ __m128i __DEFAULT_FN_ATTRS256
7299_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7300{
7301  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7302                (__v16qi) _mm_setzero_si128 (),
7303                __M);
7304}
7305
7306static __inline__ void __DEFAULT_FN_ATTRS256
7307_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7308{
7309  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7310}
7311
7312static __inline__ __m128i __DEFAULT_FN_ATTRS128
7313_mm_cvtusepi32_epi16 (__m128i __A)
7314{
7315  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7316                (__v8hi)_mm_undefined_si128(),
7317                (__mmask8) -1);
7318}
7319
7320static __inline__ __m128i __DEFAULT_FN_ATTRS128
7321_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7322{
7323  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7324                (__v8hi) __O, __M);
7325}
7326
7327static __inline__ __m128i __DEFAULT_FN_ATTRS128
7328_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7329{
7330  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7331                (__v8hi) _mm_setzero_si128 (),
7332                __M);
7333}
7334
7335static __inline__ void __DEFAULT_FN_ATTRS128
7336_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7337{
7338  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7339}
7340
7341static __inline__ __m128i __DEFAULT_FN_ATTRS256
7342_mm256_cvtusepi32_epi16 (__m256i __A)
7343{
7344  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7345                (__v8hi) _mm_undefined_si128(),
7346                (__mmask8) -1);
7347}
7348
7349static __inline__ __m128i __DEFAULT_FN_ATTRS256
7350_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7351{
7352  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7353                (__v8hi) __O, __M);
7354}
7355
7356static __inline__ __m128i __DEFAULT_FN_ATTRS256
7357_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7358{
7359  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7360                (__v8hi) _mm_setzero_si128 (),
7361                __M);
7362}
7363
7364static __inline__ void __DEFAULT_FN_ATTRS256
7365_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7366{
7367  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7368}
7369
7370static __inline__ __m128i __DEFAULT_FN_ATTRS128
7371_mm_cvtusepi64_epi8 (__m128i __A)
7372{
7373  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7374                (__v16qi)_mm_undefined_si128(),
7375                (__mmask8) -1);
7376}
7377
7378static __inline__ __m128i __DEFAULT_FN_ATTRS128
7379_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7380{
7381  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7382                (__v16qi) __O,
7383                __M);
7384}
7385
7386static __inline__ __m128i __DEFAULT_FN_ATTRS128
7387_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7388{
7389  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7390                (__v16qi) _mm_setzero_si128 (),
7391                __M);
7392}
7393
7394static __inline__ void __DEFAULT_FN_ATTRS128
7395_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7396{
7397  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7398}
7399
7400static __inline__ __m128i __DEFAULT_FN_ATTRS256
7401_mm256_cvtusepi64_epi8 (__m256i __A)
7402{
7403  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7404                (__v16qi)_mm_undefined_si128(),
7405                (__mmask8) -1);
7406}
7407
7408static __inline__ __m128i __DEFAULT_FN_ATTRS256
7409_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7410{
7411  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7412                (__v16qi) __O,
7413                __M);
7414}
7415
7416static __inline__ __m128i __DEFAULT_FN_ATTRS256
7417_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7418{
7419  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7420                (__v16qi) _mm_setzero_si128 (),
7421                __M);
7422}
7423
7424static __inline__ void __DEFAULT_FN_ATTRS256
7425_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7426{
7427  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7428}
7429
7430static __inline__ __m128i __DEFAULT_FN_ATTRS128
7431_mm_cvtusepi64_epi32 (__m128i __A)
7432{
7433  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7434                (__v4si)_mm_undefined_si128(),
7435                (__mmask8) -1);
7436}
7437
7438static __inline__ __m128i __DEFAULT_FN_ATTRS128
7439_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7440{
7441  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7442                (__v4si) __O, __M);
7443}
7444
7445static __inline__ __m128i __DEFAULT_FN_ATTRS128
7446_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7447{
7448  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7449                (__v4si) _mm_setzero_si128 (),
7450                __M);
7451}
7452
7453static __inline__ void __DEFAULT_FN_ATTRS128
7454_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7455{
7456  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7457}
7458
7459static __inline__ __m128i __DEFAULT_FN_ATTRS256
7460_mm256_cvtusepi64_epi32 (__m256i __A)
7461{
7462  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7463                (__v4si)_mm_undefined_si128(),
7464                (__mmask8) -1);
7465}
7466
7467static __inline__ __m128i __DEFAULT_FN_ATTRS256
7468_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7469{
7470  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7471                (__v4si) __O, __M);
7472}
7473
7474static __inline__ __m128i __DEFAULT_FN_ATTRS256
7475_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7476{
7477  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7478                (__v4si) _mm_setzero_si128 (),
7479                __M);
7480}
7481
7482static __inline__ void __DEFAULT_FN_ATTRS256
7483_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7484{
7485  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7486}
7487
7488static __inline__ __m128i __DEFAULT_FN_ATTRS128
7489_mm_cvtusepi64_epi16 (__m128i __A)
7490{
7491  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7492                (__v8hi)_mm_undefined_si128(),
7493                (__mmask8) -1);
7494}
7495
7496static __inline__ __m128i __DEFAULT_FN_ATTRS128
7497_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7498{
7499  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7500                (__v8hi) __O, __M);
7501}
7502
7503static __inline__ __m128i __DEFAULT_FN_ATTRS128
7504_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7505{
7506  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7507                (__v8hi) _mm_setzero_si128 (),
7508                __M);
7509}
7510
7511static __inline__ void __DEFAULT_FN_ATTRS128
7512_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7513{
7514  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7515}
7516
7517static __inline__ __m128i __DEFAULT_FN_ATTRS256
7518_mm256_cvtusepi64_epi16 (__m256i __A)
7519{
7520  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7521                (__v8hi)_mm_undefined_si128(),
7522                (__mmask8) -1);
7523}
7524
7525static __inline__ __m128i __DEFAULT_FN_ATTRS256
7526_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7527{
7528  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7529                (__v8hi) __O, __M);
7530}
7531
7532static __inline__ __m128i __DEFAULT_FN_ATTRS256
7533_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7534{
7535  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7536                (__v8hi) _mm_setzero_si128 (),
7537                __M);
7538}
7539
7540static __inline__ void __DEFAULT_FN_ATTRS256
7541_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7542{
7543  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7544}
7545
7546static __inline__ __m128i __DEFAULT_FN_ATTRS128
7547_mm_cvtepi32_epi8 (__m128i __A)
7548{
7549  return (__m128i)__builtin_shufflevector(
7550      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7551      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7552}
7553
7554static __inline__ __m128i __DEFAULT_FN_ATTRS128
7555_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7556{
7557  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7558              (__v16qi) __O, __M);
7559}
7560
7561static __inline__ __m128i __DEFAULT_FN_ATTRS128
7562_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7563{
7564  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7565              (__v16qi)
7566              _mm_setzero_si128 (),
7567              __M);
7568}
7569
7570static __inline__ void __DEFAULT_FN_ATTRS128
7571_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7572{
7573  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7574}
7575
7576static __inline__ __m128i __DEFAULT_FN_ATTRS256
7577_mm256_cvtepi32_epi8 (__m256i __A)
7578{
7579  return (__m128i)__builtin_shufflevector(
7580      __builtin_convertvector((__v8si)__A, __v8qi),
7581      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7582      12, 13, 14, 15);
7583}
7584
7585static __inline__ __m128i __DEFAULT_FN_ATTRS256
7586_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7587{
7588  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7589              (__v16qi) __O, __M);
7590}
7591
7592static __inline__ __m128i __DEFAULT_FN_ATTRS256
7593_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7594{
7595  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7596              (__v16qi) _mm_setzero_si128 (),
7597              __M);
7598}
7599
7600static __inline__ void __DEFAULT_FN_ATTRS256
7601_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7602{
7603  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7604}
7605
7606static __inline__ __m128i __DEFAULT_FN_ATTRS128
7607_mm_cvtepi32_epi16 (__m128i __A)
7608{
7609  return (__m128i)__builtin_shufflevector(
7610      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7611      2, 3, 4, 5, 6, 7);
7612}
7613
7614static __inline__ __m128i __DEFAULT_FN_ATTRS128
7615_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7616{
7617  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7618              (__v8hi) __O, __M);
7619}
7620
7621static __inline__ __m128i __DEFAULT_FN_ATTRS128
7622_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7623{
7624  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7625              (__v8hi) _mm_setzero_si128 (),
7626              __M);
7627}
7628
7629static __inline__ void __DEFAULT_FN_ATTRS128
7630_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7631{
7632  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7633}
7634
7635static __inline__ __m128i __DEFAULT_FN_ATTRS256
7636_mm256_cvtepi32_epi16 (__m256i __A)
7637{
7638  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7639}
7640
7641static __inline__ __m128i __DEFAULT_FN_ATTRS256
7642_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7643{
7644  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7645              (__v8hi) __O, __M);
7646}
7647
7648static __inline__ __m128i __DEFAULT_FN_ATTRS256
7649_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7650{
7651  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7652              (__v8hi) _mm_setzero_si128 (),
7653              __M);
7654}
7655
7656static __inline__ void __DEFAULT_FN_ATTRS256
7657_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
7658{
7659  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7660}
7661
7662static __inline__ __m128i __DEFAULT_FN_ATTRS128
7663_mm_cvtepi64_epi8 (__m128i __A)
7664{
7665  return (__m128i)__builtin_shufflevector(
7666      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7667      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7668}
7669
7670static __inline__ __m128i __DEFAULT_FN_ATTRS128
7671_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7672{
7673  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7674              (__v16qi) __O, __M);
7675}
7676
7677static __inline__ __m128i __DEFAULT_FN_ATTRS128
7678_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7679{
7680  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7681              (__v16qi) _mm_setzero_si128 (),
7682              __M);
7683}
7684
7685static __inline__ void __DEFAULT_FN_ATTRS128
7686_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7687{
7688  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7689}
7690
7691static __inline__ __m128i __DEFAULT_FN_ATTRS256
7692_mm256_cvtepi64_epi8 (__m256i __A)
7693{
7694  return (__m128i)__builtin_shufflevector(
7695      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7696      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7697}
7698
7699static __inline__ __m128i __DEFAULT_FN_ATTRS256
7700_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7701{
7702  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7703              (__v16qi) __O, __M);
7704}
7705
7706static __inline__ __m128i __DEFAULT_FN_ATTRS256
7707_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7708{
7709  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7710              (__v16qi) _mm_setzero_si128 (),
7711              __M);
7712}
7713
7714static __inline__ void __DEFAULT_FN_ATTRS256
7715_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7716{
7717  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7718}
7719
7720static __inline__ __m128i __DEFAULT_FN_ATTRS128
7721_mm_cvtepi64_epi32 (__m128i __A)
7722{
7723  return (__m128i)__builtin_shufflevector(
7724      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7725}
7726
7727static __inline__ __m128i __DEFAULT_FN_ATTRS128
7728_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7729{
7730  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7731              (__v4si) __O, __M);
7732}
7733
7734static __inline__ __m128i __DEFAULT_FN_ATTRS128
7735_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7736{
7737  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7738              (__v4si) _mm_setzero_si128 (),
7739              __M);
7740}
7741
7742static __inline__ void __DEFAULT_FN_ATTRS128
7743_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7744{
7745  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7746}
7747
7748static __inline__ __m128i __DEFAULT_FN_ATTRS256
7749_mm256_cvtepi64_epi32 (__m256i __A)
7750{
7751  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7752}
7753
7754static __inline__ __m128i __DEFAULT_FN_ATTRS256
7755_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7756{
7757  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7758                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7759                                             (__v4si)__O);
7760}
7761
7762static __inline__ __m128i __DEFAULT_FN_ATTRS256
7763_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7764{
7765  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7766                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7767                                             (__v4si)_mm_setzero_si128());
7768}
7769
7770static __inline__ void __DEFAULT_FN_ATTRS256
7771_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7772{
7773  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7774}
7775
7776static __inline__ __m128i __DEFAULT_FN_ATTRS128
7777_mm_cvtepi64_epi16 (__m128i __A)
7778{
7779  return (__m128i)__builtin_shufflevector(
7780      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7781      3, 3, 3, 3);
7782}
7783
7784static __inline__ __m128i __DEFAULT_FN_ATTRS128
7785_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7786{
7787  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7788              (__v8hi)__O,
7789              __M);
7790}
7791
7792static __inline__ __m128i __DEFAULT_FN_ATTRS128
7793_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7794{
7795  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7796              (__v8hi) _mm_setzero_si128 (),
7797              __M);
7798}
7799
7800static __inline__ void __DEFAULT_FN_ATTRS128
7801_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7802{
7803  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7804}
7805
7806static __inline__ __m128i __DEFAULT_FN_ATTRS256
7807_mm256_cvtepi64_epi16 (__m256i __A)
7808{
7809  return (__m128i)__builtin_shufflevector(
7810      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7811      2, 3, 4, 5, 6, 7);
7812}
7813
7814static __inline__ __m128i __DEFAULT_FN_ATTRS256
7815_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7816{
7817  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7818              (__v8hi) __O, __M);
7819}
7820
7821static __inline__ __m128i __DEFAULT_FN_ATTRS256
7822_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7823{
7824  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7825              (__v8hi) _mm_setzero_si128 (),
7826              __M);
7827}
7828
7829static __inline__ void __DEFAULT_FN_ATTRS256
7830_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7831{
7832  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7833}
7834
/* Extracts the 128-bit group of four floats of A selected by imm.  All mask
   bits are set, so the pass-through operand is left undefined.  The expansion
   is wrapped in parentheses so that a postfix operator written after the
   macro call applies to the cast result, not to the builtin call. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
7840
/* Merge-masked extraction of the 128-bit float group of A selected by imm;
   elements whose bit in U is clear come from W.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
7846
/* Zero-masked extraction of the 128-bit float group of A selected by imm;
   elements whose bit in U is clear are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
7852
/* Extracts the 128-bit group of four 32-bit integers of A selected by imm.
   All mask bits are set, so the pass-through operand is left undefined.  The
   expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
7858
/* Merge-masked extraction of the 128-bit integer group of A selected by imm;
   elements whose bit in U is clear come from W.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
7864
/* Zero-masked extraction of the 128-bit integer group of A selected by imm;
   elements whose bit in U is clear are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
7870
/* Returns A with the 128-bit group selected by imm replaced by the four
   floats of B.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))
7874
/* Merge-masked _mm256_insertf32x4: elements whose bit in U is set take the
   insert result, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))
7879
/* Zero-masked _mm256_insertf32x4: elements whose bit in U is set take the
   insert result, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
7884
/* Returns A with the 128-bit group selected by imm replaced by the four
   32-bit integers of B.  The expansion is wrapped in parentheses so that a
   postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))
7888
/* Merge-masked _mm256_inserti32x4: elements whose bit in U is set take the
   insert result, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))
7893
/* Zero-masked _mm256_inserti32x4: elements whose bit in U is set take the
   insert result, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7898
/* Normalizes the mantissa of each double in A (VGETMANTPD).  B selects the
   normalization interval and C the sign control; they are packed into the
   immediate as (C << 2) | B.  The expansion is wrapped in parentheses so that
   a postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
7904
/* Merge-masked mantissa normalization of the doubles in A (VGETMANTPD);
   elements whose bit in U is clear come from W.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
7910
/* Zero-masked mantissa normalization of the doubles in A (VGETMANTPD);
   elements whose bit in U is clear are zero.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
7916
/* Normalizes the mantissa of each double in the 256-bit vector A
   (VGETMANTPD).  B (interval) and C (sign control) are packed into the
   immediate as (C << 2) | B.  The expansion is wrapped in parentheses so that
   a postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
7922
/* Merge-masked 256-bit mantissa normalization of doubles (VGETMANTPD);
   elements whose bit in U is clear come from W.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
7928
/* Zero-masked 256-bit mantissa normalization of doubles (VGETMANTPD);
   elements whose bit in U is clear are zero.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
7934
/* Normalizes the mantissa of each float in A (VGETMANTPS).  B (interval) and
   C (sign control) are packed into the immediate as (C << 2) | B.  The
   expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))
7940
/* Merge-masked mantissa normalization of the floats in A (VGETMANTPS);
   elements whose bit in U is clear come from W.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))
7946
/* Zero-masked mantissa normalization of the floats in A (VGETMANTPS);
   elements whose bit in U is clear are zero.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
7952
/* Normalizes the mantissa of each float in the 256-bit vector A
   (VGETMANTPS).  B (interval) and C (sign control) are packed into the
   immediate as (C << 2) | B.  The expansion is wrapped in parentheses so that
   a postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))
7958
/* Merge-masked 256-bit mantissa normalization of floats (VGETMANTPS);
   elements whose bit in U is clear come from W.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
7964
/* Zero-masked 256-bit mantissa normalization of floats (VGETMANTPS);
   elements whose bit in U is clear are zero.  B (interval) and C (sign
   control) are packed into the immediate as (C << 2) | B.  The expansion is
   wrapped in parentheses so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
7970
/* Masked gather of doubles from addr using the 64-bit indices in index,
   scaled by scale; elements whose bit in mask is clear keep the value from
   v1_old.  The expansion is wrapped in parentheses so that a postfix operator
   written after the macro call applies to the cast result, not to the builtin
   call. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7976
/* Masked gather of 64-bit integers from addr using the 64-bit indices in
   index, scaled by scale; elements whose bit in mask is clear keep the value
   from v1_old.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7982
/* Masked 256-bit gather of doubles from addr using the 64-bit indices in
   index, scaled by scale; elements whose bit in mask is clear keep the value
   from v1_old.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7988
/* Masked 256-bit gather of 64-bit integers from addr using the 64-bit indices
   in index, scaled by scale; elements whose bit in mask is clear keep the
   value from v1_old.  The expansion is wrapped in parentheses so that a
   postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7994
/* Masked gather of floats from addr using the 64-bit indices in the 128-bit
   vector index, scaled by scale; elements whose bit in mask is clear keep the
   value from v1_old.  The expansion is wrapped in parentheses so that a
   postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8000
/* Masked gather of 32-bit integers from addr using the 64-bit indices in the
   128-bit vector index, scaled by scale; elements whose bit in mask is clear
   keep the value from v1_old.  The expansion is wrapped in parentheses so
   that a postfix operator written after the macro call applies to the cast
   result, not to the builtin call. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8006
/* Masked gather of floats from addr using the 64-bit indices in the 256-bit
   vector index, scaled by scale.  The result and v1_old are 128-bit vectors;
   elements whose bit in mask is clear keep the value from v1_old.  The
   expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8012
/* Masked gather of 32-bit integers from addr using the 64-bit indices in the
   256-bit vector index, scaled by scale.  The result and v1_old are 128-bit
   vectors; elements whose bit in mask is clear keep the value from v1_old.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8018
/* Masked gather of doubles from addr using 32-bit indices taken from index,
   scaled by scale; elements whose bit in mask is clear keep the value from
   v1_old.  The expansion is wrapped in parentheses so that a postfix operator
   written after the macro call applies to the cast result, not to the builtin
   call. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8024
/* Masked gather of 64-bit integers from addr using 32-bit indices taken from
   index, scaled by scale; elements whose bit in mask is clear keep the value
   from v1_old.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8030
/* Masked 256-bit gather of doubles from addr using the 32-bit indices in the
   128-bit vector index, scaled by scale; elements whose bit in mask is clear
   keep the value from v1_old.  The expansion is wrapped in parentheses so
   that a postfix operator written after the macro call applies to the cast
   result, not to the builtin call. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8036
/* Masked 256-bit gather of 64-bit integers from addr using the 32-bit indices
   in the 128-bit vector index, scaled by scale; elements whose bit in mask is
   clear keep the value from v1_old.  The expansion is wrapped in parentheses
   so that a postfix operator written after the macro call applies to the cast
   result, not to the builtin call. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8042
/* Masked gather of floats from addr using the 32-bit indices in index, scaled
   by scale; elements whose bit in mask is clear keep the value from v1_old.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8048
/* Masked gather of 32-bit integers from addr using the 32-bit indices in
   index, scaled by scale; elements whose bit in mask is clear keep the value
   from v1_old.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8054
/* Masked 256-bit gather of floats from addr using the 32-bit indices in
   index, scaled by scale; elements whose bit in mask is clear keep the value
   from v1_old.  The expansion is wrapped in parentheses so that a postfix
   operator written after the macro call applies to the cast result, not to
   the builtin call. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8060
/* Masked 256-bit gather of 32-bit integers from addr using the 32-bit indices
   in index, scaled by scale; elements whose bit in mask is clear keep the
   value from v1_old.  The expansion is wrapped in parentheses so that a
   postfix operator written after the macro call applies to the cast result,
   not to the builtin call. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8066
/* Shuffles the four doubles of X according to the immediate control C.  The
   expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the cast result, not to the builtin call. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
8069
/* Merge-masked _mm256_permutex_pd: elements whose bit in U is set take the
   shuffled value, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
8074
/* Zero-masked _mm256_permutex_pd: elements whose bit in U is set take the
   shuffled value, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
8079
/* Shuffles the four 64-bit integers of X according to the immediate control
   C.  The expansion is wrapped in parentheses so that a postfix operator
   written after the macro call applies to the cast result, not to the builtin
   call. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
8082
/* Merge-masked _mm256_permutex_epi64: elements whose bit in U is set take the
   shuffled value, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)(__m256i)(W)))
8087
/* Zero-masked _mm256_permutex_epi64: elements whose bit in U is set take the
   shuffled value, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
8092
8093static __inline__ __m256d __DEFAULT_FN_ATTRS256
8094_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8095{
8096  return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8097}
8098
8099static __inline__ __m256d __DEFAULT_FN_ATTRS256
8100_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8101          __m256d __Y)
8102{
8103  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8104                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8105                                        (__v4df)__W);
8106}
8107
8108static __inline__ __m256d __DEFAULT_FN_ATTRS256
8109_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8110{
8111  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8112                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8113                                        (__v4df)_mm256_setzero_pd());
8114}
8115
8116static __inline__ __m256i __DEFAULT_FN_ATTRS256
8117_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8118{
8119  return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8120}
8121
8122static __inline__ __m256i __DEFAULT_FN_ATTRS256
8123_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8124{
8125  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8126                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8127                                     (__v4di)_mm256_setzero_si256());
8128}
8129
8130static __inline__ __m256i __DEFAULT_FN_ATTRS256
8131_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8132             __m256i __Y)
8133{
8134  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8135                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8136                                     (__v4di)__W);
8137}
8138
/* Alias for _mm256_permutevar8x32_ps with the operands swapped: here the
   index vector A comes first and the data vector B second. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8140
8141static __inline__ __m256 __DEFAULT_FN_ATTRS256
8142_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8143{
8144  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8145                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8146                                        (__v8sf)__W);
8147}
8148
8149static __inline__ __m256 __DEFAULT_FN_ATTRS256
8150_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8151{
8152  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8153                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8154                                        (__v8sf)_mm256_setzero_ps());
8155}
8156
/* Alias for _mm256_permutevar8x32_epi32 with the operands swapped: here the
   index vector A comes first and the data vector B second. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8158
8159static __inline__ __m256i __DEFAULT_FN_ATTRS256
8160_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8161                              __m256i __Y)
8162{
8163  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8164                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8165                                     (__v8si)__W);
8166}
8167
8168static __inline__ __m256i __DEFAULT_FN_ATTRS256
8169_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8170{
8171  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8172                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8173                                     (__v8si)_mm256_setzero_si256());
8174}
8175
/* Concatenates A and B, shifts the pair right by imm 32-bit elements and
   returns the low 128 bits (VALIGND).  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))
8179
/* Merge-masked _mm_alignr_epi32: elements whose bit in U is set take the
   aligned value, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))
8184
/* Zero-masked _mm_alignr_epi32: elements whose bit in U is set take the
   aligned value, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))
8189
/* Concatenates A and B, shifts the pair right by imm 32-bit elements and
   returns the low 256 bits (VALIGND).  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))
8193
/* Merge-masked _mm256_alignr_epi32: elements whose bit in U is set take the
   aligned value, the others keep the value from W.  The expansion is wrapped
   in parentheses so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))
8198
/* Zero-masked _mm256_alignr_epi32: elements whose bit in U is set take the
   aligned value, the others are zero.  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
8203
/* Concatenates A and B, shifts the pair right by imm 64-bit elements and
   returns the low 128 bits (VALIGNQ).  The expansion is wrapped in
   parentheses so that a postfix operator written after the macro call applies
   to the cast result, not to the builtin call. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))
8207
/* Merge-masked form of _mm_alignr_epi64: computes
 * _mm_alignr_epi64(A, B, imm) and selects, per 64-bit lane, between that
 * result (mask bit in U set) and the corresponding lane of W (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))
8212
/* Zero-masked form of _mm_alignr_epi64: computes
 * _mm_alignr_epi64(A, B, imm) and selects, per 64-bit lane, between that
 * result (mask bit in U set) and zero (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
8217
/* Expands to __builtin_ia32_alignq256 applied to A and B (viewed as four
 * 64-bit lanes each) with the integer immediate imm; the result is typed as
 * __m256i.
 *
 * The whole expansion is parenthesized so that the leading cast cannot be
 * separated from the builtin call by a higher-precedence operator (postfix
 * subscript, sizeof, ...) at the point of use. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))
8221
/* Merge-masked form of _mm256_alignr_epi64: computes
 * _mm256_alignr_epi64(A, B, imm) and selects, per 64-bit lane, between that
 * result (mask bit in U set) and the corresponding lane of W (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))
8226
/* Zero-masked form of _mm256_alignr_epi64: computes
 * _mm256_alignr_epi64(A, B, imm) and selects, per 64-bit lane, between that
 * result (mask bit in U set) and zero (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8231
8232static __inline__ __m128 __DEFAULT_FN_ATTRS128
8233_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8234{
8235  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8236                                             (__v4sf)_mm_movehdup_ps(__A),
8237                                             (__v4sf)__W);
8238}
8239
8240static __inline__ __m128 __DEFAULT_FN_ATTRS128
8241_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8242{
8243  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8244                                             (__v4sf)_mm_movehdup_ps(__A),
8245                                             (__v4sf)_mm_setzero_ps());
8246}
8247
8248static __inline__ __m256 __DEFAULT_FN_ATTRS256
8249_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8250{
8251  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8252                                             (__v8sf)_mm256_movehdup_ps(__A),
8253                                             (__v8sf)__W);
8254}
8255
8256static __inline__ __m256 __DEFAULT_FN_ATTRS256
8257_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8258{
8259  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8260                                             (__v8sf)_mm256_movehdup_ps(__A),
8261                                             (__v8sf)_mm256_setzero_ps());
8262}
8263
8264static __inline__ __m128 __DEFAULT_FN_ATTRS128
8265_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8266{
8267  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8268                                             (__v4sf)_mm_moveldup_ps(__A),
8269                                             (__v4sf)__W);
8270}
8271
8272static __inline__ __m128 __DEFAULT_FN_ATTRS128
8273_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8274{
8275  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8276                                             (__v4sf)_mm_moveldup_ps(__A),
8277                                             (__v4sf)_mm_setzero_ps());
8278}
8279
8280static __inline__ __m256 __DEFAULT_FN_ATTRS256
8281_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8282{
8283  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8284                                             (__v8sf)_mm256_moveldup_ps(__A),
8285                                             (__v8sf)__W);
8286}
8287
8288static __inline__ __m256 __DEFAULT_FN_ATTRS256
8289_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8290{
8291  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8292                                             (__v8sf)_mm256_moveldup_ps(__A),
8293                                             (__v8sf)_mm256_setzero_ps());
8294}
8295
/* Merge-masked form of _mm256_shuffle_epi32: computes
 * _mm256_shuffle_epi32(A, I) and selects, per 32-bit lane, between that
 * result (mask bit in U set) and the corresponding lane of W (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))
8300
/* Zero-masked form of _mm256_shuffle_epi32: computes
 * _mm256_shuffle_epi32(A, I) and selects, per 32-bit lane, between that
 * result (mask bit in U set) and zero (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
8305
/* Merge-masked form of _mm_shuffle_epi32: computes _mm_shuffle_epi32(A, I)
 * and selects, per 32-bit lane, between that result (mask bit in U set) and
 * the corresponding lane of W (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))
8310
/* Zero-masked form of _mm_shuffle_epi32: computes _mm_shuffle_epi32(A, I)
 * and selects, per 32-bit lane, between that result (mask bit in U set) and
 * zero (bit clear).
 *
 * The expansion is fully parenthesized so it behaves as a single primary
 * expression wherever the macro is used. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8315
8316static __inline__ __m128d __DEFAULT_FN_ATTRS128
8317_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8318{
8319  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8320              (__v2df) __A,
8321              (__v2df) __W);
8322}
8323
8324static __inline__ __m128d __DEFAULT_FN_ATTRS128
8325_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8326{
8327  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8328              (__v2df) __A,
8329              (__v2df) _mm_setzero_pd ());
8330}
8331
8332static __inline__ __m256d __DEFAULT_FN_ATTRS256
8333_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8334{
8335  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8336              (__v4df) __A,
8337              (__v4df) __W);
8338}
8339
8340static __inline__ __m256d __DEFAULT_FN_ATTRS256
8341_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8342{
8343  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8344              (__v4df) __A,
8345              (__v4df) _mm256_setzero_pd ());
8346}
8347
8348static __inline__ __m128 __DEFAULT_FN_ATTRS128
8349_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8350{
8351  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8352             (__v4sf) __A,
8353             (__v4sf) __W);
8354}
8355
8356static __inline__ __m128 __DEFAULT_FN_ATTRS128
8357_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8358{
8359  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8360             (__v4sf) __A,
8361             (__v4sf) _mm_setzero_ps ());
8362}
8363
8364static __inline__ __m256 __DEFAULT_FN_ATTRS256
8365_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8366{
8367  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8368             (__v8sf) __A,
8369             (__v8sf) __W);
8370}
8371
8372static __inline__ __m256 __DEFAULT_FN_ATTRS256
8373_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8374{
8375  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8376             (__v8sf) __A,
8377             (__v8sf) _mm256_setzero_ps ());
8378}
8379
8380static __inline__ __m128 __DEFAULT_FN_ATTRS128
8381_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8382{
8383  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8384             (__v4sf) __W,
8385             (__mmask8) __U);
8386}
8387
8388static __inline__ __m128 __DEFAULT_FN_ATTRS128
8389_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8390{
8391  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8392             (__v4sf)
8393             _mm_setzero_ps (),
8394             (__mmask8) __U);
8395}
8396
8397static __inline__ __m256 __DEFAULT_FN_ATTRS256
8398_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8399{
8400  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8401                (__v8sf) __W,
8402                (__mmask8) __U);
8403}
8404
8405static __inline__ __m256 __DEFAULT_FN_ATTRS256
8406_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8407{
8408  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8409                (__v8sf)
8410                _mm256_setzero_ps (),
8411                (__mmask8) __U);
8412}
8413
/* Merge-masked single-precision to half-precision conversion of a 128-bit
 * source.  Expands to __builtin_ia32_vcvtps2ph_mask with A as the source, the
 * integer immediate I as the rounding control, W (viewed as 16-bit lanes) as
 * the pass-through vector and U as the write mask.
 *
 * The expansion is fully parenthesized so the leading cast cannot be
 * separated from the builtin call by a higher-precedence operator at the
 * point of use. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
8418
/* Zero-masked single-precision to half-precision conversion of a 128-bit
 * source.  Expands to __builtin_ia32_vcvtps2ph_mask with A as the source, the
 * integer immediate I as the rounding control, an all-zero pass-through
 * vector and U as the write mask.
 *
 * The expansion is fully parenthesized so the leading cast cannot be
 * separated from the builtin call by a higher-precedence operator at the
 * point of use. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
8423
/* Alternate spellings: each name expands to the corresponding
 * _cvt_roundps_ph macro and therefore takes exactly the same arguments. */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8426
/* Merge-masked single-precision to half-precision conversion of a 256-bit
 * source into a 128-bit result.  Expands to __builtin_ia32_vcvtps2ph256_mask
 * with A as the source, the integer immediate I as the rounding control, W
 * (viewed as eight 16-bit lanes) as the pass-through vector and U as the
 * write mask.
 *
 * The expansion is fully parenthesized so the leading cast cannot be
 * separated from the builtin call by a higher-precedence operator at the
 * point of use. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
8431
/* Zero-masked single-precision to half-precision conversion of a 256-bit
 * source into a 128-bit result.  Expands to __builtin_ia32_vcvtps2ph256_mask
 * with A as the source, the integer immediate I as the rounding control, an
 * all-zero pass-through vector and U as the write mask.
 *
 * The expansion is fully parenthesized so the leading cast cannot be
 * separated from the builtin call by a higher-precedence operator at the
 * point of use. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
8436
/* Alternate spellings: each name expands to the corresponding 256-bit
 * _cvt_roundps_ph macro and therefore takes exactly the same arguments. */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8439
8440
/* The function-attribute helper macros are defined at the top of this header
 * for its own use; remove them so they do not leak to code that includes it. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
8443
8444#endif /* __AVX512VLINTRIN_H */
8445