1/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLBWINTRIN_H
15#define __AVX512VLBWINTRIN_H
16
/* Default attributes for the functions in this file: always inlined, no debug
 * info, compiled for the "avx512vl,avx512bw" target features, and tagged with
 * a minimum vector width of 128 or 256 bits respectively. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512bw"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512bw"), \
                 __min_vector_width__(256)))
20
21/* Integer compare */
22
/* 128-bit byte compares yielding a __mmask16. P is the comparison predicate
 * (cast to int) handed to the builtin. The unmasked forms pass an all-ones
 * mask; the _mask_ forms pass M. epi8 forms use the cmpb builtin, epu8 forms
 * use the ucmpb builtin. */
#define _mm_cmp_epi8_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)(M)))

#define _mm_cmp_epu8_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)(M)))
42
/* 256-bit byte compares yielding a __mmask32. P is the comparison predicate
 * (cast to int) handed to the builtin. The unmasked forms pass an all-ones
 * mask; the _mask_ forms pass M. epi8 forms use the cmpb builtin, epu8 forms
 * use the ucmpb builtin. */
#define _mm256_cmp_epi8_mask(A, B, P) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(M, A, B, P) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)(M)))

#define _mm256_cmp_epu8_mask(A, B, P) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(M, A, B, P) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)(M)))
62
/* 128-bit word compares yielding a __mmask8. P is the comparison predicate
 * (cast to int) handed to the builtin. The unmasked forms pass an all-ones
 * mask; the _mask_ forms pass M. epi16 forms use the cmpw builtin, epu16
 * forms use the ucmpw builtin. */
#define _mm_cmp_epi16_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)(M)))

#define _mm_cmp_epu16_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)(M)))
82
/* 256-bit word compares yielding a __mmask16. P is the comparison predicate
 * (cast to int) handed to the builtin. The unmasked forms pass an all-ones
 * mask; the _mask_ forms pass M. epi16 forms use the cmpw builtin, epu16
 * forms use the ucmpw builtin. */
#define _mm256_cmp_epi16_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)(M)))

#define _mm256_cmp_epu16_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)(M)))
102
/* Fixed-predicate shorthands over _mm_cmp_epi8_mask and
 * _mm_mask_cmp_epi8_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
127
/* Fixed-predicate shorthands over _mm256_cmp_epi8_mask and
 * _mm256_mask_cmp_epi8_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
152
/* Fixed-predicate shorthands over _mm_cmp_epu8_mask and
 * _mm_mask_cmp_epu8_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
177
/* Fixed-predicate shorthands over _mm256_cmp_epu8_mask and
 * _mm256_mask_cmp_epu8_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
202
/* Fixed-predicate shorthands over _mm_cmp_epi16_mask and
 * _mm_mask_cmp_epi16_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
227
/* Fixed-predicate shorthands over _mm256_cmp_epi16_mask and
 * _mm256_mask_cmp_epi16_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
252
/* Fixed-predicate shorthands over _mm_cmp_epu16_mask and
 * _mm_mask_cmp_epu16_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
277
/* Fixed-predicate shorthands over _mm256_cmp_epu16_mask and
 * _mm256_mask_cmp_epu16_mask: one unmasked/masked pair per _MM_CMPINT_*
 * predicate (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
302
303static __inline__ __m256i __DEFAULT_FN_ATTRS256
304_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
305  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
306                                             (__v32qi)_mm256_add_epi8(__A, __B),
307                                             (__v32qi)__W);
308}
309
310static __inline__ __m256i __DEFAULT_FN_ATTRS256
311_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
312  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
313                                             (__v32qi)_mm256_add_epi8(__A, __B),
314                                             (__v32qi)_mm256_setzero_si256());
315}
316
317static __inline__ __m256i __DEFAULT_FN_ATTRS256
318_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
319  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
320                                             (__v16hi)_mm256_add_epi16(__A, __B),
321                                             (__v16hi)__W);
322}
323
324static __inline__ __m256i __DEFAULT_FN_ATTRS256
325_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
326  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
327                                             (__v16hi)_mm256_add_epi16(__A, __B),
328                                             (__v16hi)_mm256_setzero_si256());
329}
330
331static __inline__ __m256i __DEFAULT_FN_ATTRS256
332_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
333  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
334                                             (__v32qi)_mm256_sub_epi8(__A, __B),
335                                             (__v32qi)__W);
336}
337
338static __inline__ __m256i __DEFAULT_FN_ATTRS256
339_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
340  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
341                                             (__v32qi)_mm256_sub_epi8(__A, __B),
342                                             (__v32qi)_mm256_setzero_si256());
343}
344
345static __inline__ __m256i __DEFAULT_FN_ATTRS256
346_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
347  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
348                                             (__v16hi)_mm256_sub_epi16(__A, __B),
349                                             (__v16hi)__W);
350}
351
352static __inline__ __m256i __DEFAULT_FN_ATTRS256
353_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
354  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
355                                             (__v16hi)_mm256_sub_epi16(__A, __B),
356                                             (__v16hi)_mm256_setzero_si256());
357}
358
359static __inline__ __m128i __DEFAULT_FN_ATTRS128
360_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
361  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
362                                             (__v16qi)_mm_add_epi8(__A, __B),
363                                             (__v16qi)__W);
364}
365
366static __inline__ __m128i __DEFAULT_FN_ATTRS128
367_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
368  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
369                                             (__v16qi)_mm_add_epi8(__A, __B),
370                                             (__v16qi)_mm_setzero_si128());
371}
372
373static __inline__ __m128i __DEFAULT_FN_ATTRS128
374_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
375  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
376                                             (__v8hi)_mm_add_epi16(__A, __B),
377                                             (__v8hi)__W);
378}
379
380static __inline__ __m128i __DEFAULT_FN_ATTRS128
381_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
382  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
383                                             (__v8hi)_mm_add_epi16(__A, __B),
384                                             (__v8hi)_mm_setzero_si128());
385}
386
387static __inline__ __m128i __DEFAULT_FN_ATTRS128
388_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
389  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
390                                             (__v16qi)_mm_sub_epi8(__A, __B),
391                                             (__v16qi)__W);
392}
393
394static __inline__ __m128i __DEFAULT_FN_ATTRS128
395_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
396  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
397                                             (__v16qi)_mm_sub_epi8(__A, __B),
398                                             (__v16qi)_mm_setzero_si128());
399}
400
401static __inline__ __m128i __DEFAULT_FN_ATTRS128
402_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
403  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
404                                             (__v8hi)_mm_sub_epi16(__A, __B),
405                                             (__v8hi)__W);
406}
407
408static __inline__ __m128i __DEFAULT_FN_ATTRS128
409_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
410  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
411                                             (__v8hi)_mm_sub_epi16(__A, __B),
412                                             (__v8hi)_mm_setzero_si128());
413}
414
415static __inline__ __m256i __DEFAULT_FN_ATTRS256
416_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
417  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
418                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
419                                             (__v16hi)__W);
420}
421
422static __inline__ __m256i __DEFAULT_FN_ATTRS256
423_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
424  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
425                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
426                                             (__v16hi)_mm256_setzero_si256());
427}
428
429static __inline__ __m128i __DEFAULT_FN_ATTRS128
430_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
431  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
432                                             (__v8hi)_mm_mullo_epi16(__A, __B),
433                                             (__v8hi)__W);
434}
435
436static __inline__ __m128i __DEFAULT_FN_ATTRS128
437_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
438  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
439                                             (__v8hi)_mm_mullo_epi16(__A, __B),
440                                             (__v8hi)_mm_setzero_si128());
441}
442
443static __inline__ __m128i __DEFAULT_FN_ATTRS128
444_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
445{
446  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
447              (__v16qi) __W,
448              (__v16qi) __A);
449}
450
451static __inline__ __m256i __DEFAULT_FN_ATTRS256
452_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
453{
454  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
455               (__v32qi) __W,
456               (__v32qi) __A);
457}
458
459static __inline__ __m128i __DEFAULT_FN_ATTRS128
460_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
461{
462  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
463               (__v8hi) __W,
464               (__v8hi) __A);
465}
466
467static __inline__ __m256i __DEFAULT_FN_ATTRS256
468_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
469{
470  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
471               (__v16hi) __W,
472               (__v16hi) __A);
473}
474
475static __inline__ __m128i __DEFAULT_FN_ATTRS128
476_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
477{
478  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
479                                             (__v16qi)_mm_abs_epi8(__A),
480                                             (__v16qi)__W);
481}
482
483static __inline__ __m128i __DEFAULT_FN_ATTRS128
484_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
485{
486  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
487                                             (__v16qi)_mm_abs_epi8(__A),
488                                             (__v16qi)_mm_setzero_si128());
489}
490
491static __inline__ __m256i __DEFAULT_FN_ATTRS256
492_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
493{
494  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
495                                             (__v32qi)_mm256_abs_epi8(__A),
496                                             (__v32qi)__W);
497}
498
499static __inline__ __m256i __DEFAULT_FN_ATTRS256
500_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
501{
502  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
503                                             (__v32qi)_mm256_abs_epi8(__A),
504                                             (__v32qi)_mm256_setzero_si256());
505}
506
507static __inline__ __m128i __DEFAULT_FN_ATTRS128
508_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
509{
510  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
511                                             (__v8hi)_mm_abs_epi16(__A),
512                                             (__v8hi)__W);
513}
514
515static __inline__ __m128i __DEFAULT_FN_ATTRS128
516_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
517{
518  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
519                                             (__v8hi)_mm_abs_epi16(__A),
520                                             (__v8hi)_mm_setzero_si128());
521}
522
523static __inline__ __m256i __DEFAULT_FN_ATTRS256
524_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
525{
526  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
527                                             (__v16hi)_mm256_abs_epi16(__A),
528                                             (__v16hi)__W);
529}
530
531static __inline__ __m256i __DEFAULT_FN_ATTRS256
532_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
533{
534  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
535                                             (__v16hi)_mm256_abs_epi16(__A),
536                                             (__v16hi)_mm256_setzero_si256());
537}
538
539static __inline__ __m128i __DEFAULT_FN_ATTRS128
540_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
541  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
542                                             (__v8hi)_mm_packs_epi32(__A, __B),
543                                             (__v8hi)_mm_setzero_si128());
544}
545
546static __inline__ __m128i __DEFAULT_FN_ATTRS128
547_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
548{
549  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
550                                             (__v8hi)_mm_packs_epi32(__A, __B),
551                                             (__v8hi)__W);
552}
553
554static __inline__ __m256i __DEFAULT_FN_ATTRS256
555_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
556{
557  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
558                                          (__v16hi)_mm256_packs_epi32(__A, __B),
559                                          (__v16hi)_mm256_setzero_si256());
560}
561
562static __inline__ __m256i __DEFAULT_FN_ATTRS256
563_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
564{
565  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
566                                          (__v16hi)_mm256_packs_epi32(__A, __B),
567                                          (__v16hi)__W);
568}
569
570static __inline__ __m128i __DEFAULT_FN_ATTRS128
571_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
572{
573  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
574                                             (__v16qi)_mm_packs_epi16(__A, __B),
575                                             (__v16qi)_mm_setzero_si128());
576}
577
578static __inline__ __m128i __DEFAULT_FN_ATTRS128
579_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
580{
581  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
582                                             (__v16qi)_mm_packs_epi16(__A, __B),
583                                             (__v16qi)__W);
584}
585
586static __inline__ __m256i __DEFAULT_FN_ATTRS256
587_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
588{
589  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
590                                          (__v32qi)_mm256_packs_epi16(__A, __B),
591                                          (__v32qi)_mm256_setzero_si256());
592}
593
594static __inline__ __m256i __DEFAULT_FN_ATTRS256
595_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
596{
597  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
598                                          (__v32qi)_mm256_packs_epi16(__A, __B),
599                                          (__v32qi)__W);
600}
601
602static __inline__ __m128i __DEFAULT_FN_ATTRS128
603_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
604{
605  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
606                                             (__v8hi)_mm_packus_epi32(__A, __B),
607                                             (__v8hi)_mm_setzero_si128());
608}
609
610static __inline__ __m128i __DEFAULT_FN_ATTRS128
611_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
612{
613  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
614                                             (__v8hi)_mm_packus_epi32(__A, __B),
615                                             (__v8hi)__W);
616}
617
618static __inline__ __m256i __DEFAULT_FN_ATTRS256
619_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
620{
621  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
622                                         (__v16hi)_mm256_packus_epi32(__A, __B),
623                                         (__v16hi)_mm256_setzero_si256());
624}
625
626static __inline__ __m256i __DEFAULT_FN_ATTRS256
627_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
628{
629  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
630                                         (__v16hi)_mm256_packus_epi32(__A, __B),
631                                         (__v16hi)__W);
632}
633
634static __inline__ __m128i __DEFAULT_FN_ATTRS128
635_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
636{
637  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
638                                            (__v16qi)_mm_packus_epi16(__A, __B),
639                                            (__v16qi)_mm_setzero_si128());
640}
641
642static __inline__ __m128i __DEFAULT_FN_ATTRS128
643_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
644{
645  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
646                                            (__v16qi)_mm_packus_epi16(__A, __B),
647                                            (__v16qi)__W);
648}
649
650static __inline__ __m256i __DEFAULT_FN_ATTRS256
651_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
652{
653  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
654                                         (__v32qi)_mm256_packus_epi16(__A, __B),
655                                         (__v32qi)_mm256_setzero_si256());
656}
657
658static __inline__ __m256i __DEFAULT_FN_ATTRS256
659_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
660{
661  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
662                                         (__v32qi)_mm256_packus_epi16(__A, __B),
663                                         (__v32qi)__W);
664}
665
666static __inline__ __m128i __DEFAULT_FN_ATTRS128
667_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
668{
669  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
670                                             (__v16qi)_mm_adds_epi8(__A, __B),
671                                             (__v16qi)__W);
672}
673
674static __inline__ __m128i __DEFAULT_FN_ATTRS128
675_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
676{
677  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
678                                             (__v16qi)_mm_adds_epi8(__A, __B),
679                                             (__v16qi)_mm_setzero_si128());
680}
681
682static __inline__ __m256i __DEFAULT_FN_ATTRS256
683_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
684{
685  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
686                                            (__v32qi)_mm256_adds_epi8(__A, __B),
687                                            (__v32qi)__W);
688}
689
690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
692{
693  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
694                                            (__v32qi)_mm256_adds_epi8(__A, __B),
695                                            (__v32qi)_mm256_setzero_si256());
696}
697
698static __inline__ __m128i __DEFAULT_FN_ATTRS128
699_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
700{
701  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
702                                             (__v8hi)_mm_adds_epi16(__A, __B),
703                                             (__v8hi)__W);
704}
705
706static __inline__ __m128i __DEFAULT_FN_ATTRS128
707_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
708{
709  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
710                                             (__v8hi)_mm_adds_epi16(__A, __B),
711                                             (__v8hi)_mm_setzero_si128());
712}
713
714static __inline__ __m256i __DEFAULT_FN_ATTRS256
715_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
716{
717  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
718                                           (__v16hi)_mm256_adds_epi16(__A, __B),
719                                           (__v16hi)__W);
720}
721
722static __inline__ __m256i __DEFAULT_FN_ATTRS256
723_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
724{
725  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
726                                           (__v16hi)_mm256_adds_epi16(__A, __B),
727                                           (__v16hi)_mm256_setzero_si256());
728}
729
730static __inline__ __m128i __DEFAULT_FN_ATTRS128
731_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
732{
733  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
734                                             (__v16qi)_mm_adds_epu8(__A, __B),
735                                             (__v16qi)__W);
736}
737
738static __inline__ __m128i __DEFAULT_FN_ATTRS128
739_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
740{
741  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
742                                             (__v16qi)_mm_adds_epu8(__A, __B),
743                                             (__v16qi)_mm_setzero_si128());
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
748{
749  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
750                                            (__v32qi)_mm256_adds_epu8(__A, __B),
751                                            (__v32qi)__W);
752}
753
754static __inline__ __m256i __DEFAULT_FN_ATTRS256
755_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
756{
757  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
758                                            (__v32qi)_mm256_adds_epu8(__A, __B),
759                                            (__v32qi)_mm256_setzero_si256());
760}
761
762static __inline__ __m128i __DEFAULT_FN_ATTRS128
763_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
764{
765  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
766                                             (__v8hi)_mm_adds_epu16(__A, __B),
767                                             (__v8hi)__W);
768}
769
770static __inline__ __m128i __DEFAULT_FN_ATTRS128
771_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
772{
773  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
774                                             (__v8hi)_mm_adds_epu16(__A, __B),
775                                             (__v8hi)_mm_setzero_si128());
776}
777
778static __inline__ __m256i __DEFAULT_FN_ATTRS256
779_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
780{
781  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
782                                           (__v16hi)_mm256_adds_epu16(__A, __B),
783                                           (__v16hi)__W);
784}
785
786static __inline__ __m256i __DEFAULT_FN_ATTRS256
787_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
788{
789  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
790                                           (__v16hi)_mm256_adds_epu16(__A, __B),
791                                           (__v16hi)_mm256_setzero_si256());
792}
793
794static __inline__ __m128i __DEFAULT_FN_ATTRS128
795_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
796{
797  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
798                                             (__v16qi)_mm_avg_epu8(__A, __B),
799                                             (__v16qi)__W);
800}
801
802static __inline__ __m128i __DEFAULT_FN_ATTRS128
803_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
804{
805  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
806                                             (__v16qi)_mm_avg_epu8(__A, __B),
807                                             (__v16qi)_mm_setzero_si128());
808}
809
810static __inline__ __m256i __DEFAULT_FN_ATTRS256
811_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
812{
813  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
814                                             (__v32qi)_mm256_avg_epu8(__A, __B),
815                                             (__v32qi)__W);
816}
817
818static __inline__ __m256i __DEFAULT_FN_ATTRS256
819_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
820{
821  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
822                                             (__v32qi)_mm256_avg_epu8(__A, __B),
823                                             (__v32qi)_mm256_setzero_si256());
824}
825
826static __inline__ __m128i __DEFAULT_FN_ATTRS128
827_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
828{
829  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
830                                             (__v8hi)_mm_avg_epu16(__A, __B),
831                                             (__v8hi)__W);
832}
833
834static __inline__ __m128i __DEFAULT_FN_ATTRS128
835_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
836{
837  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
838                                             (__v8hi)_mm_avg_epu16(__A, __B),
839                                             (__v8hi)_mm_setzero_si128());
840}
841
842static __inline__ __m256i __DEFAULT_FN_ATTRS256
843_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
844{
845  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
846                                            (__v16hi)_mm256_avg_epu16(__A, __B),
847                                            (__v16hi)__W);
848}
849
850static __inline__ __m256i __DEFAULT_FN_ATTRS256
851_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
852{
853  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
854                                            (__v16hi)_mm256_avg_epu16(__A, __B),
855                                            (__v16hi)_mm256_setzero_si256());
856}
857
858static __inline__ __m128i __DEFAULT_FN_ATTRS128
859_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
860{
861  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
862                                             (__v16qi)_mm_max_epi8(__A, __B),
863                                             (__v16qi)_mm_setzero_si128());
864}
865
866static __inline__ __m128i __DEFAULT_FN_ATTRS128
867_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
868{
869  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
870                                             (__v16qi)_mm_max_epi8(__A, __B),
871                                             (__v16qi)__W);
872}
873
874static __inline__ __m256i __DEFAULT_FN_ATTRS256
875_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
876{
877  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
878                                             (__v32qi)_mm256_max_epi8(__A, __B),
879                                             (__v32qi)_mm256_setzero_si256());
880}
881
882static __inline__ __m256i __DEFAULT_FN_ATTRS256
883_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
884{
885  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
886                                             (__v32qi)_mm256_max_epi8(__A, __B),
887                                             (__v32qi)__W);
888}
889
890static __inline__ __m128i __DEFAULT_FN_ATTRS128
891_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
892{
893  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
894                                             (__v8hi)_mm_max_epi16(__A, __B),
895                                             (__v8hi)_mm_setzero_si128());
896}
897
898static __inline__ __m128i __DEFAULT_FN_ATTRS128
899_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
900{
901  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
902                                             (__v8hi)_mm_max_epi16(__A, __B),
903                                             (__v8hi)__W);
904}
905
906static __inline__ __m256i __DEFAULT_FN_ATTRS256
907_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
908{
909  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
910                                            (__v16hi)_mm256_max_epi16(__A, __B),
911                                            (__v16hi)_mm256_setzero_si256());
912}
913
914static __inline__ __m256i __DEFAULT_FN_ATTRS256
915_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
916{
917  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
918                                            (__v16hi)_mm256_max_epi16(__A, __B),
919                                            (__v16hi)__W);
920}
921
922static __inline__ __m128i __DEFAULT_FN_ATTRS128
923_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
924{
925  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
926                                             (__v16qi)_mm_max_epu8(__A, __B),
927                                             (__v16qi)_mm_setzero_si128());
928}
929
930static __inline__ __m128i __DEFAULT_FN_ATTRS128
931_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
932{
933  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
934                                             (__v16qi)_mm_max_epu8(__A, __B),
935                                             (__v16qi)__W);
936}
937
938static __inline__ __m256i __DEFAULT_FN_ATTRS256
939_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
940{
941  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
942                                             (__v32qi)_mm256_max_epu8(__A, __B),
943                                             (__v32qi)_mm256_setzero_si256());
944}
945
946static __inline__ __m256i __DEFAULT_FN_ATTRS256
947_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
948{
949  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
950                                             (__v32qi)_mm256_max_epu8(__A, __B),
951                                             (__v32qi)__W);
952}
953
954static __inline__ __m128i __DEFAULT_FN_ATTRS128
955_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
956{
957  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
958                                             (__v8hi)_mm_max_epu16(__A, __B),
959                                             (__v8hi)_mm_setzero_si128());
960}
961
962static __inline__ __m128i __DEFAULT_FN_ATTRS128
963_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
964{
965  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
966                                             (__v8hi)_mm_max_epu16(__A, __B),
967                                             (__v8hi)__W);
968}
969
970static __inline__ __m256i __DEFAULT_FN_ATTRS256
971_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
972{
973  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
974                                            (__v16hi)_mm256_max_epu16(__A, __B),
975                                            (__v16hi)_mm256_setzero_si256());
976}
977
978static __inline__ __m256i __DEFAULT_FN_ATTRS256
979_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
980{
981  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
982                                            (__v16hi)_mm256_max_epu16(__A, __B),
983                                            (__v16hi)__W);
984}
985
986static __inline__ __m128i __DEFAULT_FN_ATTRS128
987_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
988{
989  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
990                                             (__v16qi)_mm_min_epi8(__A, __B),
991                                             (__v16qi)_mm_setzero_si128());
992}
993
994static __inline__ __m128i __DEFAULT_FN_ATTRS128
995_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
996{
997  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
998                                             (__v16qi)_mm_min_epi8(__A, __B),
999                                             (__v16qi)__W);
1000}
1001
1002static __inline__ __m256i __DEFAULT_FN_ATTRS256
1003_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
1004{
1005  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1006                                             (__v32qi)_mm256_min_epi8(__A, __B),
1007                                             (__v32qi)_mm256_setzero_si256());
1008}
1009
1010static __inline__ __m256i __DEFAULT_FN_ATTRS256
1011_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1012{
1013  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1014                                             (__v32qi)_mm256_min_epi8(__A, __B),
1015                                             (__v32qi)__W);
1016}
1017
1018static __inline__ __m128i __DEFAULT_FN_ATTRS128
1019_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
1020{
1021  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1022                                             (__v8hi)_mm_min_epi16(__A, __B),
1023                                             (__v8hi)_mm_setzero_si128());
1024}
1025
1026static __inline__ __m128i __DEFAULT_FN_ATTRS128
1027_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1028{
1029  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1030                                             (__v8hi)_mm_min_epi16(__A, __B),
1031                                             (__v8hi)__W);
1032}
1033
1034static __inline__ __m256i __DEFAULT_FN_ATTRS256
1035_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
1036{
1037  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1038                                            (__v16hi)_mm256_min_epi16(__A, __B),
1039                                            (__v16hi)_mm256_setzero_si256());
1040}
1041
1042static __inline__ __m256i __DEFAULT_FN_ATTRS256
1043_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1044{
1045  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1046                                            (__v16hi)_mm256_min_epi16(__A, __B),
1047                                            (__v16hi)__W);
1048}
1049
1050static __inline__ __m128i __DEFAULT_FN_ATTRS128
1051_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
1052{
1053  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1054                                             (__v16qi)_mm_min_epu8(__A, __B),
1055                                             (__v16qi)_mm_setzero_si128());
1056}
1057
1058static __inline__ __m128i __DEFAULT_FN_ATTRS128
1059_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1060{
1061  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1062                                             (__v16qi)_mm_min_epu8(__A, __B),
1063                                             (__v16qi)__W);
1064}
1065
1066static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1068{
1069  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1070                                             (__v32qi)_mm256_min_epu8(__A, __B),
1071                                             (__v32qi)_mm256_setzero_si256());
1072}
1073
1074static __inline__ __m256i __DEFAULT_FN_ATTRS256
1075_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1076{
1077  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1078                                             (__v32qi)_mm256_min_epu8(__A, __B),
1079                                             (__v32qi)__W);
1080}
1081
1082static __inline__ __m128i __DEFAULT_FN_ATTRS128
1083_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
1084{
1085  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1086                                             (__v8hi)_mm_min_epu16(__A, __B),
1087                                             (__v8hi)_mm_setzero_si128());
1088}
1089
1090static __inline__ __m128i __DEFAULT_FN_ATTRS128
1091_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1092{
1093  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1094                                             (__v8hi)_mm_min_epu16(__A, __B),
1095                                             (__v8hi)__W);
1096}
1097
1098static __inline__ __m256i __DEFAULT_FN_ATTRS256
1099_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
1100{
1101  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1102                                            (__v16hi)_mm256_min_epu16(__A, __B),
1103                                            (__v16hi)_mm256_setzero_si256());
1104}
1105
1106static __inline__ __m256i __DEFAULT_FN_ATTRS256
1107_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1108{
1109  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1110                                            (__v16hi)_mm256_min_epu16(__A, __B),
1111                                            (__v16hi)__W);
1112}
1113
1114static __inline__ __m128i __DEFAULT_FN_ATTRS128
1115_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1116{
1117  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1118                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1119                                            (__v16qi)__W);
1120}
1121
1122static __inline__ __m128i __DEFAULT_FN_ATTRS128
1123_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1124{
1125  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1126                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1127                                            (__v16qi)_mm_setzero_si128());
1128}
1129
1130static __inline__ __m256i __DEFAULT_FN_ATTRS256
1131_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1132{
1133  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1134                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1135                                         (__v32qi)__W);
1136}
1137
1138static __inline__ __m256i __DEFAULT_FN_ATTRS256
1139_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1140{
1141  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1142                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1143                                         (__v32qi)_mm256_setzero_si256());
1144}
1145
1146static __inline__ __m128i __DEFAULT_FN_ATTRS128
1147_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1148{
1149  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1150                                             (__v16qi)_mm_subs_epi8(__A, __B),
1151                                             (__v16qi)__W);
1152}
1153
1154static __inline__ __m128i __DEFAULT_FN_ATTRS128
1155_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1156{
1157  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1158                                             (__v16qi)_mm_subs_epi8(__A, __B),
1159                                             (__v16qi)_mm_setzero_si128());
1160}
1161
1162static __inline__ __m256i __DEFAULT_FN_ATTRS256
1163_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1164{
1165  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1166                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1167                                            (__v32qi)__W);
1168}
1169
1170static __inline__ __m256i __DEFAULT_FN_ATTRS256
1171_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1172{
1173  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1174                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1175                                            (__v32qi)_mm256_setzero_si256());
1176}
1177
1178static __inline__ __m128i __DEFAULT_FN_ATTRS128
1179_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1180{
1181  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1182                                             (__v8hi)_mm_subs_epi16(__A, __B),
1183                                             (__v8hi)__W);
1184}
1185
1186static __inline__ __m128i __DEFAULT_FN_ATTRS128
1187_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1188{
1189  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1190                                             (__v8hi)_mm_subs_epi16(__A, __B),
1191                                             (__v8hi)_mm_setzero_si128());
1192}
1193
1194static __inline__ __m256i __DEFAULT_FN_ATTRS256
1195_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1196{
1197  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1198                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1199                                           (__v16hi)__W);
1200}
1201
1202static __inline__ __m256i __DEFAULT_FN_ATTRS256
1203_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1204{
1205  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1206                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1207                                           (__v16hi)_mm256_setzero_si256());
1208}
1209
1210static __inline__ __m128i __DEFAULT_FN_ATTRS128
1211_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1212{
1213  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1214                                             (__v16qi)_mm_subs_epu8(__A, __B),
1215                                             (__v16qi)__W);
1216}
1217
1218static __inline__ __m128i __DEFAULT_FN_ATTRS128
1219_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
1220{
1221  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1222                                             (__v16qi)_mm_subs_epu8(__A, __B),
1223                                             (__v16qi)_mm_setzero_si128());
1224}
1225
1226static __inline__ __m256i __DEFAULT_FN_ATTRS256
1227_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1228{
1229  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1230                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1231                                            (__v32qi)__W);
1232}
1233
1234static __inline__ __m256i __DEFAULT_FN_ATTRS256
1235_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
1236{
1237  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1238                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1239                                            (__v32qi)_mm256_setzero_si256());
1240}
1241
1242static __inline__ __m128i __DEFAULT_FN_ATTRS128
1243_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1244{
1245  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1246                                             (__v8hi)_mm_subs_epu16(__A, __B),
1247                                             (__v8hi)__W);
1248}
1249
1250static __inline__ __m128i __DEFAULT_FN_ATTRS128
1251_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
1252{
1253  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1254                                             (__v8hi)_mm_subs_epu16(__A, __B),
1255                                             (__v8hi)_mm_setzero_si128());
1256}
1257
1258static __inline__ __m256i __DEFAULT_FN_ATTRS256
1259_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
1260      __m256i __B) {
1261  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1262                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1263                                           (__v16hi)__W);
1264}
1265
1266static __inline__ __m256i __DEFAULT_FN_ATTRS256
1267_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
1268{
1269  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1270                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1271                                           (__v16hi)_mm256_setzero_si256());
1272}
1273
1274static __inline__ __m128i __DEFAULT_FN_ATTRS128
1275_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
1276{
1277  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1278                                                 (__v8hi) __B);
1279}
1280
1281static __inline__ __m128i __DEFAULT_FN_ATTRS128
1282_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
1283                            __m128i __B)
1284{
1285  return (__m128i)__builtin_ia32_selectw_128(__U,
1286                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1287                                  (__v8hi)__A);
1288}
1289
1290static __inline__ __m128i __DEFAULT_FN_ATTRS128
1291_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
1292                             __m128i __B)
1293{
1294  return (__m128i)__builtin_ia32_selectw_128(__U,
1295                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1296                                  (__v8hi)__I);
1297}
1298
1299static __inline__ __m128i __DEFAULT_FN_ATTRS128
1300_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1301            __m128i __B)
1302{
1303  return (__m128i)__builtin_ia32_selectw_128(__U,
1304                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1305                                  (__v8hi)_mm_setzero_si128());
1306}
1307
1308static __inline__ __m256i __DEFAULT_FN_ATTRS256
1309_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
1310{
1311  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1312                                                 (__v16hi)__B);
1313}
1314
1315static __inline__ __m256i __DEFAULT_FN_ATTRS256
1316_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
1317                               __m256i __B)
1318{
1319  return (__m256i)__builtin_ia32_selectw_256(__U,
1320                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1321                              (__v16hi)__A);
1322}
1323
1324static __inline__ __m256i __DEFAULT_FN_ATTRS256
1325_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
1326                                __m256i __B)
1327{
1328  return (__m256i)__builtin_ia32_selectw_256(__U,
1329                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1330                              (__v16hi)__I);
1331}
1332
1333static __inline__ __m256i __DEFAULT_FN_ATTRS256
1334_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
1335                                 __m256i __B)
1336{
1337  return (__m256i)__builtin_ia32_selectw_256(__U,
1338                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1339                              (__v16hi)_mm256_setzero_si256());
1340}
1341
1342static __inline__ __m128i __DEFAULT_FN_ATTRS128
1343_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1344  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1345                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1346                                            (__v8hi)__W);
1347}
1348
1349static __inline__ __m128i __DEFAULT_FN_ATTRS128
1350_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1351  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1352                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1353                                            (__v8hi)_mm_setzero_si128());
1354}
1355
1356static __inline__ __m256i __DEFAULT_FN_ATTRS256
1357_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
1358                          __m256i __Y) {
1359  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1360                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1361                                        (__v16hi)__W);
1362}
1363
1364static __inline__ __m256i __DEFAULT_FN_ATTRS256
1365_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1366  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1367                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1368                                        (__v16hi)_mm256_setzero_si256());
1369}
1370
1371static __inline__ __m128i __DEFAULT_FN_ATTRS128
1372_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1373  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1374                                             (__v4si)_mm_madd_epi16(__A, __B),
1375                                             (__v4si)__W);
1376}
1377
1378static __inline__ __m128i __DEFAULT_FN_ATTRS128
1379_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1380  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1381                                             (__v4si)_mm_madd_epi16(__A, __B),
1382                                             (__v4si)_mm_setzero_si128());
1383}
1384
1385static __inline__ __m256i __DEFAULT_FN_ATTRS256
1386_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
1387  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1388                                            (__v8si)_mm256_madd_epi16(__A, __B),
1389                                            (__v8si)__W);
1390}
1391
1392static __inline__ __m256i __DEFAULT_FN_ATTRS256
1393_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
1394  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1395                                            (__v8si)_mm256_madd_epi16(__A, __B),
1396                                            (__v8si)_mm256_setzero_si256());
1397}
1398
1399static __inline__ __m128i __DEFAULT_FN_ATTRS128
1400_mm_cvtsepi16_epi8 (__m128i __A) {
1401  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1402               (__v16qi) _mm_setzero_si128(),
1403               (__mmask8) -1);
1404}
1405
1406static __inline__ __m128i __DEFAULT_FN_ATTRS128
1407_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1408  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1409               (__v16qi) __O,
1410                __M);
1411}
1412
1413static __inline__ __m128i __DEFAULT_FN_ATTRS128
1414_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
1415  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1416               (__v16qi) _mm_setzero_si128(),
1417               __M);
1418}
1419
1420static __inline__ __m128i __DEFAULT_FN_ATTRS256
1421_mm256_cvtsepi16_epi8 (__m256i __A) {
1422  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1423               (__v16qi) _mm_setzero_si128(),
1424               (__mmask16) -1);
1425}
1426
1427static __inline__ __m128i __DEFAULT_FN_ATTRS256
1428_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1429  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1430               (__v16qi) __O,
1431                __M);
1432}
1433
1434static __inline__ __m128i __DEFAULT_FN_ATTRS256
1435_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
1436  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1437               (__v16qi) _mm_setzero_si128(),
1438               __M);
1439}
1440
1441static __inline__ __m128i __DEFAULT_FN_ATTRS128
1442_mm_cvtusepi16_epi8 (__m128i __A) {
1443  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1444                (__v16qi) _mm_setzero_si128(),
1445                (__mmask8) -1);
1446}
1447
1448static __inline__ __m128i __DEFAULT_FN_ATTRS128
1449_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1450  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1451                (__v16qi) __O,
1452                __M);
1453}
1454
1455static __inline__ __m128i __DEFAULT_FN_ATTRS128
1456_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
1457  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1458                (__v16qi) _mm_setzero_si128(),
1459                __M);
1460}
1461
1462static __inline__ __m128i __DEFAULT_FN_ATTRS256
1463_mm256_cvtusepi16_epi8 (__m256i __A) {
1464  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1465                (__v16qi) _mm_setzero_si128(),
1466                (__mmask16) -1);
1467}
1468
1469static __inline__ __m128i __DEFAULT_FN_ATTRS256
1470_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1471  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1472                (__v16qi) __O,
1473                __M);
1474}
1475
1476static __inline__ __m128i __DEFAULT_FN_ATTRS256
1477_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
1478  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1479                (__v16qi) _mm_setzero_si128(),
1480                __M);
1481}
1482
1483static __inline__ __m128i __DEFAULT_FN_ATTRS128
1484_mm_cvtepi16_epi8 (__m128i __A) {
1485  return (__m128i)__builtin_shufflevector(
1486      __builtin_convertvector((__v8hi)__A, __v8qi),
1487      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1488      12, 13, 14, 15);
1489}
1490
1491static __inline__ __m128i __DEFAULT_FN_ATTRS128
1492_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1493  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1494               (__v16qi) __O,
1495               __M);
1496}
1497
1498static __inline__ __m128i __DEFAULT_FN_ATTRS128
1499_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
1500  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1501               (__v16qi) _mm_setzero_si128(),
1502               __M);
1503}
1504
1505static __inline__ void __DEFAULT_FN_ATTRS128
1506_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1507{
1508  __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1509}
1510
1511
1512static __inline__ void __DEFAULT_FN_ATTRS128
1513_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1514{
1515  __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1516}
1517
1518static __inline__ void __DEFAULT_FN_ATTRS128
1519_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1520{
1521  __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1522}
1523
1524static __inline__ __m128i __DEFAULT_FN_ATTRS256
1525_mm256_cvtepi16_epi8 (__m256i __A) {
1526  return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1527}
1528
1529static __inline__ __m128i __DEFAULT_FN_ATTRS256
1530_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1531  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1532                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1533                                             (__v16qi)__O);
1534}
1535
1536static __inline__ __m128i __DEFAULT_FN_ATTRS256
1537_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
1538  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1539                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1540                                             (__v16qi)_mm_setzero_si128());
1541}
1542
1543static __inline__ void __DEFAULT_FN_ATTRS256
1544_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1545{
1546  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1547}
1548
1549static __inline__ void __DEFAULT_FN_ATTRS256
1550_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1551{
1552  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1553}
1554
1555static __inline__ void __DEFAULT_FN_ATTRS256
1556_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1557{
1558  __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1559}
1560
1561static __inline__ __m128i __DEFAULT_FN_ATTRS128
1562_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1563  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1564                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1565                                             (__v8hi)__W);
1566}
1567
1568static __inline__ __m128i __DEFAULT_FN_ATTRS128
1569_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1570  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1571                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1572                                             (__v8hi)_mm_setzero_si128());
1573}
1574
1575static __inline__ __m256i __DEFAULT_FN_ATTRS256
1576_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
1577  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1578                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1579                                         (__v16hi)__W);
1580}
1581
1582static __inline__ __m256i __DEFAULT_FN_ATTRS256
1583_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1584  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1585                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1586                                         (__v16hi)_mm256_setzero_si256());
1587}
1588
1589static __inline__ __m128i __DEFAULT_FN_ATTRS128
1590_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1591  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1592                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1593                                             (__v8hi)__W);
1594}
1595
1596static __inline__ __m128i __DEFAULT_FN_ATTRS128
1597_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
1598  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1599                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1600                                             (__v8hi)_mm_setzero_si128());
1601}
1602
1603static __inline__ __m256i __DEFAULT_FN_ATTRS256
1604_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1605  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1606                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1607                                          (__v16hi)__W);
1608}
1609
1610static __inline__ __m256i __DEFAULT_FN_ATTRS256
1611_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
1612  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1613                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1614                                          (__v16hi)_mm256_setzero_si256());
1615}
1616
1617static __inline__ __m128i __DEFAULT_FN_ATTRS128
1618_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1619  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1620                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1621                                             (__v8hi)__W);
1622}
1623
1624static __inline__ __m128i __DEFAULT_FN_ATTRS128
1625_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1626  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1627                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1628                                             (__v8hi)_mm_setzero_si128());
1629}
1630
1631static __inline__ __m256i __DEFAULT_FN_ATTRS256
1632_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1633  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1634                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1635                                          (__v16hi)__W);
1636}
1637
1638static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1640  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1641                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1642                                          (__v16hi)_mm256_setzero_si256());
1643}
1644
1645static __inline__ __m128i __DEFAULT_FN_ATTRS128
1646_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1647  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1648                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1649                                           (__v16qi)__W);
1650}
1651
1652static __inline__ __m128i __DEFAULT_FN_ATTRS128
1653_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1654  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1655                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1656                                           (__v16qi)_mm_setzero_si128());
1657}
1658
1659static __inline__ __m256i __DEFAULT_FN_ATTRS256
1660_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1661  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1662                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1663                                        (__v32qi)__W);
1664}
1665
1666static __inline__ __m256i __DEFAULT_FN_ATTRS256
1667_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1668  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1669                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1670                                        (__v32qi)_mm256_setzero_si256());
1671}
1672
1673static __inline__ __m128i __DEFAULT_FN_ATTRS128
1674_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1675  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1676                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1677                                           (__v8hi)__W);
1678}
1679
1680static __inline__ __m128i __DEFAULT_FN_ATTRS128
1681_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1682  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1683                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1684                                           (__v8hi) _mm_setzero_si128());
1685}
1686
1687static __inline__ __m256i __DEFAULT_FN_ATTRS256
1688_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1689  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1690                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1691                                       (__v16hi)__W);
1692}
1693
1694static __inline__ __m256i __DEFAULT_FN_ATTRS256
1695_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1696  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1697                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1698                                       (__v16hi)_mm256_setzero_si256());
1699}
1700
1701static __inline__ __m128i __DEFAULT_FN_ATTRS128
1702_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1703  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1704                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1705                                           (__v16qi)__W);
1706}
1707
1708static __inline__ __m128i __DEFAULT_FN_ATTRS128
1709_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1710  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1711                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1712                                           (__v16qi)_mm_setzero_si128());
1713}
1714
1715static __inline__ __m256i __DEFAULT_FN_ATTRS256
1716_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1717  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1718                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1719                                        (__v32qi)__W);
1720}
1721
1722static __inline__ __m256i __DEFAULT_FN_ATTRS256
1723_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1724  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1725                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1726                                        (__v32qi)_mm256_setzero_si256());
1727}
1728
1729static __inline__ __m128i __DEFAULT_FN_ATTRS128
1730_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1731  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1732                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1733                                           (__v8hi)__W);
1734}
1735
1736static __inline__ __m128i __DEFAULT_FN_ATTRS128
1737_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1738  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1739                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1740                                           (__v8hi) _mm_setzero_si128());
1741}
1742
1743static __inline__ __m256i __DEFAULT_FN_ATTRS256
1744_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1745  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1746                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1747                                       (__v16hi)__W);
1748}
1749
1750static __inline__ __m256i __DEFAULT_FN_ATTRS256
1751_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1752  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1753                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1754                                       (__v16hi)_mm256_setzero_si256());
1755}
1756
1757static __inline__ __m128i __DEFAULT_FN_ATTRS128
1758_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1759{
1760  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1761                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1762                                             (__v8hi)__W);
1763}
1764
1765static __inline__ __m128i __DEFAULT_FN_ATTRS128
1766_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
1767{
1768  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1769                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1770                                             (__v8hi)_mm_setzero_si128());
1771}
1772
1773static __inline__ __m256i __DEFAULT_FN_ATTRS256
1774_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1775{
1776  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1777                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1778                                             (__v16hi)__W);
1779}
1780
1781static __inline__ __m256i __DEFAULT_FN_ATTRS256
1782_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
1783{
1784  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1785                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1786                                             (__v16hi)_mm256_setzero_si256());
1787}
1788
1789
1790static __inline__ __m128i __DEFAULT_FN_ATTRS128
1791_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1792{
1793  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1794                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1795                                             (__v8hi)__W);
1796}
1797
1798static __inline__ __m128i __DEFAULT_FN_ATTRS128
1799_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
1800{
1801  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1802                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1803                                             (__v8hi)_mm_setzero_si128());
1804}
1805
1806static __inline__ __m256i __DEFAULT_FN_ATTRS256
1807_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1808{
1809  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1810                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1811                                             (__v16hi)__W);
1812}
1813
1814static __inline__ __m256i __DEFAULT_FN_ATTRS256
1815_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
1816{
1817  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1818                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1819                                             (__v16hi)_mm256_setzero_si256());
1820}
1821
1822
/* Merge-masked form of _mm_shufflehi_epi16: A and imm are forwarded unchanged
   to _mm_shufflehi_epi16 and the result is blended with W, under mask U, by
   __builtin_ia32_selectw_128. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1827
/* Zero-masked form of _mm_shufflehi_epi16: A and imm are forwarded unchanged
   to _mm_shufflehi_epi16 and the result is blended with _mm_setzero_si128(),
   under mask U, by __builtin_ia32_selectw_128. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1832
/* Merge-masked form of _mm256_shufflehi_epi16: A and imm are forwarded
   unchanged to _mm256_shufflehi_epi16 and the result is blended with W, under
   mask U, by __builtin_ia32_selectw_256. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1837
/* Zero-masked form of _mm256_shufflehi_epi16: A and imm are forwarded
   unchanged to _mm256_shufflehi_epi16 and the result is blended with
   _mm256_setzero_si256(), under mask U, by __builtin_ia32_selectw_256. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1842
/* Merge-masked form of _mm_shufflelo_epi16: A and imm are forwarded unchanged
   to _mm_shufflelo_epi16 and the result is blended with W, under mask U, by
   __builtin_ia32_selectw_128. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1847
/* Zero-masked form of _mm_shufflelo_epi16: A and imm are forwarded unchanged
   to _mm_shufflelo_epi16 and the result is blended with _mm_setzero_si128(),
   under mask U, by __builtin_ia32_selectw_128. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1852
/* Merge-masked form of _mm256_shufflelo_epi16: A and imm are forwarded
   unchanged to _mm256_shufflelo_epi16 and the result is blended with W, under
   mask U, by __builtin_ia32_selectw_256. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1858
/* Zero-masked form of _mm256_shufflelo_epi16: A and imm are forwarded
   unchanged to _mm256_shufflelo_epi16 and the result is blended with
   _mm256_setzero_si256(), under mask U, by __builtin_ia32_selectw_256. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1864
1865static __inline__ __m256i __DEFAULT_FN_ATTRS256
1866_mm256_sllv_epi16(__m256i __A, __m256i __B)
1867{
1868  return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1869}
1870
1871static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1873{
1874  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1875                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1876                                           (__v16hi)__W);
1877}
1878
1879static __inline__ __m256i __DEFAULT_FN_ATTRS256
1880_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1881{
1882  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1883                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1884                                           (__v16hi)_mm256_setzero_si256());
1885}
1886
1887static __inline__ __m128i __DEFAULT_FN_ATTRS128
1888_mm_sllv_epi16(__m128i __A, __m128i __B)
1889{
1890  return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1891}
1892
1893static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1895{
1896  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1897                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1898                                             (__v8hi)__W);
1899}
1900
1901static __inline__ __m128i __DEFAULT_FN_ATTRS128
1902_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1903{
1904  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1905                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1906                                             (__v8hi)_mm_setzero_si128());
1907}
1908
1909static __inline__ __m128i __DEFAULT_FN_ATTRS128
1910_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1911{
1912  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1913                                             (__v8hi)_mm_sll_epi16(__A, __B),
1914                                             (__v8hi)__W);
1915}
1916
1917static __inline__ __m128i __DEFAULT_FN_ATTRS128
1918_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1919{
1920  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1921                                             (__v8hi)_mm_sll_epi16(__A, __B),
1922                                             (__v8hi)_mm_setzero_si128());
1923}
1924
1925static __inline__ __m256i __DEFAULT_FN_ATTRS256
1926_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
1927{
1928  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1929                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1930                                          (__v16hi)__W);
1931}
1932
1933static __inline__ __m256i __DEFAULT_FN_ATTRS256
1934_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
1935{
1936  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1937                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1938                                          (__v16hi)_mm256_setzero_si256());
1939}
1940
1941static __inline__ __m128i __DEFAULT_FN_ATTRS128
1942_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
1943{
1944  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1945                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1946                                             (__v8hi)__W);
1947}
1948
1949static __inline__ __m128i __DEFAULT_FN_ATTRS128
1950_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
1951{
1952  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1953                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1954                                             (__v8hi)_mm_setzero_si128());
1955}
1956
1957static __inline__ __m256i __DEFAULT_FN_ATTRS256
1958_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
1959                       unsigned int __B)
1960{
1961  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1962                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1963                                         (__v16hi)__W);
1964}
1965
1966static __inline__ __m256i __DEFAULT_FN_ATTRS256
1967_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
1968{
1969  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1970                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1971                                         (__v16hi)_mm256_setzero_si256());
1972}
1973
1974static __inline__ __m256i __DEFAULT_FN_ATTRS256
1975_mm256_srlv_epi16(__m256i __A, __m256i __B)
1976{
1977  return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1978}
1979
1980static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1982{
1983  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1984                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1985                                           (__v16hi)__W);
1986}
1987
1988static __inline__ __m256i __DEFAULT_FN_ATTRS256
1989_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1990{
1991  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1992                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1993                                           (__v16hi)_mm256_setzero_si256());
1994}
1995
1996static __inline__ __m128i __DEFAULT_FN_ATTRS128
1997_mm_srlv_epi16(__m128i __A, __m128i __B)
1998{
1999  return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2000}
2001
2002static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2004{
2005  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2006                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2007                                             (__v8hi)__W);
2008}
2009
2010static __inline__ __m128i __DEFAULT_FN_ATTRS128
2011_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2012{
2013  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2014                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2015                                             (__v8hi)_mm_setzero_si128());
2016}
2017
2018static __inline__ __m256i __DEFAULT_FN_ATTRS256
2019_mm256_srav_epi16(__m256i __A, __m256i __B)
2020{
2021  return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2022}
2023
2024static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
2026{
2027  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2028                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2029                                           (__v16hi)__W);
2030}
2031
2032static __inline__ __m256i __DEFAULT_FN_ATTRS256
2033_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
2034{
2035  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2036                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2037                                           (__v16hi)_mm256_setzero_si256());
2038}
2039
2040static __inline__ __m128i __DEFAULT_FN_ATTRS128
2041_mm_srav_epi16(__m128i __A, __m128i __B)
2042{
2043  return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2048{
2049  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2050                                             (__v8hi)_mm_srav_epi16(__A, __B),
2051                                             (__v8hi)__W);
2052}
2053
2054static __inline__ __m128i __DEFAULT_FN_ATTRS128
2055_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2056{
2057  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2058                                             (__v8hi)_mm_srav_epi16(__A, __B),
2059                                             (__v8hi)_mm_setzero_si128());
2060}
2061
2062static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2064{
2065  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2066                                             (__v8hi)_mm_sra_epi16(__A, __B),
2067                                             (__v8hi)__W);
2068}
2069
2070static __inline__ __m128i __DEFAULT_FN_ATTRS128
2071_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2072{
2073  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2074                                             (__v8hi)_mm_sra_epi16(__A, __B),
2075                                             (__v8hi)_mm_setzero_si128());
2076}
2077
2078static __inline__ __m256i __DEFAULT_FN_ATTRS256
2079_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2080{
2081  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2082                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2083                                          (__v16hi)__W);
2084}
2085
2086static __inline__ __m256i __DEFAULT_FN_ATTRS256
2087_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2088{
2089  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2090                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2091                                          (__v16hi)_mm256_setzero_si256());
2092}
2093
2094static __inline__ __m128i __DEFAULT_FN_ATTRS128
2095_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
2096{
2097  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2098                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2099                                             (__v8hi)__W);
2100}
2101
2102static __inline__ __m128i __DEFAULT_FN_ATTRS128
2103_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
2104{
2105  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2106                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2107                                             (__v8hi)_mm_setzero_si128());
2108}
2109
2110static __inline__ __m256i __DEFAULT_FN_ATTRS256
2111_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
2112                       unsigned int __B)
2113{
2114  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2115                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2116                                         (__v16hi)__W);
2117}
2118
2119static __inline__ __m256i __DEFAULT_FN_ATTRS256
2120_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
2121{
2122  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2123                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2124                                         (__v16hi)_mm256_setzero_si256());
2125}
2126
2127static __inline__ __m128i __DEFAULT_FN_ATTRS128
2128_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2129{
2130  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2131                                             (__v8hi)_mm_srl_epi16(__A, __B),
2132                                             (__v8hi)__W);
2133}
2134
2135static __inline__ __m128i __DEFAULT_FN_ATTRS128
2136_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2137{
2138  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2139                                             (__v8hi)_mm_srl_epi16(__A, __B),
2140                                             (__v8hi)_mm_setzero_si128());
2141}
2142
2143static __inline__ __m256i __DEFAULT_FN_ATTRS256
2144_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2145{
2146  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2147                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2148                                          (__v16hi)__W);
2149}
2150
2151static __inline__ __m256i __DEFAULT_FN_ATTRS256
2152_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2153{
2154  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2155                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2156                                          (__v16hi)_mm256_setzero_si256());
2157}
2158
2159static __inline__ __m128i __DEFAULT_FN_ATTRS128
2160_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
2161{
2162  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2163                                             (__v8hi)_mm_srli_epi16(__A, __B),
2164                                             (__v8hi)__W);
2165}
2166
2167static __inline__ __m128i __DEFAULT_FN_ATTRS128
2168_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
2169{
2170  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2171                                             (__v8hi)_mm_srli_epi16(__A, __B),
2172                                             (__v8hi)_mm_setzero_si128());
2173}
2174
2175static __inline__ __m256i __DEFAULT_FN_ATTRS256
2176_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
2177{
2178  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2179                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2180                                         (__v16hi)__W);
2181}
2182
2183static __inline__ __m256i __DEFAULT_FN_ATTRS256
2184_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
2185{
2186  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2187                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2188                                         (__v16hi)_mm256_setzero_si256());
2189}
2190
2191static __inline__ __m128i __DEFAULT_FN_ATTRS128
2192_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2193{
2194  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2195                (__v8hi) __A,
2196                (__v8hi) __W);
2197}
2198
2199static __inline__ __m128i __DEFAULT_FN_ATTRS128
2200_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
2201{
2202  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2203                (__v8hi) __A,
2204                (__v8hi) _mm_setzero_si128 ());
2205}
2206
2207static __inline__ __m256i __DEFAULT_FN_ATTRS256
2208_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
2209{
2210  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2211                (__v16hi) __A,
2212                (__v16hi) __W);
2213}
2214
2215static __inline__ __m256i __DEFAULT_FN_ATTRS256
2216_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
2217{
2218  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2219                (__v16hi) __A,
2220                (__v16hi) _mm256_setzero_si256 ());
2221}
2222
2223static __inline__ __m128i __DEFAULT_FN_ATTRS128
2224_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
2225{
2226  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2227                (__v16qi) __A,
2228                (__v16qi) __W);
2229}
2230
2231static __inline__ __m128i __DEFAULT_FN_ATTRS128
2232_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
2233{
2234  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2235                (__v16qi) __A,
2236                (__v16qi) _mm_setzero_si128 ());
2237}
2238
2239static __inline__ __m256i __DEFAULT_FN_ATTRS256
2240_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
2241{
2242  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2243                (__v32qi) __A,
2244                (__v32qi) __W);
2245}
2246
2247static __inline__ __m256i __DEFAULT_FN_ATTRS256
2248_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
2249{
2250  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2251                (__v32qi) __A,
2252                (__v32qi) _mm256_setzero_si256 ());
2253}
2254
2255
2256static __inline__ __m128i __DEFAULT_FN_ATTRS128
2257_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
2258{
2259  return (__m128i) __builtin_ia32_selectb_128(__M,
2260                                              (__v16qi) _mm_set1_epi8(__A),
2261                                              (__v16qi) __O);
2262}
2263
2264static __inline__ __m128i __DEFAULT_FN_ATTRS128
2265_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
2266{
2267 return (__m128i) __builtin_ia32_selectb_128(__M,
2268                                             (__v16qi) _mm_set1_epi8(__A),
2269                                             (__v16qi) _mm_setzero_si128());
2270}
2271
2272static __inline__ __m256i __DEFAULT_FN_ATTRS256
2273_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
2274{
2275  return (__m256i) __builtin_ia32_selectb_256(__M,
2276                                              (__v32qi) _mm256_set1_epi8(__A),
2277                                              (__v32qi) __O);
2278}
2279
2280static __inline__ __m256i __DEFAULT_FN_ATTRS256
2281_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
2282{
2283  return (__m256i) __builtin_ia32_selectb_256(__M,
2284                                              (__v32qi) _mm256_set1_epi8(__A),
2285                                              (__v32qi) _mm256_setzero_si256());
2286}
2287
2288static __inline __m128i __DEFAULT_FN_ATTRS128
2289_mm_loadu_epi16 (void const *__P)
2290{
2291  struct __loadu_epi16 {
2292    __m128i_u __v;
2293  } __attribute__((__packed__, __may_alias__));
2294  return ((const struct __loadu_epi16*)__P)->__v;
2295}
2296
2297static __inline__ __m128i __DEFAULT_FN_ATTRS128
2298_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
2299{
2300  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2301                 (__v8hi) __W,
2302                 (__mmask8) __U);
2303}
2304
2305static __inline__ __m128i __DEFAULT_FN_ATTRS128
2306_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
2307{
2308  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2309                 (__v8hi)
2310                 _mm_setzero_si128 (),
2311                 (__mmask8) __U);
2312}
2313
2314static __inline __m256i __DEFAULT_FN_ATTRS256
2315_mm256_loadu_epi16 (void const *__P)
2316{
2317  struct __loadu_epi16 {
2318    __m256i_u __v;
2319  } __attribute__((__packed__, __may_alias__));
2320  return ((const struct __loadu_epi16*)__P)->__v;
2321}
2322
2323static __inline__ __m256i __DEFAULT_FN_ATTRS256
2324_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
2325{
2326  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2327                 (__v16hi) __W,
2328                 (__mmask16) __U);
2329}
2330
2331static __inline__ __m256i __DEFAULT_FN_ATTRS256
2332_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
2333{
2334  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2335                 (__v16hi)
2336                 _mm256_setzero_si256 (),
2337                 (__mmask16) __U);
2338}
2339
2340static __inline __m128i __DEFAULT_FN_ATTRS128
2341_mm_loadu_epi8 (void const *__P)
2342{
2343  struct __loadu_epi8 {
2344    __m128i_u __v;
2345  } __attribute__((__packed__, __may_alias__));
2346  return ((const struct __loadu_epi8*)__P)->__v;
2347}
2348
2349static __inline__ __m128i __DEFAULT_FN_ATTRS128
2350_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
2351{
2352  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2353                 (__v16qi) __W,
2354                 (__mmask16) __U);
2355}
2356
2357static __inline__ __m128i __DEFAULT_FN_ATTRS128
2358_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
2359{
2360  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2361                 (__v16qi)
2362                 _mm_setzero_si128 (),
2363                 (__mmask16) __U);
2364}
2365
2366static __inline __m256i __DEFAULT_FN_ATTRS256
2367_mm256_loadu_epi8 (void const *__P)
2368{
2369  struct __loadu_epi8 {
2370    __m256i_u __v;
2371  } __attribute__((__packed__, __may_alias__));
2372  return ((const struct __loadu_epi8*)__P)->__v;
2373}
2374
2375static __inline__ __m256i __DEFAULT_FN_ATTRS256
2376_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
2377{
2378  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2379                 (__v32qi) __W,
2380                 (__mmask32) __U);
2381}
2382
2383static __inline__ __m256i __DEFAULT_FN_ATTRS256
2384_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
2385{
2386  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2387                 (__v32qi)
2388                 _mm256_setzero_si256 (),
2389                 (__mmask32) __U);
2390}
2391
2392static __inline void __DEFAULT_FN_ATTRS128
2393_mm_storeu_epi16 (void *__P, __m128i __A)
2394{
2395  struct __storeu_epi16 {
2396    __m128i_u __v;
2397  } __attribute__((__packed__, __may_alias__));
2398  ((struct __storeu_epi16*)__P)->__v = __A;
2399}
2400
2401static __inline__ void __DEFAULT_FN_ATTRS128
2402_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
2403{
2404  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2405             (__v8hi) __A,
2406             (__mmask8) __U);
2407}
2408
2409static __inline void __DEFAULT_FN_ATTRS256
2410_mm256_storeu_epi16 (void *__P, __m256i __A)
2411{
2412  struct __storeu_epi16 {
2413    __m256i_u __v;
2414  } __attribute__((__packed__, __may_alias__));
2415  ((struct __storeu_epi16*)__P)->__v = __A;
2416}
2417
2418static __inline__ void __DEFAULT_FN_ATTRS256
2419_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
2420{
2421  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2422             (__v16hi) __A,
2423             (__mmask16) __U);
2424}
2425
2426static __inline void __DEFAULT_FN_ATTRS128
2427_mm_storeu_epi8 (void *__P, __m128i __A)
2428{
2429  struct __storeu_epi8 {
2430    __m128i_u __v;
2431  } __attribute__((__packed__, __may_alias__));
2432  ((struct __storeu_epi8*)__P)->__v = __A;
2433}
2434
2435static __inline__ void __DEFAULT_FN_ATTRS128
2436_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
2437{
2438  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2439             (__v16qi) __A,
2440             (__mmask16) __U);
2441}
2442
2443static __inline void __DEFAULT_FN_ATTRS256
2444_mm256_storeu_epi8 (void *__P, __m256i __A)
2445{
2446  struct __storeu_epi8 {
2447    __m256i_u __v;
2448  } __attribute__((__packed__, __may_alias__));
2449  ((struct __storeu_epi8*)__P)->__v = __A;
2450}
2451
2452static __inline__ void __DEFAULT_FN_ATTRS256
2453_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
2454{
2455  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2456             (__v32qi) __A,
2457             (__mmask32) __U);
2458}
2459
2460static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2461_mm_test_epi8_mask (__m128i __A, __m128i __B)
2462{
2463  return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
2464}
2465
2466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2468{
2469  return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2470                                    _mm_setzero_si128());
2471}
2472
2473static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2474_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2475{
2476  return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2477                                  _mm256_setzero_si256());
2478}
2479
2480static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2481_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2482{
2483  return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2484                                       _mm256_setzero_si256());
2485}
2486
2487static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2488_mm_test_epi16_mask (__m128i __A, __m128i __B)
2489{
2490  return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2491}
2492
2493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2495{
2496  return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2497                                     _mm_setzero_si128());
2498}
2499
2500static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2501_mm256_test_epi16_mask (__m256i __A, __m256i __B)
2502{
2503  return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2504                                   _mm256_setzero_si256 ());
2505}
2506
2507static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2508_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2509{
2510  return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2511                                        _mm256_setzero_si256());
2512}
2513
2514static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2515_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2516{
2517  return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2518}
2519
2520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2522{
2523  return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2524                                  _mm_setzero_si128());
2525}
2526
2527static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2528_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
2529{
2530  return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2531                                 _mm256_setzero_si256());
2532}
2533
2534static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2535_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2536{
2537  return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2538                                      _mm256_setzero_si256());
2539}
2540
2541static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2542_mm_testn_epi16_mask (__m128i __A, __m128i __B)
2543{
2544  return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2545}
2546
2547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2549{
2550  return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
2551}
2552
2553static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2554_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
2555{
2556  return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2557                                  _mm256_setzero_si256());
2558}
2559
2560static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2561_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2562{
2563  return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2564                                       _mm256_setzero_si256());
2565}
2566
2567static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2568_mm_movepi8_mask (__m128i __A)
2569{
2570  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2571}
2572
2573static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2574_mm256_movepi8_mask (__m256i __A)
2575{
2576  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2577}
2578
2579static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2580_mm_movepi16_mask (__m128i __A)
2581{
2582  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2583}
2584
2585static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2586_mm256_movepi16_mask (__m256i __A)
2587{
2588  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2589}
2590
2591static __inline__ __m128i __DEFAULT_FN_ATTRS128
2592_mm_movm_epi8 (__mmask16 __A)
2593{
2594  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2595}
2596
2597static __inline__ __m256i __DEFAULT_FN_ATTRS256
2598_mm256_movm_epi8 (__mmask32 __A)
2599{
2600  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2601}
2602
2603static __inline__ __m128i __DEFAULT_FN_ATTRS128
2604_mm_movm_epi16 (__mmask8 __A)
2605{
2606  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2607}
2608
2609static __inline__ __m256i __DEFAULT_FN_ATTRS256
2610_mm256_movm_epi16 (__mmask16 __A)
2611{
2612  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2613}
2614
2615static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
2617{
2618  return (__m128i)__builtin_ia32_selectb_128(__M,
2619                                             (__v16qi) _mm_broadcastb_epi8(__A),
2620                                             (__v16qi) __O);
2621}
2622
2623static __inline__ __m128i __DEFAULT_FN_ATTRS128
2624_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
2625{
2626  return (__m128i)__builtin_ia32_selectb_128(__M,
2627                                             (__v16qi) _mm_broadcastb_epi8(__A),
2628                                             (__v16qi) _mm_setzero_si128());
2629}
2630
2631static __inline__ __m256i __DEFAULT_FN_ATTRS256
2632_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
2633{
2634  return (__m256i)__builtin_ia32_selectb_256(__M,
2635                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2636                                             (__v32qi) __O);
2637}
2638
2639static __inline__ __m256i __DEFAULT_FN_ATTRS256
2640_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
2641{
2642  return (__m256i)__builtin_ia32_selectb_256(__M,
2643                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2644                                             (__v32qi) _mm256_setzero_si256());
2645}
2646
2647static __inline__ __m128i __DEFAULT_FN_ATTRS128
2648_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2649{
2650  return (__m128i)__builtin_ia32_selectw_128(__M,
2651                                             (__v8hi) _mm_broadcastw_epi16(__A),
2652                                             (__v8hi) __O);
2653}
2654
2655static __inline__ __m128i __DEFAULT_FN_ATTRS128
2656_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
2657{
2658  return (__m128i)__builtin_ia32_selectw_128(__M,
2659                                             (__v8hi) _mm_broadcastw_epi16(__A),
2660                                             (__v8hi) _mm_setzero_si128());
2661}
2662
2663static __inline__ __m256i __DEFAULT_FN_ATTRS256
2664_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
2665{
2666  return (__m256i)__builtin_ia32_selectw_256(__M,
2667                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2668                                             (__v16hi) __O);
2669}
2670
2671static __inline__ __m256i __DEFAULT_FN_ATTRS256
2672_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
2673{
2674  return (__m256i)__builtin_ia32_selectw_256(__M,
2675                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2676                                             (__v16hi) _mm256_setzero_si256());
2677}
2678
2679static __inline__ __m256i __DEFAULT_FN_ATTRS256
2680_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
2681{
2682  return (__m256i) __builtin_ia32_selectw_256 (__M,
2683                                               (__v16hi) _mm256_set1_epi16(__A),
2684                                               (__v16hi) __O);
2685}
2686
2687static __inline__ __m256i __DEFAULT_FN_ATTRS256
2688_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
2689{
2690  return (__m256i) __builtin_ia32_selectw_256(__M,
2691                                              (__v16hi)_mm256_set1_epi16(__A),
2692                                              (__v16hi) _mm256_setzero_si256());
2693}
2694
2695static __inline__ __m128i __DEFAULT_FN_ATTRS128
2696_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
2697{
2698  return (__m128i) __builtin_ia32_selectw_128(__M,
2699                                              (__v8hi) _mm_set1_epi16(__A),
2700                                              (__v8hi) __O);
2701}
2702
2703static __inline__ __m128i __DEFAULT_FN_ATTRS128
2704_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
2705{
2706  return (__m128i) __builtin_ia32_selectw_128(__M,
2707                                              (__v8hi) _mm_set1_epi16(__A),
2708                                              (__v8hi) _mm_setzero_si128());
2709}
2710
2711static __inline__ __m128i __DEFAULT_FN_ATTRS128
2712_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
2713{
2714  return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2715}
2716
2717static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
2719{
2720  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2721                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2722                                        (__v8hi) _mm_setzero_si128());
2723}
2724
2725static __inline__ __m128i __DEFAULT_FN_ATTRS128
2726_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
2727          __m128i __B)
2728{
2729  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2730                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2731                                        (__v8hi)__W);
2732}
2733
2734static __inline__ __m256i __DEFAULT_FN_ATTRS256
2735_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
2736{
2737  return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2738}
2739
2740static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
2742        __m256i __B)
2743{
2744  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2745                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2746                                    (__v16hi)_mm256_setzero_si256());
2747}
2748
2749static __inline__ __m256i __DEFAULT_FN_ATTRS256
2750_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
2751             __m256i __B)
2752{
2753  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2754                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2755                                    (__v16hi)__W);
2756}
2757
/* Masked form of _mm_alignr_epi8: __builtin_ia32_selectb_128 chooses between
 * _mm_alignr_epi8(A, B, N) and W under control of U. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)(__m128i)(W)))
2762
/* Masked form of _mm_alignr_epi8: __builtin_ia32_selectb_128 chooses between
 * _mm_alignr_epi8(A, B, N) and _mm_setzero_si128() under control of U. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)_mm_setzero_si128()))
2767
/* Masked form of _mm256_alignr_epi8: __builtin_ia32_selectb_256 chooses
 * between _mm256_alignr_epi8(A, B, N) and W under control of U. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)(__m256i)(W)))
2772
/* Masked form of _mm256_alignr_epi8: __builtin_ia32_selectb_256 chooses
 * between _mm256_alignr_epi8(A, B, N) and _mm256_setzero_si256() under U. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)_mm256_setzero_si256()))
2777
/* Expands to __builtin_ia32_dbpsadbw128 on A and B (each viewed as __v16qi)
 * with imm converted to int; the result is cast to __m128i. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128( \
      (__v16qi)(__m128i)(A), \
      (__v16qi)(__m128i)(B), \
      (int)(imm)))
2781
/* Masked form of _mm_dbsad_epu8: __builtin_ia32_selectw_128 chooses between
 * _mm_dbsad_epu8(A, B, imm) and W under control of U. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))
2786
/* Masked form of _mm_dbsad_epu8: __builtin_ia32_selectw_128 chooses between
 * _mm_dbsad_epu8(A, B, imm) and _mm_setzero_si128() under control of U. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))
2791
/* Expands to __builtin_ia32_dbpsadbw256 on A and B (each viewed as __v32qi)
 * with imm converted to int; the result is cast to __m256i. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256( \
      (__v32qi)(__m256i)(A), \
      (__v32qi)(__m256i)(B), \
      (int)(imm)))
2795
/* Masked form of _mm256_dbsad_epu8: __builtin_ia32_selectw_256 chooses
 * between _mm256_dbsad_epu8(A, B, imm) and W under control of U. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))
2800
/* Masked form of _mm256_dbsad_epu8: __builtin_ia32_selectw_256 chooses
 * between _mm256_dbsad_epu8(A, B, imm) and _mm256_setzero_si256() under U. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
2805
2806static __inline__ short __DEFAULT_FN_ATTRS128
2807_mm_reduce_add_epi16(__m128i __W) {
2808  return __builtin_reduce_add((__v8hi)__W);
2809}
2810
2811static __inline__ short __DEFAULT_FN_ATTRS128
2812_mm_reduce_mul_epi16(__m128i __W) {
2813  return __builtin_reduce_mul((__v8hi)__W);
2814}
2815
2816static __inline__ short __DEFAULT_FN_ATTRS128
2817_mm_reduce_and_epi16(__m128i __W) {
2818  return __builtin_reduce_and((__v8hi)__W);
2819}
2820
2821static __inline__ short __DEFAULT_FN_ATTRS128
2822_mm_reduce_or_epi16(__m128i __W) {
2823  return __builtin_reduce_or((__v8hi)__W);
2824}
2825
2826static __inline__ short __DEFAULT_FN_ATTRS128
2827_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
2828  __W = _mm_maskz_mov_epi16(__M, __W);
2829  return __builtin_reduce_add((__v8hi)__W);
2830}
2831
2832static __inline__ short __DEFAULT_FN_ATTRS128
2833_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
2834  __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
2835  return __builtin_reduce_mul((__v8hi)__W);
2836}
2837
2838static __inline__ short __DEFAULT_FN_ATTRS128
2839_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
2840  __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
2841  return __builtin_reduce_and((__v8hi)__W);
2842}
2843
2844static __inline__ short __DEFAULT_FN_ATTRS128
2845_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
2846  __W = _mm_maskz_mov_epi16(__M, __W);
2847  return __builtin_reduce_or((__v8hi)__W);
2848}
2849
2850static __inline__ short __DEFAULT_FN_ATTRS128
2851_mm_reduce_max_epi16(__m128i __V) {
2852  return __builtin_reduce_max((__v8hi)__V);
2853}
2854
2855static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2856_mm_reduce_max_epu16(__m128i __V) {
2857  return __builtin_reduce_max((__v8hu)__V);
2858}
2859
2860static __inline__ short __DEFAULT_FN_ATTRS128
2861_mm_reduce_min_epi16(__m128i __V) {
2862  return __builtin_reduce_min((__v8hi)__V);
2863}
2864
2865static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2866_mm_reduce_min_epu16(__m128i __V) {
2867  return __builtin_reduce_min((__v8hu)__V);
2868}
2869
2870static __inline__ short __DEFAULT_FN_ATTRS128
2871_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
2872  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
2873  return __builtin_reduce_max((__v8hi)__V);
2874}
2875
2876static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2877_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
2878  __V = _mm_maskz_mov_epi16(__M, __V);
2879  return __builtin_reduce_max((__v8hu)__V);
2880}
2881
2882static __inline__ short __DEFAULT_FN_ATTRS128
2883_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
2884  __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
2885  return __builtin_reduce_min((__v8hi)__V);
2886}
2887
2888static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2889_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
2890  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
2891  return __builtin_reduce_min((__v8hu)__V);
2892}
2893
2894static __inline__ short __DEFAULT_FN_ATTRS256
2895_mm256_reduce_add_epi16(__m256i __W) {
2896  return __builtin_reduce_add((__v16hi)__W);
2897}
2898
2899static __inline__ short __DEFAULT_FN_ATTRS256
2900_mm256_reduce_mul_epi16(__m256i __W) {
2901  return __builtin_reduce_mul((__v16hi)__W);
2902}
2903
2904static __inline__ short __DEFAULT_FN_ATTRS256
2905_mm256_reduce_and_epi16(__m256i __W) {
2906  return __builtin_reduce_and((__v16hi)__W);
2907}
2908
2909static __inline__ short __DEFAULT_FN_ATTRS256
2910_mm256_reduce_or_epi16(__m256i __W) {
2911  return __builtin_reduce_or((__v16hi)__W);
2912}
2913
2914static __inline__ short __DEFAULT_FN_ATTRS256
2915_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
2916  __W = _mm256_maskz_mov_epi16(__M, __W);
2917  return __builtin_reduce_add((__v16hi)__W);
2918}
2919
2920static __inline__ short __DEFAULT_FN_ATTRS256
2921_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
2922  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
2923  return __builtin_reduce_mul((__v16hi)__W);
2924}
2925
2926static __inline__ short __DEFAULT_FN_ATTRS256
2927_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
2928  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
2929  return __builtin_reduce_and((__v16hi)__W);
2930}
2931
2932static __inline__ short __DEFAULT_FN_ATTRS256
2933_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
2934  __W = _mm256_maskz_mov_epi16(__M, __W);
2935  return __builtin_reduce_or((__v16hi)__W);
2936}
2937
2938static __inline__ short __DEFAULT_FN_ATTRS256
2939_mm256_reduce_max_epi16(__m256i __V) {
2940  return __builtin_reduce_max((__v16hi)__V);
2941}
2942
2943static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2944_mm256_reduce_max_epu16(__m256i __V) {
2945  return __builtin_reduce_max((__v16hu)__V);
2946}
2947
2948static __inline__ short __DEFAULT_FN_ATTRS256
2949_mm256_reduce_min_epi16(__m256i __V) {
2950  return __builtin_reduce_min((__v16hi)__V);
2951}
2952
2953static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2954_mm256_reduce_min_epu16(__m256i __V) {
2955  return __builtin_reduce_min((__v16hu)__V);
2956}
2957
2958static __inline__ short __DEFAULT_FN_ATTRS256
2959_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
2960  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
2961  return __builtin_reduce_max((__v16hi)__V);
2962}
2963
2964static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2965_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
2966  __V = _mm256_maskz_mov_epi16(__M, __V);
2967  return __builtin_reduce_max((__v16hu)__V);
2968}
2969
2970static __inline__ short __DEFAULT_FN_ATTRS256
2971_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
2972  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
2973  return __builtin_reduce_min((__v16hi)__V);
2974}
2975
2976static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2977_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
2978  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
2979  return __builtin_reduce_min((__v16hu)__V);
2980}
2981
2982static __inline__ signed char __DEFAULT_FN_ATTRS128
2983_mm_reduce_add_epi8(__m128i __W) {
2984  return __builtin_reduce_add((__v16qs)__W);
2985}
2986
2987static __inline__ signed char __DEFAULT_FN_ATTRS128
2988_mm_reduce_mul_epi8(__m128i __W) {
2989  return __builtin_reduce_mul((__v16qs)__W);
2990}
2991
2992static __inline__ signed char __DEFAULT_FN_ATTRS128
2993_mm_reduce_and_epi8(__m128i __W) {
2994  return __builtin_reduce_and((__v16qs)__W);
2995}
2996
2997static __inline__ signed char __DEFAULT_FN_ATTRS128
2998_mm_reduce_or_epi8(__m128i __W) {
2999  return __builtin_reduce_or((__v16qs)__W);
3000}
3001
3002static __inline__ signed char __DEFAULT_FN_ATTRS128
3003_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
3004  __W = _mm_maskz_mov_epi8(__M, __W);
3005  return __builtin_reduce_add((__v16qs)__W);
3006}
3007
3008static __inline__ signed char __DEFAULT_FN_ATTRS128
3009_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
3010  __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
3011  return __builtin_reduce_mul((__v16qs)__W);
3012}
3013
3014static __inline__ signed char __DEFAULT_FN_ATTRS128
3015_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
3016  __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
3017  return __builtin_reduce_and((__v16qs)__W);
3018}
3019
3020static __inline__ signed char __DEFAULT_FN_ATTRS128
3021_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
3022  __W = _mm_maskz_mov_epi8(__M, __W);
3023  return __builtin_reduce_or((__v16qs)__W);
3024}
3025
3026static __inline__ signed char __DEFAULT_FN_ATTRS128
3027_mm_reduce_max_epi8(__m128i __V) {
3028  return __builtin_reduce_max((__v16qs)__V);
3029}
3030
3031static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3032_mm_reduce_max_epu8(__m128i __V) {
3033  return __builtin_reduce_max((__v16qu)__V);
3034}
3035
3036static __inline__ signed char __DEFAULT_FN_ATTRS128
3037_mm_reduce_min_epi8(__m128i __V) {
3038  return __builtin_reduce_min((__v16qs)__V);
3039}
3040
3041static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3042_mm_reduce_min_epu8(__m128i __V) {
3043  return __builtin_reduce_min((__v16qu)__V);
3044}
3045
3046static __inline__ signed char __DEFAULT_FN_ATTRS128
3047_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
3048  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
3049  return __builtin_reduce_max((__v16qs)__V);
3050}
3051
3052static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3053_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
3054  __V = _mm_maskz_mov_epi8(__M, __V);
3055  return __builtin_reduce_max((__v16qu)__V);
3056}
3057
3058static __inline__ signed char __DEFAULT_FN_ATTRS128
3059_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
3060  __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
3061  return __builtin_reduce_min((__v16qs)__V);
3062}
3063
3064static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3065_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
3066  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
3067  return __builtin_reduce_min((__v16qu)__V);
3068}
3069
3070static __inline__ signed char __DEFAULT_FN_ATTRS256
3071_mm256_reduce_add_epi8(__m256i __W) {
3072  return __builtin_reduce_add((__v32qs)__W);
3073}
3074
3075static __inline__ signed char __DEFAULT_FN_ATTRS256
3076_mm256_reduce_mul_epi8(__m256i __W) {
3077  return __builtin_reduce_mul((__v32qs)__W);
3078}
3079
3080static __inline__ signed char __DEFAULT_FN_ATTRS256
3081_mm256_reduce_and_epi8(__m256i __W) {
3082  return __builtin_reduce_and((__v32qs)__W);
3083}
3084
3085static __inline__ signed char __DEFAULT_FN_ATTRS256
3086_mm256_reduce_or_epi8(__m256i __W) {
3087  return __builtin_reduce_or((__v32qs)__W);
3088}
3089
3090static __inline__ signed char __DEFAULT_FN_ATTRS256
3091_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
3092  __W = _mm256_maskz_mov_epi8(__M, __W);
3093  return __builtin_reduce_add((__v32qs)__W);
3094}
3095
3096static __inline__ signed char __DEFAULT_FN_ATTRS256
3097_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
3098  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
3099  return __builtin_reduce_mul((__v32qs)__W);
3100}
3101
3102static __inline__ signed char __DEFAULT_FN_ATTRS256
3103_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
3104  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
3105  return __builtin_reduce_and((__v32qs)__W);
3106}
3107
3108static __inline__ signed char __DEFAULT_FN_ATTRS256
3109_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
3110  __W = _mm256_maskz_mov_epi8(__M, __W);
3111  return __builtin_reduce_or((__v32qs)__W);
3112}
3113
3114static __inline__ signed char __DEFAULT_FN_ATTRS256
3115_mm256_reduce_max_epi8(__m256i __V) {
3116  return __builtin_reduce_max((__v32qs)__V);
3117}
3118
3119static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3120_mm256_reduce_max_epu8(__m256i __V) {
3121  return __builtin_reduce_max((__v32qu)__V);
3122}
3123
3124static __inline__ signed char __DEFAULT_FN_ATTRS256
3125_mm256_reduce_min_epi8(__m256i __V) {
3126  return __builtin_reduce_min((__v32qs)__V);
3127}
3128
3129static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3130_mm256_reduce_min_epu8(__m256i __V) {
3131  return __builtin_reduce_min((__v32qu)__V);
3132}
3133
3134static __inline__ signed char __DEFAULT_FN_ATTRS256
3135_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
3136  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
3137  return __builtin_reduce_max((__v32qs)__V);
3138}
3139
3140static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3141_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
3142  __V = _mm256_maskz_mov_epi8(__M, __V);
3143  return __builtin_reduce_max((__v32qu)__V);
3144}
3145
3146static __inline__ signed char __DEFAULT_FN_ATTRS256
3147_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
3148  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
3149  return __builtin_reduce_min((__v32qs)__V);
3150}
3151
3152static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3153_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
3154  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
3155  return __builtin_reduce_min((__v32qu)__V);
3156}
3157
3158#undef __DEFAULT_FN_ATTRS128
3159#undef __DEFAULT_FN_ATTRS256
3160
3161#endif /* __AVX512VLBWINTRIN_H */
3162