1/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLBWINTRIN_H
15#define __AVX512VLBWINTRIN_H
16
/* Attribute sets attached to the inline functions defined in this file.  Both
 * request always-inline, no debug info and the "avx512vl,avx512bw,no-evex512"
 * target; they differ only in the declared minimum vector width (128 vs 256).
 */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(128), \
                 __target__("avx512vl,avx512bw,no-evex512")))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(256), \
                 __target__("avx512vl,avx512bw,no-evex512")))
26
27/* Integer compare */
28
/* 128-bit byte compares yielding a __mmask16.  `p` is the comparison predicate
 * (callers in this file pass _MM_CMPINT_* constants).  The epi8 forms use the
 * cmpb builtin and the epu8 forms the ucmpb (unsigned) builtin.  The plain
 * forms pass an all-ones mask; the _mask_ forms forward the caller's mask `m`.
 */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (int)(p), (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (int)(p), \
      (__mmask16)(m)))

#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (int)(p), (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (int)(p), \
      (__mmask16)(m)))
48
/* 256-bit byte compares yielding a __mmask32.  `p` is the comparison predicate
 * (callers in this file pass _MM_CMPINT_* constants).  The epi8 forms use the
 * cmpb builtin and the epu8 forms the ucmpb (unsigned) builtin.  The plain
 * forms pass an all-ones mask; the _mask_ forms forward the caller's mask `m`.
 */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (int)(p), (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (int)(p), \
      (__mmask32)(m)))

#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (int)(p), (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (int)(p), \
      (__mmask32)(m)))
68
/* 128-bit 16-bit-element compares yielding a __mmask8.  `p` is the comparison
 * predicate (callers in this file pass _MM_CMPINT_* constants).  The epi16
 * forms use the cmpw builtin and the epu16 forms the ucmpw (unsigned) builtin.
 * The plain forms pass an all-ones mask; the _mask_ forms forward the caller's
 * mask `m`. */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (int)(p), (__mmask8)(m)))
88
/* 256-bit 16-bit-element compares yielding a __mmask16.  `p` is the comparison
 * predicate (callers in this file pass _MM_CMPINT_* constants).  The epi16
 * forms use the cmpw builtin and the epu16 forms the ucmpw (unsigned) builtin.
 * The plain forms pass an all-ones mask; the _mask_ forms forward the caller's
 * mask `m`. */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (int)(p), (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (int)(p), \
      (__mmask16)(m)))

#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (int)(p), (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (int)(p), \
      (__mmask16)(m)))
108
/* Fixed-predicate shorthands over _mm_cmp_epi8_mask and
 * _mm_mask_cmp_epi8_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm_cmpeq_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
133
/* Fixed-predicate shorthands over _mm256_cmp_epi8_mask and
 * _mm256_mask_cmp_epi8_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm256_cmpeq_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
158
/* Fixed-predicate shorthands over _mm_cmp_epu8_mask and
 * _mm_mask_cmp_epu8_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm_cmpeq_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
183
/* Fixed-predicate shorthands over _mm256_cmp_epu8_mask and
 * _mm256_mask_cmp_epu8_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm256_cmpeq_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
208
/* Fixed-predicate shorthands over _mm_cmp_epi16_mask and
 * _mm_mask_cmp_epi16_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm_cmpeq_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
233
/* Fixed-predicate shorthands over _mm256_cmp_epi16_mask and
 * _mm256_mask_cmp_epi16_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm256_cmpeq_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
258
/* Fixed-predicate shorthands over _mm_cmp_epu16_mask and
 * _mm_mask_cmp_epu16_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm_cmpeq_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
283
/* Fixed-predicate shorthands over _mm256_cmp_epu16_mask and
 * _mm256_mask_cmp_epu16_mask: each supplies the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE) as the predicate argument. */
#define _mm256_cmpeq_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
308
309static __inline__ __m256i __DEFAULT_FN_ATTRS256
310_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
311  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
312                                             (__v32qi)_mm256_add_epi8(__A, __B),
313                                             (__v32qi)__W);
314}
315
316static __inline__ __m256i __DEFAULT_FN_ATTRS256
317_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
318  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
319                                             (__v32qi)_mm256_add_epi8(__A, __B),
320                                             (__v32qi)_mm256_setzero_si256());
321}
322
323static __inline__ __m256i __DEFAULT_FN_ATTRS256
324_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
325  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
326                                             (__v16hi)_mm256_add_epi16(__A, __B),
327                                             (__v16hi)__W);
328}
329
330static __inline__ __m256i __DEFAULT_FN_ATTRS256
331_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
332  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
333                                             (__v16hi)_mm256_add_epi16(__A, __B),
334                                             (__v16hi)_mm256_setzero_si256());
335}
336
337static __inline__ __m256i __DEFAULT_FN_ATTRS256
338_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
339  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
340                                             (__v32qi)_mm256_sub_epi8(__A, __B),
341                                             (__v32qi)__W);
342}
343
344static __inline__ __m256i __DEFAULT_FN_ATTRS256
345_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
346  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
347                                             (__v32qi)_mm256_sub_epi8(__A, __B),
348                                             (__v32qi)_mm256_setzero_si256());
349}
350
351static __inline__ __m256i __DEFAULT_FN_ATTRS256
352_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
353  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
354                                             (__v16hi)_mm256_sub_epi16(__A, __B),
355                                             (__v16hi)__W);
356}
357
358static __inline__ __m256i __DEFAULT_FN_ATTRS256
359_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
360  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
361                                             (__v16hi)_mm256_sub_epi16(__A, __B),
362                                             (__v16hi)_mm256_setzero_si256());
363}
364
365static __inline__ __m128i __DEFAULT_FN_ATTRS128
366_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
367  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
368                                             (__v16qi)_mm_add_epi8(__A, __B),
369                                             (__v16qi)__W);
370}
371
372static __inline__ __m128i __DEFAULT_FN_ATTRS128
373_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
374  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
375                                             (__v16qi)_mm_add_epi8(__A, __B),
376                                             (__v16qi)_mm_setzero_si128());
377}
378
379static __inline__ __m128i __DEFAULT_FN_ATTRS128
380_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
381  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
382                                             (__v8hi)_mm_add_epi16(__A, __B),
383                                             (__v8hi)__W);
384}
385
386static __inline__ __m128i __DEFAULT_FN_ATTRS128
387_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
388  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
389                                             (__v8hi)_mm_add_epi16(__A, __B),
390                                             (__v8hi)_mm_setzero_si128());
391}
392
393static __inline__ __m128i __DEFAULT_FN_ATTRS128
394_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
395  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
396                                             (__v16qi)_mm_sub_epi8(__A, __B),
397                                             (__v16qi)__W);
398}
399
400static __inline__ __m128i __DEFAULT_FN_ATTRS128
401_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
402  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
403                                             (__v16qi)_mm_sub_epi8(__A, __B),
404                                             (__v16qi)_mm_setzero_si128());
405}
406
407static __inline__ __m128i __DEFAULT_FN_ATTRS128
408_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
409  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
410                                             (__v8hi)_mm_sub_epi16(__A, __B),
411                                             (__v8hi)__W);
412}
413
414static __inline__ __m128i __DEFAULT_FN_ATTRS128
415_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
416  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
417                                             (__v8hi)_mm_sub_epi16(__A, __B),
418                                             (__v8hi)_mm_setzero_si128());
419}
420
421static __inline__ __m256i __DEFAULT_FN_ATTRS256
422_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
423  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
424                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
425                                             (__v16hi)__W);
426}
427
428static __inline__ __m256i __DEFAULT_FN_ATTRS256
429_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
430  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
431                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
432                                             (__v16hi)_mm256_setzero_si256());
433}
434
435static __inline__ __m128i __DEFAULT_FN_ATTRS128
436_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
437  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
438                                             (__v8hi)_mm_mullo_epi16(__A, __B),
439                                             (__v8hi)__W);
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
444  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
445                                             (__v8hi)_mm_mullo_epi16(__A, __B),
446                                             (__v8hi)_mm_setzero_si128());
447}
448
449static __inline__ __m128i __DEFAULT_FN_ATTRS128
450_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
451{
452  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
453              (__v16qi) __W,
454              (__v16qi) __A);
455}
456
457static __inline__ __m256i __DEFAULT_FN_ATTRS256
458_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
459{
460  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
461               (__v32qi) __W,
462               (__v32qi) __A);
463}
464
465static __inline__ __m128i __DEFAULT_FN_ATTRS128
466_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
467{
468  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
469               (__v8hi) __W,
470               (__v8hi) __A);
471}
472
473static __inline__ __m256i __DEFAULT_FN_ATTRS256
474_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
475{
476  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
477               (__v16hi) __W,
478               (__v16hi) __A);
479}
480
481static __inline__ __m128i __DEFAULT_FN_ATTRS128
482_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
483{
484  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
485                                             (__v16qi)_mm_abs_epi8(__A),
486                                             (__v16qi)__W);
487}
488
489static __inline__ __m128i __DEFAULT_FN_ATTRS128
490_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
491{
492  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
493                                             (__v16qi)_mm_abs_epi8(__A),
494                                             (__v16qi)_mm_setzero_si128());
495}
496
497static __inline__ __m256i __DEFAULT_FN_ATTRS256
498_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
499{
500  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
501                                             (__v32qi)_mm256_abs_epi8(__A),
502                                             (__v32qi)__W);
503}
504
505static __inline__ __m256i __DEFAULT_FN_ATTRS256
506_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
507{
508  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
509                                             (__v32qi)_mm256_abs_epi8(__A),
510                                             (__v32qi)_mm256_setzero_si256());
511}
512
513static __inline__ __m128i __DEFAULT_FN_ATTRS128
514_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
515{
516  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
517                                             (__v8hi)_mm_abs_epi16(__A),
518                                             (__v8hi)__W);
519}
520
521static __inline__ __m128i __DEFAULT_FN_ATTRS128
522_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
523{
524  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
525                                             (__v8hi)_mm_abs_epi16(__A),
526                                             (__v8hi)_mm_setzero_si128());
527}
528
529static __inline__ __m256i __DEFAULT_FN_ATTRS256
530_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
531{
532  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
533                                             (__v16hi)_mm256_abs_epi16(__A),
534                                             (__v16hi)__W);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
539{
540  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
541                                             (__v16hi)_mm256_abs_epi16(__A),
542                                             (__v16hi)_mm256_setzero_si256());
543}
544
545static __inline__ __m128i __DEFAULT_FN_ATTRS128
546_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
547  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
548                                             (__v8hi)_mm_packs_epi32(__A, __B),
549                                             (__v8hi)_mm_setzero_si128());
550}
551
552static __inline__ __m128i __DEFAULT_FN_ATTRS128
553_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
554{
555  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
556                                             (__v8hi)_mm_packs_epi32(__A, __B),
557                                             (__v8hi)__W);
558}
559
560static __inline__ __m256i __DEFAULT_FN_ATTRS256
561_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
562{
563  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
564                                          (__v16hi)_mm256_packs_epi32(__A, __B),
565                                          (__v16hi)_mm256_setzero_si256());
566}
567
568static __inline__ __m256i __DEFAULT_FN_ATTRS256
569_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
570{
571  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
572                                          (__v16hi)_mm256_packs_epi32(__A, __B),
573                                          (__v16hi)__W);
574}
575
576static __inline__ __m128i __DEFAULT_FN_ATTRS128
577_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
578{
579  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
580                                             (__v16qi)_mm_packs_epi16(__A, __B),
581                                             (__v16qi)_mm_setzero_si128());
582}
583
584static __inline__ __m128i __DEFAULT_FN_ATTRS128
585_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
586{
587  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
588                                             (__v16qi)_mm_packs_epi16(__A, __B),
589                                             (__v16qi)__W);
590}
591
592static __inline__ __m256i __DEFAULT_FN_ATTRS256
593_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
594{
595  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
596                                          (__v32qi)_mm256_packs_epi16(__A, __B),
597                                          (__v32qi)_mm256_setzero_si256());
598}
599
600static __inline__ __m256i __DEFAULT_FN_ATTRS256
601_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
602{
603  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
604                                          (__v32qi)_mm256_packs_epi16(__A, __B),
605                                          (__v32qi)__W);
606}
607
608static __inline__ __m128i __DEFAULT_FN_ATTRS128
609_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
610{
611  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
612                                             (__v8hi)_mm_packus_epi32(__A, __B),
613                                             (__v8hi)_mm_setzero_si128());
614}
615
616static __inline__ __m128i __DEFAULT_FN_ATTRS128
617_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
618{
619  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
620                                             (__v8hi)_mm_packus_epi32(__A, __B),
621                                             (__v8hi)__W);
622}
623
624static __inline__ __m256i __DEFAULT_FN_ATTRS256
625_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
626{
627  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
628                                         (__v16hi)_mm256_packus_epi32(__A, __B),
629                                         (__v16hi)_mm256_setzero_si256());
630}
631
632static __inline__ __m256i __DEFAULT_FN_ATTRS256
633_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
634{
635  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
636                                         (__v16hi)_mm256_packus_epi32(__A, __B),
637                                         (__v16hi)__W);
638}
639
640static __inline__ __m128i __DEFAULT_FN_ATTRS128
641_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
642{
643  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
644                                            (__v16qi)_mm_packus_epi16(__A, __B),
645                                            (__v16qi)_mm_setzero_si128());
646}
647
648static __inline__ __m128i __DEFAULT_FN_ATTRS128
649_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
650{
651  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
652                                            (__v16qi)_mm_packus_epi16(__A, __B),
653                                            (__v16qi)__W);
654}
655
656static __inline__ __m256i __DEFAULT_FN_ATTRS256
657_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
658{
659  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
660                                         (__v32qi)_mm256_packus_epi16(__A, __B),
661                                         (__v32qi)_mm256_setzero_si256());
662}
663
664static __inline__ __m256i __DEFAULT_FN_ATTRS256
665_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
666{
667  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
668                                         (__v32qi)_mm256_packus_epi16(__A, __B),
669                                         (__v32qi)__W);
670}
671
672static __inline__ __m128i __DEFAULT_FN_ATTRS128
673_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
674{
675  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
676                                             (__v16qi)_mm_adds_epi8(__A, __B),
677                                             (__v16qi)__W);
678}
679
680static __inline__ __m128i __DEFAULT_FN_ATTRS128
681_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
682{
683  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
684                                             (__v16qi)_mm_adds_epi8(__A, __B),
685                                             (__v16qi)_mm_setzero_si128());
686}
687
688static __inline__ __m256i __DEFAULT_FN_ATTRS256
689_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
690{
691  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
692                                            (__v32qi)_mm256_adds_epi8(__A, __B),
693                                            (__v32qi)__W);
694}
695
696static __inline__ __m256i __DEFAULT_FN_ATTRS256
697_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
698{
699  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
700                                            (__v32qi)_mm256_adds_epi8(__A, __B),
701                                            (__v32qi)_mm256_setzero_si256());
702}
703
704static __inline__ __m128i __DEFAULT_FN_ATTRS128
705_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
706{
707  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
708                                             (__v8hi)_mm_adds_epi16(__A, __B),
709                                             (__v8hi)__W);
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
714{
715  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
716                                             (__v8hi)_mm_adds_epi16(__A, __B),
717                                             (__v8hi)_mm_setzero_si128());
718}
719
720static __inline__ __m256i __DEFAULT_FN_ATTRS256
721_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
722{
723  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
724                                           (__v16hi)_mm256_adds_epi16(__A, __B),
725                                           (__v16hi)__W);
726}
727
728static __inline__ __m256i __DEFAULT_FN_ATTRS256
729_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
730{
731  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
732                                           (__v16hi)_mm256_adds_epi16(__A, __B),
733                                           (__v16hi)_mm256_setzero_si256());
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS128
737_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
738{
739  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
740                                             (__v16qi)_mm_adds_epu8(__A, __B),
741                                             (__v16qi)__W);
742}
743
744static __inline__ __m128i __DEFAULT_FN_ATTRS128
745_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
746{
747  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
748                                             (__v16qi)_mm_adds_epu8(__A, __B),
749                                             (__v16qi)_mm_setzero_si128());
750}
751
752static __inline__ __m256i __DEFAULT_FN_ATTRS256
753_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
754{
755  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
756                                            (__v32qi)_mm256_adds_epu8(__A, __B),
757                                            (__v32qi)__W);
758}
759
760static __inline__ __m256i __DEFAULT_FN_ATTRS256
761_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
762{
763  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
764                                            (__v32qi)_mm256_adds_epu8(__A, __B),
765                                            (__v32qi)_mm256_setzero_si256());
766}
767
768static __inline__ __m128i __DEFAULT_FN_ATTRS128
769_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
770{
771  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
772                                             (__v8hi)_mm_adds_epu16(__A, __B),
773                                             (__v8hi)__W);
774}
775
776static __inline__ __m128i __DEFAULT_FN_ATTRS128
777_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
778{
779  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
780                                             (__v8hi)_mm_adds_epu16(__A, __B),
781                                             (__v8hi)_mm_setzero_si128());
782}
783
784static __inline__ __m256i __DEFAULT_FN_ATTRS256
785_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
786{
787  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
788                                           (__v16hi)_mm256_adds_epu16(__A, __B),
789                                           (__v16hi)__W);
790}
791
792static __inline__ __m256i __DEFAULT_FN_ATTRS256
793_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
794{
795  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
796                                           (__v16hi)_mm256_adds_epu16(__A, __B),
797                                           (__v16hi)_mm256_setzero_si256());
798}
799
800static __inline__ __m128i __DEFAULT_FN_ATTRS128
801_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
802{
803  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
804                                             (__v16qi)_mm_avg_epu8(__A, __B),
805                                             (__v16qi)__W);
806}
807
808static __inline__ __m128i __DEFAULT_FN_ATTRS128
809_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
810{
811  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
812                                             (__v16qi)_mm_avg_epu8(__A, __B),
813                                             (__v16qi)_mm_setzero_si128());
814}
815
816static __inline__ __m256i __DEFAULT_FN_ATTRS256
817_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
818{
819  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
820                                             (__v32qi)_mm256_avg_epu8(__A, __B),
821                                             (__v32qi)__W);
822}
823
824static __inline__ __m256i __DEFAULT_FN_ATTRS256
825_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
826{
827  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
828                                             (__v32qi)_mm256_avg_epu8(__A, __B),
829                                             (__v32qi)_mm256_setzero_si256());
830}
831
832static __inline__ __m128i __DEFAULT_FN_ATTRS128
833_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
834{
835  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
836                                             (__v8hi)_mm_avg_epu16(__A, __B),
837                                             (__v8hi)__W);
838}
839
840static __inline__ __m128i __DEFAULT_FN_ATTRS128
841_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
842{
843  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
844                                             (__v8hi)_mm_avg_epu16(__A, __B),
845                                             (__v8hi)_mm_setzero_si128());
846}
847
848static __inline__ __m256i __DEFAULT_FN_ATTRS256
849_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
850{
851  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
852                                            (__v16hi)_mm256_avg_epu16(__A, __B),
853                                            (__v16hi)__W);
854}
855
856static __inline__ __m256i __DEFAULT_FN_ATTRS256
857_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
858{
859  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
860                                            (__v16hi)_mm256_avg_epu16(__A, __B),
861                                            (__v16hi)_mm256_setzero_si256());
862}
863
864static __inline__ __m128i __DEFAULT_FN_ATTRS128
865_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
866{
867  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
868                                             (__v16qi)_mm_max_epi8(__A, __B),
869                                             (__v16qi)_mm_setzero_si128());
870}
871
872static __inline__ __m128i __DEFAULT_FN_ATTRS128
873_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
874{
875  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
876                                             (__v16qi)_mm_max_epi8(__A, __B),
877                                             (__v16qi)__W);
878}
879
880static __inline__ __m256i __DEFAULT_FN_ATTRS256
881_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
882{
883  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
884                                             (__v32qi)_mm256_max_epi8(__A, __B),
885                                             (__v32qi)_mm256_setzero_si256());
886}
887
888static __inline__ __m256i __DEFAULT_FN_ATTRS256
889_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
890{
891  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
892                                             (__v32qi)_mm256_max_epi8(__A, __B),
893                                             (__v32qi)__W);
894}
895
896static __inline__ __m128i __DEFAULT_FN_ATTRS128
897_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
898{
899  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
900                                             (__v8hi)_mm_max_epi16(__A, __B),
901                                             (__v8hi)_mm_setzero_si128());
902}
903
904static __inline__ __m128i __DEFAULT_FN_ATTRS128
905_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
906{
907  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
908                                             (__v8hi)_mm_max_epi16(__A, __B),
909                                             (__v8hi)__W);
910}
911
912static __inline__ __m256i __DEFAULT_FN_ATTRS256
913_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
914{
915  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
916                                            (__v16hi)_mm256_max_epi16(__A, __B),
917                                            (__v16hi)_mm256_setzero_si256());
918}
919
920static __inline__ __m256i __DEFAULT_FN_ATTRS256
921_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
922{
923  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
924                                            (__v16hi)_mm256_max_epi16(__A, __B),
925                                            (__v16hi)__W);
926}
927
928static __inline__ __m128i __DEFAULT_FN_ATTRS128
929_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
930{
931  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
932                                             (__v16qi)_mm_max_epu8(__A, __B),
933                                             (__v16qi)_mm_setzero_si128());
934}
935
936static __inline__ __m128i __DEFAULT_FN_ATTRS128
937_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
938{
939  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
940                                             (__v16qi)_mm_max_epu8(__A, __B),
941                                             (__v16qi)__W);
942}
943
944static __inline__ __m256i __DEFAULT_FN_ATTRS256
945_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
946{
947  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
948                                             (__v32qi)_mm256_max_epu8(__A, __B),
949                                             (__v32qi)_mm256_setzero_si256());
950}
951
952static __inline__ __m256i __DEFAULT_FN_ATTRS256
953_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
954{
955  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
956                                             (__v32qi)_mm256_max_epu8(__A, __B),
957                                             (__v32qi)__W);
958}
959
960static __inline__ __m128i __DEFAULT_FN_ATTRS128
961_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
962{
963  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
964                                             (__v8hi)_mm_max_epu16(__A, __B),
965                                             (__v8hi)_mm_setzero_si128());
966}
967
968static __inline__ __m128i __DEFAULT_FN_ATTRS128
969_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
970{
971  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
972                                             (__v8hi)_mm_max_epu16(__A, __B),
973                                             (__v8hi)__W);
974}
975
976static __inline__ __m256i __DEFAULT_FN_ATTRS256
977_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
978{
979  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
980                                            (__v16hi)_mm256_max_epu16(__A, __B),
981                                            (__v16hi)_mm256_setzero_si256());
982}
983
984static __inline__ __m256i __DEFAULT_FN_ATTRS256
985_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
986{
987  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
988                                            (__v16hi)_mm256_max_epu16(__A, __B),
989                                            (__v16hi)__W);
990}
991
992static __inline__ __m128i __DEFAULT_FN_ATTRS128
993_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
994{
995  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
996                                             (__v16qi)_mm_min_epi8(__A, __B),
997                                             (__v16qi)_mm_setzero_si128());
998}
999
1000static __inline__ __m128i __DEFAULT_FN_ATTRS128
1001_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1002{
1003  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1004                                             (__v16qi)_mm_min_epi8(__A, __B),
1005                                             (__v16qi)__W);
1006}
1007
1008static __inline__ __m256i __DEFAULT_FN_ATTRS256
1009_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
1010{
1011  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1012                                             (__v32qi)_mm256_min_epi8(__A, __B),
1013                                             (__v32qi)_mm256_setzero_si256());
1014}
1015
1016static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1018{
1019  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1020                                             (__v32qi)_mm256_min_epi8(__A, __B),
1021                                             (__v32qi)__W);
1022}
1023
1024static __inline__ __m128i __DEFAULT_FN_ATTRS128
1025_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
1026{
1027  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1028                                             (__v8hi)_mm_min_epi16(__A, __B),
1029                                             (__v8hi)_mm_setzero_si128());
1030}
1031
1032static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1034{
1035  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1036                                             (__v8hi)_mm_min_epi16(__A, __B),
1037                                             (__v8hi)__W);
1038}
1039
1040static __inline__ __m256i __DEFAULT_FN_ATTRS256
1041_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
1042{
1043  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1044                                            (__v16hi)_mm256_min_epi16(__A, __B),
1045                                            (__v16hi)_mm256_setzero_si256());
1046}
1047
1048static __inline__ __m256i __DEFAULT_FN_ATTRS256
1049_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1050{
1051  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1052                                            (__v16hi)_mm256_min_epi16(__A, __B),
1053                                            (__v16hi)__W);
1054}
1055
1056static __inline__ __m128i __DEFAULT_FN_ATTRS128
1057_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
1058{
1059  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1060                                             (__v16qi)_mm_min_epu8(__A, __B),
1061                                             (__v16qi)_mm_setzero_si128());
1062}
1063
1064static __inline__ __m128i __DEFAULT_FN_ATTRS128
1065_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1066{
1067  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1068                                             (__v16qi)_mm_min_epu8(__A, __B),
1069                                             (__v16qi)__W);
1070}
1071
1072static __inline__ __m256i __DEFAULT_FN_ATTRS256
1073_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1074{
1075  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1076                                             (__v32qi)_mm256_min_epu8(__A, __B),
1077                                             (__v32qi)_mm256_setzero_si256());
1078}
1079
1080static __inline__ __m256i __DEFAULT_FN_ATTRS256
1081_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1082{
1083  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1084                                             (__v32qi)_mm256_min_epu8(__A, __B),
1085                                             (__v32qi)__W);
1086}
1087
1088static __inline__ __m128i __DEFAULT_FN_ATTRS128
1089_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
1090{
1091  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1092                                             (__v8hi)_mm_min_epu16(__A, __B),
1093                                             (__v8hi)_mm_setzero_si128());
1094}
1095
1096static __inline__ __m128i __DEFAULT_FN_ATTRS128
1097_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1098{
1099  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1100                                             (__v8hi)_mm_min_epu16(__A, __B),
1101                                             (__v8hi)__W);
1102}
1103
1104static __inline__ __m256i __DEFAULT_FN_ATTRS256
1105_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
1106{
1107  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1108                                            (__v16hi)_mm256_min_epu16(__A, __B),
1109                                            (__v16hi)_mm256_setzero_si256());
1110}
1111
1112static __inline__ __m256i __DEFAULT_FN_ATTRS256
1113_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1114{
1115  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1116                                            (__v16hi)_mm256_min_epu16(__A, __B),
1117                                            (__v16hi)__W);
1118}
1119
1120static __inline__ __m128i __DEFAULT_FN_ATTRS128
1121_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1122{
1123  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1124                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1125                                            (__v16qi)__W);
1126}
1127
1128static __inline__ __m128i __DEFAULT_FN_ATTRS128
1129_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1130{
1131  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1132                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1133                                            (__v16qi)_mm_setzero_si128());
1134}
1135
1136static __inline__ __m256i __DEFAULT_FN_ATTRS256
1137_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1138{
1139  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1140                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1141                                         (__v32qi)__W);
1142}
1143
1144static __inline__ __m256i __DEFAULT_FN_ATTRS256
1145_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1146{
1147  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1148                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1149                                         (__v32qi)_mm256_setzero_si256());
1150}
1151
1152static __inline__ __m128i __DEFAULT_FN_ATTRS128
1153_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1154{
1155  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1156                                             (__v16qi)_mm_subs_epi8(__A, __B),
1157                                             (__v16qi)__W);
1158}
1159
1160static __inline__ __m128i __DEFAULT_FN_ATTRS128
1161_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1162{
1163  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1164                                             (__v16qi)_mm_subs_epi8(__A, __B),
1165                                             (__v16qi)_mm_setzero_si128());
1166}
1167
1168static __inline__ __m256i __DEFAULT_FN_ATTRS256
1169_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1170{
1171  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1172                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1173                                            (__v32qi)__W);
1174}
1175
1176static __inline__ __m256i __DEFAULT_FN_ATTRS256
1177_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1178{
1179  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1180                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1181                                            (__v32qi)_mm256_setzero_si256());
1182}
1183
1184static __inline__ __m128i __DEFAULT_FN_ATTRS128
1185_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1186{
1187  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1188                                             (__v8hi)_mm_subs_epi16(__A, __B),
1189                                             (__v8hi)__W);
1190}
1191
1192static __inline__ __m128i __DEFAULT_FN_ATTRS128
1193_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1194{
1195  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1196                                             (__v8hi)_mm_subs_epi16(__A, __B),
1197                                             (__v8hi)_mm_setzero_si128());
1198}
1199
1200static __inline__ __m256i __DEFAULT_FN_ATTRS256
1201_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1202{
1203  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1204                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1205                                           (__v16hi)__W);
1206}
1207
1208static __inline__ __m256i __DEFAULT_FN_ATTRS256
1209_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1210{
1211  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1212                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1213                                           (__v16hi)_mm256_setzero_si256());
1214}
1215
1216static __inline__ __m128i __DEFAULT_FN_ATTRS128
1217_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1218{
1219  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1220                                             (__v16qi)_mm_subs_epu8(__A, __B),
1221                                             (__v16qi)__W);
1222}
1223
1224static __inline__ __m128i __DEFAULT_FN_ATTRS128
1225_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
1226{
1227  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1228                                             (__v16qi)_mm_subs_epu8(__A, __B),
1229                                             (__v16qi)_mm_setzero_si128());
1230}
1231
1232static __inline__ __m256i __DEFAULT_FN_ATTRS256
1233_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1234{
1235  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1236                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1237                                            (__v32qi)__W);
1238}
1239
1240static __inline__ __m256i __DEFAULT_FN_ATTRS256
1241_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
1242{
1243  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1244                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1245                                            (__v32qi)_mm256_setzero_si256());
1246}
1247
1248static __inline__ __m128i __DEFAULT_FN_ATTRS128
1249_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1250{
1251  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1252                                             (__v8hi)_mm_subs_epu16(__A, __B),
1253                                             (__v8hi)__W);
1254}
1255
1256static __inline__ __m128i __DEFAULT_FN_ATTRS128
1257_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
1258{
1259  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1260                                             (__v8hi)_mm_subs_epu16(__A, __B),
1261                                             (__v8hi)_mm_setzero_si128());
1262}
1263
1264static __inline__ __m256i __DEFAULT_FN_ATTRS256
1265_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
1266      __m256i __B) {
1267  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1268                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1269                                           (__v16hi)__W);
1270}
1271
1272static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
1274{
1275  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1276                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1277                                           (__v16hi)_mm256_setzero_si256());
1278}
1279
1280static __inline__ __m128i __DEFAULT_FN_ATTRS128
1281_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
1282{
1283  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1284                                                 (__v8hi) __B);
1285}
1286
1287static __inline__ __m128i __DEFAULT_FN_ATTRS128
1288_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
1289                            __m128i __B)
1290{
1291  return (__m128i)__builtin_ia32_selectw_128(__U,
1292                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1293                                  (__v8hi)__A);
1294}
1295
1296static __inline__ __m128i __DEFAULT_FN_ATTRS128
1297_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
1298                             __m128i __B)
1299{
1300  return (__m128i)__builtin_ia32_selectw_128(__U,
1301                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1302                                  (__v8hi)__I);
1303}
1304
1305static __inline__ __m128i __DEFAULT_FN_ATTRS128
1306_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1307            __m128i __B)
1308{
1309  return (__m128i)__builtin_ia32_selectw_128(__U,
1310                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1311                                  (__v8hi)_mm_setzero_si128());
1312}
1313
1314static __inline__ __m256i __DEFAULT_FN_ATTRS256
1315_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
1316{
1317  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1318                                                 (__v16hi)__B);
1319}
1320
1321static __inline__ __m256i __DEFAULT_FN_ATTRS256
1322_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
1323                               __m256i __B)
1324{
1325  return (__m256i)__builtin_ia32_selectw_256(__U,
1326                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1327                              (__v16hi)__A);
1328}
1329
1330static __inline__ __m256i __DEFAULT_FN_ATTRS256
1331_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
1332                                __m256i __B)
1333{
1334  return (__m256i)__builtin_ia32_selectw_256(__U,
1335                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1336                              (__v16hi)__I);
1337}
1338
1339static __inline__ __m256i __DEFAULT_FN_ATTRS256
1340_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
1341                                 __m256i __B)
1342{
1343  return (__m256i)__builtin_ia32_selectw_256(__U,
1344                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1345                              (__v16hi)_mm256_setzero_si256());
1346}
1347
1348static __inline__ __m128i __DEFAULT_FN_ATTRS128
1349_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1350  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1351                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1352                                            (__v8hi)__W);
1353}
1354
1355static __inline__ __m128i __DEFAULT_FN_ATTRS128
1356_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1357  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1358                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1359                                            (__v8hi)_mm_setzero_si128());
1360}
1361
1362static __inline__ __m256i __DEFAULT_FN_ATTRS256
1363_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
1364                          __m256i __Y) {
1365  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1366                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1367                                        (__v16hi)__W);
1368}
1369
1370static __inline__ __m256i __DEFAULT_FN_ATTRS256
1371_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1372  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1373                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1374                                        (__v16hi)_mm256_setzero_si256());
1375}
1376
1377static __inline__ __m128i __DEFAULT_FN_ATTRS128
1378_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1379  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1380                                             (__v4si)_mm_madd_epi16(__A, __B),
1381                                             (__v4si)__W);
1382}
1383
1384static __inline__ __m128i __DEFAULT_FN_ATTRS128
1385_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1386  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1387                                             (__v4si)_mm_madd_epi16(__A, __B),
1388                                             (__v4si)_mm_setzero_si128());
1389}
1390
1391static __inline__ __m256i __DEFAULT_FN_ATTRS256
1392_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
1393  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1394                                            (__v8si)_mm256_madd_epi16(__A, __B),
1395                                            (__v8si)__W);
1396}
1397
1398static __inline__ __m256i __DEFAULT_FN_ATTRS256
1399_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
1400  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1401                                            (__v8si)_mm256_madd_epi16(__A, __B),
1402                                            (__v8si)_mm256_setzero_si256());
1403}
1404
1405static __inline__ __m128i __DEFAULT_FN_ATTRS128
1406_mm_cvtsepi16_epi8 (__m128i __A) {
1407  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1408               (__v16qi) _mm_setzero_si128(),
1409               (__mmask8) -1);
1410}
1411
1412static __inline__ __m128i __DEFAULT_FN_ATTRS128
1413_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1414  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1415               (__v16qi) __O,
1416                __M);
1417}
1418
1419static __inline__ __m128i __DEFAULT_FN_ATTRS128
1420_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
1421  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1422               (__v16qi) _mm_setzero_si128(),
1423               __M);
1424}
1425
1426static __inline__ __m128i __DEFAULT_FN_ATTRS256
1427_mm256_cvtsepi16_epi8 (__m256i __A) {
1428  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1429               (__v16qi) _mm_setzero_si128(),
1430               (__mmask16) -1);
1431}
1432
1433static __inline__ __m128i __DEFAULT_FN_ATTRS256
1434_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1435  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1436               (__v16qi) __O,
1437                __M);
1438}
1439
1440static __inline__ __m128i __DEFAULT_FN_ATTRS256
1441_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
1442  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1443               (__v16qi) _mm_setzero_si128(),
1444               __M);
1445}
1446
1447static __inline__ __m128i __DEFAULT_FN_ATTRS128
1448_mm_cvtusepi16_epi8 (__m128i __A) {
1449  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1450                (__v16qi) _mm_setzero_si128(),
1451                (__mmask8) -1);
1452}
1453
1454static __inline__ __m128i __DEFAULT_FN_ATTRS128
1455_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1456  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1457                (__v16qi) __O,
1458                __M);
1459}
1460
1461static __inline__ __m128i __DEFAULT_FN_ATTRS128
1462_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
1463  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1464                (__v16qi) _mm_setzero_si128(),
1465                __M);
1466}
1467
1468static __inline__ __m128i __DEFAULT_FN_ATTRS256
1469_mm256_cvtusepi16_epi8 (__m256i __A) {
1470  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1471                (__v16qi) _mm_setzero_si128(),
1472                (__mmask16) -1);
1473}
1474
1475static __inline__ __m128i __DEFAULT_FN_ATTRS256
1476_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1477  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1478                (__v16qi) __O,
1479                __M);
1480}
1481
1482static __inline__ __m128i __DEFAULT_FN_ATTRS256
1483_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
1484  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1485                (__v16qi) _mm_setzero_si128(),
1486                __M);
1487}
1488
1489static __inline__ __m128i __DEFAULT_FN_ATTRS128
1490_mm_cvtepi16_epi8 (__m128i __A) {
1491  return (__m128i)__builtin_shufflevector(
1492      __builtin_convertvector((__v8hi)__A, __v8qi),
1493      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1494      12, 13, 14, 15);
1495}
1496
1497static __inline__ __m128i __DEFAULT_FN_ATTRS128
1498_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1499  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1500               (__v16qi) __O,
1501               __M);
1502}
1503
1504static __inline__ __m128i __DEFAULT_FN_ATTRS128
1505_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
1506  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1507               (__v16qi) _mm_setzero_si128(),
1508               __M);
1509}
1510
1511static __inline__ void __DEFAULT_FN_ATTRS128
1512_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1513{
1514  __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1515}
1516
1517
1518static __inline__ void __DEFAULT_FN_ATTRS128
1519_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1520{
1521  __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1522}
1523
1524static __inline__ void __DEFAULT_FN_ATTRS128
1525_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1526{
1527  __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1528}
1529
1530static __inline__ __m128i __DEFAULT_FN_ATTRS256
1531_mm256_cvtepi16_epi8 (__m256i __A) {
1532  return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1533}
1534
1535static __inline__ __m128i __DEFAULT_FN_ATTRS256
1536_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1537  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1538                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1539                                             (__v16qi)__O);
1540}
1541
1542static __inline__ __m128i __DEFAULT_FN_ATTRS256
1543_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
1544  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1545                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1546                                             (__v16qi)_mm_setzero_si128());
1547}
1548
1549static __inline__ void __DEFAULT_FN_ATTRS256
1550_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1551{
1552  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1553}
1554
1555static __inline__ void __DEFAULT_FN_ATTRS256
1556_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1557{
1558  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1559}
1560
1561static __inline__ void __DEFAULT_FN_ATTRS256
1562_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1563{
1564  __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1565}
1566
1567static __inline__ __m128i __DEFAULT_FN_ATTRS128
1568_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1569  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1570                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1571                                             (__v8hi)__W);
1572}
1573
1574static __inline__ __m128i __DEFAULT_FN_ATTRS128
1575_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1576  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1577                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1578                                             (__v8hi)_mm_setzero_si128());
1579}
1580
1581static __inline__ __m256i __DEFAULT_FN_ATTRS256
1582_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
1583  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1584                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1585                                         (__v16hi)__W);
1586}
1587
1588static __inline__ __m256i __DEFAULT_FN_ATTRS256
1589_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1590  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1591                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1592                                         (__v16hi)_mm256_setzero_si256());
1593}
1594
1595static __inline__ __m128i __DEFAULT_FN_ATTRS128
1596_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1597  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1598                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1599                                             (__v8hi)__W);
1600}
1601
1602static __inline__ __m128i __DEFAULT_FN_ATTRS128
1603_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
1604  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1605                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1606                                             (__v8hi)_mm_setzero_si128());
1607}
1608
1609static __inline__ __m256i __DEFAULT_FN_ATTRS256
1610_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1611  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1612                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1613                                          (__v16hi)__W);
1614}
1615
1616static __inline__ __m256i __DEFAULT_FN_ATTRS256
1617_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
1618  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1619                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1620                                          (__v16hi)_mm256_setzero_si256());
1621}
1622
1623static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1625  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1626                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1627                                             (__v8hi)__W);
1628}
1629
1630static __inline__ __m128i __DEFAULT_FN_ATTRS128
1631_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1632  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1633                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1634                                             (__v8hi)_mm_setzero_si128());
1635}
1636
1637static __inline__ __m256i __DEFAULT_FN_ATTRS256
1638_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1639  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1640                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1641                                          (__v16hi)__W);
1642}
1643
1644static __inline__ __m256i __DEFAULT_FN_ATTRS256
1645_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1646  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1647                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1648                                          (__v16hi)_mm256_setzero_si256());
1649}
1650
1651static __inline__ __m128i __DEFAULT_FN_ATTRS128
1652_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1653  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1654                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1655                                           (__v16qi)__W);
1656}
1657
1658static __inline__ __m128i __DEFAULT_FN_ATTRS128
1659_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1660  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1661                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1662                                           (__v16qi)_mm_setzero_si128());
1663}
1664
1665static __inline__ __m256i __DEFAULT_FN_ATTRS256
1666_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1667  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1668                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1669                                        (__v32qi)__W);
1670}
1671
1672static __inline__ __m256i __DEFAULT_FN_ATTRS256
1673_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1674  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1675                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1676                                        (__v32qi)_mm256_setzero_si256());
1677}
1678
1679static __inline__ __m128i __DEFAULT_FN_ATTRS128
1680_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1681  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1682                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1683                                           (__v8hi)__W);
1684}
1685
1686static __inline__ __m128i __DEFAULT_FN_ATTRS128
1687_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1688  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1689                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1690                                           (__v8hi) _mm_setzero_si128());
1691}
1692
1693static __inline__ __m256i __DEFAULT_FN_ATTRS256
1694_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1695  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1696                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1697                                       (__v16hi)__W);
1698}
1699
1700static __inline__ __m256i __DEFAULT_FN_ATTRS256
1701_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1702  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1703                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1704                                       (__v16hi)_mm256_setzero_si256());
1705}
1706
1707static __inline__ __m128i __DEFAULT_FN_ATTRS128
1708_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1709  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1710                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1711                                           (__v16qi)__W);
1712}
1713
1714static __inline__ __m128i __DEFAULT_FN_ATTRS128
1715_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1716  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1717                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1718                                           (__v16qi)_mm_setzero_si128());
1719}
1720
1721static __inline__ __m256i __DEFAULT_FN_ATTRS256
1722_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1723  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1724                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1725                                        (__v32qi)__W);
1726}
1727
1728static __inline__ __m256i __DEFAULT_FN_ATTRS256
1729_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1730  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1731                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1732                                        (__v32qi)_mm256_setzero_si256());
1733}
1734
1735static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1737  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1738                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1739                                           (__v8hi)__W);
1740}
1741
1742static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1744  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1745                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1746                                           (__v8hi) _mm_setzero_si128());
1747}
1748
1749static __inline__ __m256i __DEFAULT_FN_ATTRS256
1750_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1751  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1752                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1753                                       (__v16hi)__W);
1754}
1755
1756static __inline__ __m256i __DEFAULT_FN_ATTRS256
1757_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1758  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1759                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1760                                       (__v16hi)_mm256_setzero_si256());
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1765{
1766  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1767                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1768                                             (__v8hi)__W);
1769}
1770
1771static __inline__ __m128i __DEFAULT_FN_ATTRS128
1772_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
1773{
1774  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1775                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1776                                             (__v8hi)_mm_setzero_si128());
1777}
1778
1779static __inline__ __m256i __DEFAULT_FN_ATTRS256
1780_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1781{
1782  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1783                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1784                                             (__v16hi)__W);
1785}
1786
1787static __inline__ __m256i __DEFAULT_FN_ATTRS256
1788_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
1789{
1790  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1791                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1792                                             (__v16hi)_mm256_setzero_si256());
1793}
1794
1795
1796static __inline__ __m128i __DEFAULT_FN_ATTRS128
1797_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1798{
1799  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1800                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1801                                             (__v8hi)__W);
1802}
1803
1804static __inline__ __m128i __DEFAULT_FN_ATTRS128
1805_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
1806{
1807  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1808                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1809                                             (__v8hi)_mm_setzero_si128());
1810}
1811
1812static __inline__ __m256i __DEFAULT_FN_ATTRS256
1813_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1814{
1815  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1816                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1817                                             (__v16hi)__W);
1818}
1819
1820static __inline__ __m256i __DEFAULT_FN_ATTRS256
1821_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
1822{
1823  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1824                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1825                                             (__v16hi)_mm256_setzero_si256());
1826}
1827
1828
/* Merge-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm_shufflehi_epi16((A), (imm)) and W under mask U. imm is forwarded to
 * _mm_shufflehi_epi16 exactly as written. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1833
/* Zero-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm_shufflehi_epi16((A), (imm)) and an all-zero vector under mask U. imm is
 * forwarded to _mm_shufflehi_epi16 exactly as written. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1838
/* Merge-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm256_shufflehi_epi16((A), (imm)) and W under mask U. imm is forwarded to
 * _mm256_shufflehi_epi16 exactly as written. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1843
/* Zero-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm256_shufflehi_epi16((A), (imm)) and an all-zero vector under mask U.
 * imm is forwarded to _mm256_shufflehi_epi16 exactly as written. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1848
/* Merge-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm_shufflelo_epi16((A), (imm)) and W under mask U. imm is forwarded to
 * _mm_shufflelo_epi16 exactly as written. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1853
/* Zero-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm_shufflelo_epi16((A), (imm)) and an all-zero vector under mask U. imm is
 * forwarded to _mm_shufflelo_epi16 exactly as written. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1858
/* Merge-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm256_shufflelo_epi16((A), (imm)) and W under mask U. imm is forwarded to
 * _mm256_shufflelo_epi16 exactly as written. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1864
/* Zero-masking macro: expands to the 16-bit-lane select builtin applied to
 * _mm256_shufflelo_epi16((A), (imm)) and an all-zero vector under mask U.
 * imm is forwarded to _mm256_shufflelo_epi16 exactly as written. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1870
1871static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872_mm256_sllv_epi16(__m256i __A, __m256i __B)
1873{
1874  return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1875}
1876
1877static __inline__ __m256i __DEFAULT_FN_ATTRS256
1878_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1879{
1880  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1881                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1882                                           (__v16hi)__W);
1883}
1884
1885static __inline__ __m256i __DEFAULT_FN_ATTRS256
1886_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1887{
1888  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1889                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1890                                           (__v16hi)_mm256_setzero_si256());
1891}
1892
1893static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894_mm_sllv_epi16(__m128i __A, __m128i __B)
1895{
1896  return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1897}
1898
1899static __inline__ __m128i __DEFAULT_FN_ATTRS128
1900_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1901{
1902  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1903                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1904                                             (__v8hi)__W);
1905}
1906
1907static __inline__ __m128i __DEFAULT_FN_ATTRS128
1908_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1909{
1910  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1911                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1912                                             (__v8hi)_mm_setzero_si128());
1913}
1914
1915static __inline__ __m128i __DEFAULT_FN_ATTRS128
1916_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1917{
1918  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1919                                             (__v8hi)_mm_sll_epi16(__A, __B),
1920                                             (__v8hi)__W);
1921}
1922
1923static __inline__ __m128i __DEFAULT_FN_ATTRS128
1924_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1925{
1926  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1927                                             (__v8hi)_mm_sll_epi16(__A, __B),
1928                                             (__v8hi)_mm_setzero_si128());
1929}
1930
1931static __inline__ __m256i __DEFAULT_FN_ATTRS256
1932_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
1933{
1934  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1935                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1936                                          (__v16hi)__W);
1937}
1938
1939static __inline__ __m256i __DEFAULT_FN_ATTRS256
1940_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
1941{
1942  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1943                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1944                                          (__v16hi)_mm256_setzero_si256());
1945}
1946
1947static __inline__ __m128i __DEFAULT_FN_ATTRS128
1948_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
1949{
1950  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1951                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1952                                             (__v8hi)__W);
1953}
1954
1955static __inline__ __m128i __DEFAULT_FN_ATTRS128
1956_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
1957{
1958  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1959                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1960                                             (__v8hi)_mm_setzero_si128());
1961}
1962
1963static __inline__ __m256i __DEFAULT_FN_ATTRS256
1964_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
1965                       unsigned int __B)
1966{
1967  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1968                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1969                                         (__v16hi)__W);
1970}
1971
1972static __inline__ __m256i __DEFAULT_FN_ATTRS256
1973_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
1974{
1975  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1976                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1977                                         (__v16hi)_mm256_setzero_si256());
1978}
1979
1980static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981_mm256_srlv_epi16(__m256i __A, __m256i __B)
1982{
1983  return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1984}
1985
1986static __inline__ __m256i __DEFAULT_FN_ATTRS256
1987_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1988{
1989  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1990                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1991                                           (__v16hi)__W);
1992}
1993
1994static __inline__ __m256i __DEFAULT_FN_ATTRS256
1995_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1996{
1997  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1998                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1999                                           (__v16hi)_mm256_setzero_si256());
2000}
2001
2002static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003_mm_srlv_epi16(__m128i __A, __m128i __B)
2004{
2005  return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2006}
2007
2008static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2010{
2011  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2012                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2013                                             (__v8hi)__W);
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2018{
2019  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2020                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2021                                             (__v8hi)_mm_setzero_si128());
2022}
2023
2024static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025_mm256_srav_epi16(__m256i __A, __m256i __B)
2026{
2027  return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2028}
2029
2030static __inline__ __m256i __DEFAULT_FN_ATTRS256
2031_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
2032{
2033  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2034                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2035                                           (__v16hi)__W);
2036}
2037
2038static __inline__ __m256i __DEFAULT_FN_ATTRS256
2039_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
2040{
2041  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2042                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2043                                           (__v16hi)_mm256_setzero_si256());
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_srav_epi16(__m128i __A, __m128i __B)
2048{
2049  return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2050}
2051
2052static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2054{
2055  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2056                                             (__v8hi)_mm_srav_epi16(__A, __B),
2057                                             (__v8hi)__W);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS128
2061_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2062{
2063  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2064                                             (__v8hi)_mm_srav_epi16(__A, __B),
2065                                             (__v8hi)_mm_setzero_si128());
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2070{
2071  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2072                                             (__v8hi)_mm_sra_epi16(__A, __B),
2073                                             (__v8hi)__W);
2074}
2075
2076static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2078{
2079  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2080                                             (__v8hi)_mm_sra_epi16(__A, __B),
2081                                             (__v8hi)_mm_setzero_si128());
2082}
2083
2084static __inline__ __m256i __DEFAULT_FN_ATTRS256
2085_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2086{
2087  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2088                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2089                                          (__v16hi)__W);
2090}
2091
2092static __inline__ __m256i __DEFAULT_FN_ATTRS256
2093_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2094{
2095  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2096                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2097                                          (__v16hi)_mm256_setzero_si256());
2098}
2099
2100static __inline__ __m128i __DEFAULT_FN_ATTRS128
2101_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
2102{
2103  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2104                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2105                                             (__v8hi)__W);
2106}
2107
2108static __inline__ __m128i __DEFAULT_FN_ATTRS128
2109_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
2110{
2111  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2112                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2113                                             (__v8hi)_mm_setzero_si128());
2114}
2115
2116static __inline__ __m256i __DEFAULT_FN_ATTRS256
2117_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
2118                       unsigned int __B)
2119{
2120  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2121                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2122                                         (__v16hi)__W);
2123}
2124
2125static __inline__ __m256i __DEFAULT_FN_ATTRS256
2126_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
2127{
2128  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2129                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2130                                         (__v16hi)_mm256_setzero_si256());
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2135{
2136  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2137                                             (__v8hi)_mm_srl_epi16(__A, __B),
2138                                             (__v8hi)__W);
2139}
2140
2141static __inline__ __m128i __DEFAULT_FN_ATTRS128
2142_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2143{
2144  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2145                                             (__v8hi)_mm_srl_epi16(__A, __B),
2146                                             (__v8hi)_mm_setzero_si128());
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2151{
2152  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2153                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2154                                          (__v16hi)__W);
2155}
2156
2157static __inline__ __m256i __DEFAULT_FN_ATTRS256
2158_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2159{
2160  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2161                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2162                                          (__v16hi)_mm256_setzero_si256());
2163}
2164
2165static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
2167{
2168  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2169                                             (__v8hi)_mm_srli_epi16(__A, __B),
2170                                             (__v8hi)__W);
2171}
2172
2173static __inline__ __m128i __DEFAULT_FN_ATTRS128
2174_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
2175{
2176  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2177                                             (__v8hi)_mm_srli_epi16(__A, __B),
2178                                             (__v8hi)_mm_setzero_si128());
2179}
2180
2181static __inline__ __m256i __DEFAULT_FN_ATTRS256
2182_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
2183{
2184  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2185                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2186                                         (__v16hi)__W);
2187}
2188
2189static __inline__ __m256i __DEFAULT_FN_ATTRS256
2190_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
2191{
2192  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2193                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2194                                         (__v16hi)_mm256_setzero_si256());
2195}
2196
2197static __inline__ __m128i __DEFAULT_FN_ATTRS128
2198_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2199{
2200  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2201                (__v8hi) __A,
2202                (__v8hi) __W);
2203}
2204
2205static __inline__ __m128i __DEFAULT_FN_ATTRS128
2206_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
2207{
2208  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2209                (__v8hi) __A,
2210                (__v8hi) _mm_setzero_si128 ());
2211}
2212
2213static __inline__ __m256i __DEFAULT_FN_ATTRS256
2214_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
2215{
2216  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2217                (__v16hi) __A,
2218                (__v16hi) __W);
2219}
2220
2221static __inline__ __m256i __DEFAULT_FN_ATTRS256
2222_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
2223{
2224  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2225                (__v16hi) __A,
2226                (__v16hi) _mm256_setzero_si256 ());
2227}
2228
2229static __inline__ __m128i __DEFAULT_FN_ATTRS128
2230_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
2231{
2232  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2233                (__v16qi) __A,
2234                (__v16qi) __W);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS128
2238_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
2239{
2240  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2241                (__v16qi) __A,
2242                (__v16qi) _mm_setzero_si128 ());
2243}
2244
2245static __inline__ __m256i __DEFAULT_FN_ATTRS256
2246_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
2247{
2248  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2249                (__v32qi) __A,
2250                (__v32qi) __W);
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
2255{
2256  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2257                (__v32qi) __A,
2258                (__v32qi) _mm256_setzero_si256 ());
2259}
2260
2261
2262static __inline__ __m128i __DEFAULT_FN_ATTRS128
2263_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
2264{
2265  return (__m128i) __builtin_ia32_selectb_128(__M,
2266                                              (__v16qi) _mm_set1_epi8(__A),
2267                                              (__v16qi) __O);
2268}
2269
2270static __inline__ __m128i __DEFAULT_FN_ATTRS128
2271_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
2272{
2273 return (__m128i) __builtin_ia32_selectb_128(__M,
2274                                             (__v16qi) _mm_set1_epi8(__A),
2275                                             (__v16qi) _mm_setzero_si128());
2276}
2277
2278static __inline__ __m256i __DEFAULT_FN_ATTRS256
2279_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
2280{
2281  return (__m256i) __builtin_ia32_selectb_256(__M,
2282                                              (__v32qi) _mm256_set1_epi8(__A),
2283                                              (__v32qi) __O);
2284}
2285
2286static __inline__ __m256i __DEFAULT_FN_ATTRS256
2287_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
2288{
2289  return (__m256i) __builtin_ia32_selectb_256(__M,
2290                                              (__v32qi) _mm256_set1_epi8(__A),
2291                                              (__v32qi) _mm256_setzero_si256());
2292}
2293
2294static __inline __m128i __DEFAULT_FN_ATTRS128
2295_mm_loadu_epi16 (void const *__P)
2296{
2297  struct __loadu_epi16 {
2298    __m128i_u __v;
2299  } __attribute__((__packed__, __may_alias__));
2300  return ((const struct __loadu_epi16*)__P)->__v;
2301}
2302
2303static __inline__ __m128i __DEFAULT_FN_ATTRS128
2304_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
2305{
2306  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2307                 (__v8hi) __W,
2308                 (__mmask8) __U);
2309}
2310
2311static __inline__ __m128i __DEFAULT_FN_ATTRS128
2312_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
2313{
2314  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2315                 (__v8hi)
2316                 _mm_setzero_si128 (),
2317                 (__mmask8) __U);
2318}
2319
2320static __inline __m256i __DEFAULT_FN_ATTRS256
2321_mm256_loadu_epi16 (void const *__P)
2322{
2323  struct __loadu_epi16 {
2324    __m256i_u __v;
2325  } __attribute__((__packed__, __may_alias__));
2326  return ((const struct __loadu_epi16*)__P)->__v;
2327}
2328
2329static __inline__ __m256i __DEFAULT_FN_ATTRS256
2330_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
2331{
2332  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2333                 (__v16hi) __W,
2334                 (__mmask16) __U);
2335}
2336
2337static __inline__ __m256i __DEFAULT_FN_ATTRS256
2338_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
2339{
2340  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2341                 (__v16hi)
2342                 _mm256_setzero_si256 (),
2343                 (__mmask16) __U);
2344}
2345
2346static __inline __m128i __DEFAULT_FN_ATTRS128
2347_mm_loadu_epi8 (void const *__P)
2348{
2349  struct __loadu_epi8 {
2350    __m128i_u __v;
2351  } __attribute__((__packed__, __may_alias__));
2352  return ((const struct __loadu_epi8*)__P)->__v;
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS128
2356_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
2357{
2358  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2359                 (__v16qi) __W,
2360                 (__mmask16) __U);
2361}
2362
2363static __inline__ __m128i __DEFAULT_FN_ATTRS128
2364_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
2365{
2366  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2367                 (__v16qi)
2368                 _mm_setzero_si128 (),
2369                 (__mmask16) __U);
2370}
2371
2372static __inline __m256i __DEFAULT_FN_ATTRS256
2373_mm256_loadu_epi8 (void const *__P)
2374{
2375  struct __loadu_epi8 {
2376    __m256i_u __v;
2377  } __attribute__((__packed__, __may_alias__));
2378  return ((const struct __loadu_epi8*)__P)->__v;
2379}
2380
2381static __inline__ __m256i __DEFAULT_FN_ATTRS256
2382_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
2383{
2384  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2385                 (__v32qi) __W,
2386                 (__mmask32) __U);
2387}
2388
2389static __inline__ __m256i __DEFAULT_FN_ATTRS256
2390_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
2391{
2392  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2393                 (__v32qi)
2394                 _mm256_setzero_si256 (),
2395                 (__mmask32) __U);
2396}
2397
2398static __inline void __DEFAULT_FN_ATTRS128
2399_mm_storeu_epi16 (void *__P, __m128i __A)
2400{
2401  struct __storeu_epi16 {
2402    __m128i_u __v;
2403  } __attribute__((__packed__, __may_alias__));
2404  ((struct __storeu_epi16*)__P)->__v = __A;
2405}
2406
2407static __inline__ void __DEFAULT_FN_ATTRS128
2408_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
2409{
2410  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2411             (__v8hi) __A,
2412             (__mmask8) __U);
2413}
2414
2415static __inline void __DEFAULT_FN_ATTRS256
2416_mm256_storeu_epi16 (void *__P, __m256i __A)
2417{
2418  struct __storeu_epi16 {
2419    __m256i_u __v;
2420  } __attribute__((__packed__, __may_alias__));
2421  ((struct __storeu_epi16*)__P)->__v = __A;
2422}
2423
2424static __inline__ void __DEFAULT_FN_ATTRS256
2425_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
2426{
2427  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2428             (__v16hi) __A,
2429             (__mmask16) __U);
2430}
2431
2432static __inline void __DEFAULT_FN_ATTRS128
2433_mm_storeu_epi8 (void *__P, __m128i __A)
2434{
2435  struct __storeu_epi8 {
2436    __m128i_u __v;
2437  } __attribute__((__packed__, __may_alias__));
2438  ((struct __storeu_epi8*)__P)->__v = __A;
2439}
2440
2441static __inline__ void __DEFAULT_FN_ATTRS128
2442_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
2443{
2444  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2445             (__v16qi) __A,
2446             (__mmask16) __U);
2447}
2448
2449static __inline void __DEFAULT_FN_ATTRS256
2450_mm256_storeu_epi8 (void *__P, __m256i __A)
2451{
2452  struct __storeu_epi8 {
2453    __m256i_u __v;
2454  } __attribute__((__packed__, __may_alias__));
2455  ((struct __storeu_epi8*)__P)->__v = __A;
2456}
2457
2458static __inline__ void __DEFAULT_FN_ATTRS256
2459_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
2460{
2461  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2462             (__v32qi) __A,
2463             (__mmask32) __U);
2464}
2465
2466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467_mm_test_epi8_mask (__m128i __A, __m128i __B)
2468{
2469  return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
2470}
2471
2472static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2473_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2474{
2475  return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2476                                    _mm_setzero_si128());
2477}
2478
2479static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2480_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2481{
2482  return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2483                                  _mm256_setzero_si256());
2484}
2485
2486static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2487_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2488{
2489  return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2490                                       _mm256_setzero_si256());
2491}
2492
2493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494_mm_test_epi16_mask (__m128i __A, __m128i __B)
2495{
2496  return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2497}
2498
2499static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2500_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2501{
2502  return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2503                                     _mm_setzero_si128());
2504}
2505
2506static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2507_mm256_test_epi16_mask (__m256i __A, __m256i __B)
2508{
2509  return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2510                                   _mm256_setzero_si256 ());
2511}
2512
2513static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2514_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2515{
2516  return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2517                                        _mm256_setzero_si256());
2518}
2519
2520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2522{
2523  return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2524}
2525
2526static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2527_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2528{
2529  return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2530                                  _mm_setzero_si128());
2531}
2532
2533static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2534_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
2535{
2536  return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2537                                 _mm256_setzero_si256());
2538}
2539
2540static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2541_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2542{
2543  return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2544                                      _mm256_setzero_si256());
2545}
2546
2547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548_mm_testn_epi16_mask (__m128i __A, __m128i __B)
2549{
2550  return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2551}
2552
2553static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2554_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2555{
2556  return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
2557}
2558
2559static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2560_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
2561{
2562  return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2563                                  _mm256_setzero_si256());
2564}
2565
2566static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2567_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2568{
2569  return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2570                                       _mm256_setzero_si256());
2571}
2572
2573static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2574_mm_movepi8_mask (__m128i __A)
2575{
2576  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2577}
2578
2579static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2580_mm256_movepi8_mask (__m256i __A)
2581{
2582  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2583}
2584
2585static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2586_mm_movepi16_mask (__m128i __A)
2587{
2588  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2589}
2590
2591static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2592_mm256_movepi16_mask (__m256i __A)
2593{
2594  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2595}
2596
2597static __inline__ __m128i __DEFAULT_FN_ATTRS128
2598_mm_movm_epi8 (__mmask16 __A)
2599{
2600  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2601}
2602
2603static __inline__ __m256i __DEFAULT_FN_ATTRS256
2604_mm256_movm_epi8 (__mmask32 __A)
2605{
2606  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2607}
2608
2609static __inline__ __m128i __DEFAULT_FN_ATTRS128
2610_mm_movm_epi16 (__mmask8 __A)
2611{
2612  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2613}
2614
2615static __inline__ __m256i __DEFAULT_FN_ATTRS256
2616_mm256_movm_epi16 (__mmask16 __A)
2617{
2618  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
2623{
2624  return (__m128i)__builtin_ia32_selectb_128(__M,
2625                                             (__v16qi) _mm_broadcastb_epi8(__A),
2626                                             (__v16qi) __O);
2627}
2628
2629static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
2631{
2632  return (__m128i)__builtin_ia32_selectb_128(__M,
2633                                             (__v16qi) _mm_broadcastb_epi8(__A),
2634                                             (__v16qi) _mm_setzero_si128());
2635}
2636
2637static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
2639{
2640  return (__m256i)__builtin_ia32_selectb_256(__M,
2641                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2642                                             (__v32qi) __O);
2643}
2644
2645static __inline__ __m256i __DEFAULT_FN_ATTRS256
2646_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
2647{
2648  return (__m256i)__builtin_ia32_selectb_256(__M,
2649                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2650                                             (__v32qi) _mm256_setzero_si256());
2651}
2652
2653static __inline__ __m128i __DEFAULT_FN_ATTRS128
2654_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2655{
2656  return (__m128i)__builtin_ia32_selectw_128(__M,
2657                                             (__v8hi) _mm_broadcastw_epi16(__A),
2658                                             (__v8hi) __O);
2659}
2660
2661static __inline__ __m128i __DEFAULT_FN_ATTRS128
2662_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
2663{
2664  return (__m128i)__builtin_ia32_selectw_128(__M,
2665                                             (__v8hi) _mm_broadcastw_epi16(__A),
2666                                             (__v8hi) _mm_setzero_si128());
2667}
2668
2669static __inline__ __m256i __DEFAULT_FN_ATTRS256
2670_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
2671{
2672  return (__m256i)__builtin_ia32_selectw_256(__M,
2673                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2674                                             (__v16hi) __O);
2675}
2676
2677static __inline__ __m256i __DEFAULT_FN_ATTRS256
2678_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
2679{
2680  return (__m256i)__builtin_ia32_selectw_256(__M,
2681                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2682                                             (__v16hi) _mm256_setzero_si256());
2683}
2684
2685static __inline__ __m256i __DEFAULT_FN_ATTRS256
2686_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
2687{
2688  return (__m256i) __builtin_ia32_selectw_256 (__M,
2689                                               (__v16hi) _mm256_set1_epi16(__A),
2690                                               (__v16hi) __O);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
2695{
2696  return (__m256i) __builtin_ia32_selectw_256(__M,
2697                                              (__v16hi)_mm256_set1_epi16(__A),
2698                                              (__v16hi) _mm256_setzero_si256());
2699}
2700
2701static __inline__ __m128i __DEFAULT_FN_ATTRS128
2702_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
2703{
2704  return (__m128i) __builtin_ia32_selectw_128(__M,
2705                                              (__v8hi) _mm_set1_epi16(__A),
2706                                              (__v8hi) __O);
2707}
2708
2709static __inline__ __m128i __DEFAULT_FN_ATTRS128
2710_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
2711{
2712  return (__m128i) __builtin_ia32_selectw_128(__M,
2713                                              (__v8hi) _mm_set1_epi16(__A),
2714                                              (__v8hi) _mm_setzero_si128());
2715}
2716
2717static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
2719{
2720  return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2721}
2722
2723static __inline__ __m128i __DEFAULT_FN_ATTRS128
2724_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
2725{
2726  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2727                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2728                                        (__v8hi) _mm_setzero_si128());
2729}
2730
2731static __inline__ __m128i __DEFAULT_FN_ATTRS128
2732_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
2733          __m128i __B)
2734{
2735  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2736                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2737                                        (__v8hi)__W);
2738}
2739
2740static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
2742{
2743  return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2744}
2745
2746static __inline__ __m256i __DEFAULT_FN_ATTRS256
2747_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
2748        __m256i __B)
2749{
2750  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2751                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2752                                    (__v16hi)_mm256_setzero_si256());
2753}
2754
2755static __inline__ __m256i __DEFAULT_FN_ATTRS256
2756_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
2757             __m256i __B)
2758{
2759  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2760                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2761                                    (__v16hi)__W);
2762}
2763
/* Merge-masked _mm_alignr_epi8: byte lanes whose bit in U is set take the
   result of _mm_alignr_epi8(A, B, N); the remaining bytes come from W.
   N is forwarded as an int. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)(__m128i)(W)))
2768
/* Zero-masked _mm_alignr_epi8: byte lanes whose bit in U is set take the
   result of _mm_alignr_epi8(A, B, N); all other bytes are zero.
   N is forwarded as an int. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)_mm_setzero_si128()))
2773
/* Merge-masked _mm256_alignr_epi8: byte lanes whose bit in U is set take the
   result of _mm256_alignr_epi8(A, B, N); the remaining bytes come from W.
   N is forwarded as an int. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)(__m256i)(W)))
2778
/* Zero-masked _mm256_alignr_epi8: byte lanes whose bit in U is set take the
   result of _mm256_alignr_epi8(A, B, N); all other bytes are zero.
   N is forwarded as an int. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)_mm256_setzero_si256()))
2783
/* Expands to the dbpsadbw128 builtin applied to A and B (each viewed as 16
   bytes), with imm forwarded as an int control operand. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
                                       (__v16qi)(__m128i)(B), (int)(imm)))
2787
/* Merge-masked _mm_dbsad_epu8: word lanes whose bit in U is set take the
   result of _mm_dbsad_epu8(A, B, imm); the remaining lanes come from W. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)(__m128i)(W)))
2792
/* Zero-masked _mm_dbsad_epu8: word lanes whose bit in U is set take the
   result of _mm_dbsad_epu8(A, B, imm); all other lanes are zero. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)_mm_setzero_si128()))
2797
/* Expands to the dbpsadbw256 builtin applied to A and B (each viewed as 32
   bytes), with imm forwarded as an int control operand. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
                                       (__v32qi)(__m256i)(B), (int)(imm)))
2801
/* Merge-masked _mm256_dbsad_epu8: word lanes whose bit in U is set take the
   result of _mm256_dbsad_epu8(A, B, imm); the remaining lanes come from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)(__m256i)(W)))
2806
/* Zero-masked _mm256_dbsad_epu8: word lanes whose bit in U is set take the
   result of _mm256_dbsad_epu8(A, B, imm); all other lanes are zero. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)_mm256_setzero_si256()))
2811
2812static __inline__ short __DEFAULT_FN_ATTRS128
2813_mm_reduce_add_epi16(__m128i __W) {
2814  return __builtin_reduce_add((__v8hi)__W);
2815}
2816
2817static __inline__ short __DEFAULT_FN_ATTRS128
2818_mm_reduce_mul_epi16(__m128i __W) {
2819  return __builtin_reduce_mul((__v8hi)__W);
2820}
2821
2822static __inline__ short __DEFAULT_FN_ATTRS128
2823_mm_reduce_and_epi16(__m128i __W) {
2824  return __builtin_reduce_and((__v8hi)__W);
2825}
2826
2827static __inline__ short __DEFAULT_FN_ATTRS128
2828_mm_reduce_or_epi16(__m128i __W) {
2829  return __builtin_reduce_or((__v8hi)__W);
2830}
2831
2832static __inline__ short __DEFAULT_FN_ATTRS128
2833_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
2834  __W = _mm_maskz_mov_epi16(__M, __W);
2835  return __builtin_reduce_add((__v8hi)__W);
2836}
2837
2838static __inline__ short __DEFAULT_FN_ATTRS128
2839_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
2840  __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
2841  return __builtin_reduce_mul((__v8hi)__W);
2842}
2843
2844static __inline__ short __DEFAULT_FN_ATTRS128
2845_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
2846  __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
2847  return __builtin_reduce_and((__v8hi)__W);
2848}
2849
2850static __inline__ short __DEFAULT_FN_ATTRS128
2851_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
2852  __W = _mm_maskz_mov_epi16(__M, __W);
2853  return __builtin_reduce_or((__v8hi)__W);
2854}
2855
2856static __inline__ short __DEFAULT_FN_ATTRS128
2857_mm_reduce_max_epi16(__m128i __V) {
2858  return __builtin_reduce_max((__v8hi)__V);
2859}
2860
2861static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2862_mm_reduce_max_epu16(__m128i __V) {
2863  return __builtin_reduce_max((__v8hu)__V);
2864}
2865
2866static __inline__ short __DEFAULT_FN_ATTRS128
2867_mm_reduce_min_epi16(__m128i __V) {
2868  return __builtin_reduce_min((__v8hi)__V);
2869}
2870
2871static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2872_mm_reduce_min_epu16(__m128i __V) {
2873  return __builtin_reduce_min((__v8hu)__V);
2874}
2875
2876static __inline__ short __DEFAULT_FN_ATTRS128
2877_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
2878  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
2879  return __builtin_reduce_max((__v8hi)__V);
2880}
2881
2882static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2883_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
2884  __V = _mm_maskz_mov_epi16(__M, __V);
2885  return __builtin_reduce_max((__v8hu)__V);
2886}
2887
2888static __inline__ short __DEFAULT_FN_ATTRS128
2889_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
2890  __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
2891  return __builtin_reduce_min((__v8hi)__V);
2892}
2893
2894static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2895_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
2896  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
2897  return __builtin_reduce_min((__v8hu)__V);
2898}
2899
2900static __inline__ short __DEFAULT_FN_ATTRS256
2901_mm256_reduce_add_epi16(__m256i __W) {
2902  return __builtin_reduce_add((__v16hi)__W);
2903}
2904
2905static __inline__ short __DEFAULT_FN_ATTRS256
2906_mm256_reduce_mul_epi16(__m256i __W) {
2907  return __builtin_reduce_mul((__v16hi)__W);
2908}
2909
2910static __inline__ short __DEFAULT_FN_ATTRS256
2911_mm256_reduce_and_epi16(__m256i __W) {
2912  return __builtin_reduce_and((__v16hi)__W);
2913}
2914
2915static __inline__ short __DEFAULT_FN_ATTRS256
2916_mm256_reduce_or_epi16(__m256i __W) {
2917  return __builtin_reduce_or((__v16hi)__W);
2918}
2919
2920static __inline__ short __DEFAULT_FN_ATTRS256
2921_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
2922  __W = _mm256_maskz_mov_epi16(__M, __W);
2923  return __builtin_reduce_add((__v16hi)__W);
2924}
2925
2926static __inline__ short __DEFAULT_FN_ATTRS256
2927_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
2928  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
2929  return __builtin_reduce_mul((__v16hi)__W);
2930}
2931
2932static __inline__ short __DEFAULT_FN_ATTRS256
2933_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
2934  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
2935  return __builtin_reduce_and((__v16hi)__W);
2936}
2937
2938static __inline__ short __DEFAULT_FN_ATTRS256
2939_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
2940  __W = _mm256_maskz_mov_epi16(__M, __W);
2941  return __builtin_reduce_or((__v16hi)__W);
2942}
2943
2944static __inline__ short __DEFAULT_FN_ATTRS256
2945_mm256_reduce_max_epi16(__m256i __V) {
2946  return __builtin_reduce_max((__v16hi)__V);
2947}
2948
2949static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2950_mm256_reduce_max_epu16(__m256i __V) {
2951  return __builtin_reduce_max((__v16hu)__V);
2952}
2953
2954static __inline__ short __DEFAULT_FN_ATTRS256
2955_mm256_reduce_min_epi16(__m256i __V) {
2956  return __builtin_reduce_min((__v16hi)__V);
2957}
2958
2959static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2960_mm256_reduce_min_epu16(__m256i __V) {
2961  return __builtin_reduce_min((__v16hu)__V);
2962}
2963
2964static __inline__ short __DEFAULT_FN_ATTRS256
2965_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
2966  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
2967  return __builtin_reduce_max((__v16hi)__V);
2968}
2969
2970static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2971_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
2972  __V = _mm256_maskz_mov_epi16(__M, __V);
2973  return __builtin_reduce_max((__v16hu)__V);
2974}
2975
2976static __inline__ short __DEFAULT_FN_ATTRS256
2977_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
2978  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
2979  return __builtin_reduce_min((__v16hi)__V);
2980}
2981
2982static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2983_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
2984  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
2985  return __builtin_reduce_min((__v16hu)__V);
2986}
2987
2988static __inline__ signed char __DEFAULT_FN_ATTRS128
2989_mm_reduce_add_epi8(__m128i __W) {
2990  return __builtin_reduce_add((__v16qs)__W);
2991}
2992
2993static __inline__ signed char __DEFAULT_FN_ATTRS128
2994_mm_reduce_mul_epi8(__m128i __W) {
2995  return __builtin_reduce_mul((__v16qs)__W);
2996}
2997
2998static __inline__ signed char __DEFAULT_FN_ATTRS128
2999_mm_reduce_and_epi8(__m128i __W) {
3000  return __builtin_reduce_and((__v16qs)__W);
3001}
3002
3003static __inline__ signed char __DEFAULT_FN_ATTRS128
3004_mm_reduce_or_epi8(__m128i __W) {
3005  return __builtin_reduce_or((__v16qs)__W);
3006}
3007
3008static __inline__ signed char __DEFAULT_FN_ATTRS128
3009_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
3010  __W = _mm_maskz_mov_epi8(__M, __W);
3011  return __builtin_reduce_add((__v16qs)__W);
3012}
3013
3014static __inline__ signed char __DEFAULT_FN_ATTRS128
3015_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
3016  __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
3017  return __builtin_reduce_mul((__v16qs)__W);
3018}
3019
3020static __inline__ signed char __DEFAULT_FN_ATTRS128
3021_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
3022  __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
3023  return __builtin_reduce_and((__v16qs)__W);
3024}
3025
3026static __inline__ signed char __DEFAULT_FN_ATTRS128
3027_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
3028  __W = _mm_maskz_mov_epi8(__M, __W);
3029  return __builtin_reduce_or((__v16qs)__W);
3030}
3031
3032static __inline__ signed char __DEFAULT_FN_ATTRS128
3033_mm_reduce_max_epi8(__m128i __V) {
3034  return __builtin_reduce_max((__v16qs)__V);
3035}
3036
3037static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3038_mm_reduce_max_epu8(__m128i __V) {
3039  return __builtin_reduce_max((__v16qu)__V);
3040}
3041
3042static __inline__ signed char __DEFAULT_FN_ATTRS128
3043_mm_reduce_min_epi8(__m128i __V) {
3044  return __builtin_reduce_min((__v16qs)__V);
3045}
3046
3047static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3048_mm_reduce_min_epu8(__m128i __V) {
3049  return __builtin_reduce_min((__v16qu)__V);
3050}
3051
3052static __inline__ signed char __DEFAULT_FN_ATTRS128
3053_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
3054  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
3055  return __builtin_reduce_max((__v16qs)__V);
3056}
3057
3058static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3059_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
3060  __V = _mm_maskz_mov_epi8(__M, __V);
3061  return __builtin_reduce_max((__v16qu)__V);
3062}
3063
3064static __inline__ signed char __DEFAULT_FN_ATTRS128
3065_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
3066  __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
3067  return __builtin_reduce_min((__v16qs)__V);
3068}
3069
3070static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3071_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
3072  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
3073  return __builtin_reduce_min((__v16qu)__V);
3074}
3075
3076static __inline__ signed char __DEFAULT_FN_ATTRS256
3077_mm256_reduce_add_epi8(__m256i __W) {
3078  return __builtin_reduce_add((__v32qs)__W);
3079}
3080
3081static __inline__ signed char __DEFAULT_FN_ATTRS256
3082_mm256_reduce_mul_epi8(__m256i __W) {
3083  return __builtin_reduce_mul((__v32qs)__W);
3084}
3085
3086static __inline__ signed char __DEFAULT_FN_ATTRS256
3087_mm256_reduce_and_epi8(__m256i __W) {
3088  return __builtin_reduce_and((__v32qs)__W);
3089}
3090
3091static __inline__ signed char __DEFAULT_FN_ATTRS256
3092_mm256_reduce_or_epi8(__m256i __W) {
3093  return __builtin_reduce_or((__v32qs)__W);
3094}
3095
3096static __inline__ signed char __DEFAULT_FN_ATTRS256
3097_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
3098  __W = _mm256_maskz_mov_epi8(__M, __W);
3099  return __builtin_reduce_add((__v32qs)__W);
3100}
3101
3102static __inline__ signed char __DEFAULT_FN_ATTRS256
3103_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
3104  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
3105  return __builtin_reduce_mul((__v32qs)__W);
3106}
3107
3108static __inline__ signed char __DEFAULT_FN_ATTRS256
3109_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
3110  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
3111  return __builtin_reduce_and((__v32qs)__W);
3112}
3113
3114static __inline__ signed char __DEFAULT_FN_ATTRS256
3115_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
3116  __W = _mm256_maskz_mov_epi8(__M, __W);
3117  return __builtin_reduce_or((__v32qs)__W);
3118}
3119
3120static __inline__ signed char __DEFAULT_FN_ATTRS256
3121_mm256_reduce_max_epi8(__m256i __V) {
3122  return __builtin_reduce_max((__v32qs)__V);
3123}
3124
3125static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3126_mm256_reduce_max_epu8(__m256i __V) {
3127  return __builtin_reduce_max((__v32qu)__V);
3128}
3129
3130static __inline__ signed char __DEFAULT_FN_ATTRS256
3131_mm256_reduce_min_epi8(__m256i __V) {
3132  return __builtin_reduce_min((__v32qs)__V);
3133}
3134
3135static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3136_mm256_reduce_min_epu8(__m256i __V) {
3137  return __builtin_reduce_min((__v32qu)__V);
3138}
3139
3140static __inline__ signed char __DEFAULT_FN_ATTRS256
3141_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
3142  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
3143  return __builtin_reduce_max((__v32qs)__V);
3144}
3145
3146static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3147_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
3148  __V = _mm256_maskz_mov_epi8(__M, __V);
3149  return __builtin_reduce_max((__v32qu)__V);
3150}
3151
3152static __inline__ signed char __DEFAULT_FN_ATTRS256
3153_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
3154  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
3155  return __builtin_reduce_min((__v32qs)__V);
3156}
3157
3158static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3159_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
3160  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
3161  return __builtin_reduce_min((__v32qu)__V);
3162}
3163
/* The function-attribute helper macros are private to this header; drop them
   so they do not leak into code that includes it. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
3166
3167#endif /* __AVX512VLBWINTRIN_H */
3168