avx512bwintrin.h revision 341825
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* Mask types used by the intrinsics in this header: __mmask32 is an unsigned
   int and __mmask64 is an unsigned long long. */
typedef unsigned __mmask32;
typedef unsigned long long int __mmask64;
33
/* Attribute set attached to every inline function defined in this file:
   always inlined, no debug info, compiled for the "avx512bw" target, with a
   minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),      \
                 __min_vector_width__(512)))
36
37/* Integer compare */
38
/* Expands to __builtin_ia32_cmpb512_mask applied to a and b (each converted to
   __m512i and viewed as __v64qi) with predicate p and an all-ones mask; the
   result is cast to __mmask64.  The whole expansion is parenthesized so the
   leading cast cannot be separated from the call by the surrounding
   expression (for example when used as the operand of sizeof). */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))
43
/* Same as the unmasked epi8 compare but passes the caller's mask m (cast to
   __mmask64) as the last builtin argument instead of an all-ones mask.  The
   whole expansion is parenthesized so the leading cast cannot be separated
   from the call by the surrounding expression. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))
48
/* Expands to __builtin_ia32_ucmpb512_mask applied to a and b (each converted
   to __m512i and viewed as __v64qi) with predicate p and an all-ones mask; the
   result is cast to __mmask64.  The whole expansion is parenthesized so the
   leading cast cannot be separated from the call by the surrounding
   expression. */
#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))
53
/* Same as the unmasked epu8 compare but passes the caller's mask m (cast to
   __mmask64) as the last builtin argument instead of an all-ones mask.  The
   whole expansion is parenthesized so the leading cast cannot be separated
   from the call by the surrounding expression. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))
58
/* Expands to __builtin_ia32_cmpw512_mask applied to a and b (each converted to
   __m512i and viewed as __v32hi) with predicate p and an all-ones mask; the
   result is cast to __mmask32.  The whole expansion is parenthesized so the
   leading cast cannot be separated from the call by the surrounding
   expression. */
#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))
63
/* Same as the unmasked epi16 compare but passes the caller's mask m (cast to
   __mmask32) as the last builtin argument instead of an all-ones mask.  The
   whole expansion is parenthesized so the leading cast cannot be separated
   from the call by the surrounding expression. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))
68
/* Expands to __builtin_ia32_ucmpw512_mask applied to a and b (each converted
   to __m512i and viewed as __v32hi) with predicate p and an all-ones mask; the
   result is cast to __mmask32.  The whole expansion is parenthesized so the
   leading cast cannot be separated from the call by the surrounding
   expression. */
#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))
73
/* Same as the unmasked epu16 compare but passes the caller's mask m (cast to
   __mmask32) as the last builtin argument instead of an all-ones mask.  The
   whole expansion is parenthesized so the leading cast cannot be separated
   from the call by the surrounding expression. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
78
/* Fixed-predicate shorthands for _mm512_cmp_epi8_mask and
   _mm512_mask_cmp_epi8_mask.  Each macro forwards its operands unchanged and
   supplies one _MM_CMPINT_* constant (EQ, GE, GT, LE, LT or NE); the mask_
   forms pass the mask m through as the first argument. */
#define _mm512_cmpeq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
103
/* Fixed-predicate shorthands for _mm512_cmp_epu8_mask and
   _mm512_mask_cmp_epu8_mask.  Each macro forwards its operands unchanged and
   supplies one _MM_CMPINT_* constant (EQ, GE, GT, LE, LT or NE); the mask_
   forms pass the mask m through as the first argument. */
#define _mm512_cmpeq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
128
/* Fixed-predicate shorthands for _mm512_cmp_epi16_mask and
   _mm512_mask_cmp_epi16_mask.  Each macro forwards its operands unchanged and
   supplies one _MM_CMPINT_* constant (EQ, GE, GT, LE, LT or NE); the mask_
   forms pass the mask m through as the first argument. */
#define _mm512_cmpeq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
153
/* Fixed-predicate shorthands for _mm512_cmp_epu16_mask and
   _mm512_mask_cmp_epu16_mask.  Each macro forwards its operands unchanged and
   supplies one _MM_CMPINT_* constant (EQ, GE, GT, LE, LT or NE); the mask_
   forms pass the mask m through as the first argument. */
#define _mm512_cmpeq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
178
179static __inline__ __m512i __DEFAULT_FN_ATTRS
180_mm512_add_epi8 (__m512i __A, __m512i __B) {
181  return (__m512i) ((__v64qu) __A + (__v64qu) __B);
182}
183
184static __inline__ __m512i __DEFAULT_FN_ATTRS
185_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
186  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
187                                             (__v64qi)_mm512_add_epi8(__A, __B),
188                                             (__v64qi)__W);
189}
190
191static __inline__ __m512i __DEFAULT_FN_ATTRS
192_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
193  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
194                                             (__v64qi)_mm512_add_epi8(__A, __B),
195                                             (__v64qi)_mm512_setzero_si512());
196}
197
198static __inline__ __m512i __DEFAULT_FN_ATTRS
199_mm512_sub_epi8 (__m512i __A, __m512i __B) {
200  return (__m512i) ((__v64qu) __A - (__v64qu) __B);
201}
202
203static __inline__ __m512i __DEFAULT_FN_ATTRS
204_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
205  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
206                                             (__v64qi)_mm512_sub_epi8(__A, __B),
207                                             (__v64qi)__W);
208}
209
210static __inline__ __m512i __DEFAULT_FN_ATTRS
211_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
212  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
213                                             (__v64qi)_mm512_sub_epi8(__A, __B),
214                                             (__v64qi)_mm512_setzero_si512());
215}
216
217static __inline__ __m512i __DEFAULT_FN_ATTRS
218_mm512_add_epi16 (__m512i __A, __m512i __B) {
219  return (__m512i) ((__v32hu) __A + (__v32hu) __B);
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS
223_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
224  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
225                                             (__v32hi)_mm512_add_epi16(__A, __B),
226                                             (__v32hi)__W);
227}
228
229static __inline__ __m512i __DEFAULT_FN_ATTRS
230_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
231  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
232                                             (__v32hi)_mm512_add_epi16(__A, __B),
233                                             (__v32hi)_mm512_setzero_si512());
234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_sub_epi16 (__m512i __A, __m512i __B) {
238  return (__m512i) ((__v32hu) __A - (__v32hu) __B);
239}
240
241static __inline__ __m512i __DEFAULT_FN_ATTRS
242_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
243  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
244                                             (__v32hi)_mm512_sub_epi16(__A, __B),
245                                             (__v32hi)__W);
246}
247
248static __inline__ __m512i __DEFAULT_FN_ATTRS
249_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
250  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
251                                             (__v32hi)_mm512_sub_epi16(__A, __B),
252                                             (__v32hi)_mm512_setzero_si512());
253}
254
255static __inline__ __m512i __DEFAULT_FN_ATTRS
256_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
257  return (__m512i) ((__v32hu) __A * (__v32hu) __B);
258}
259
260static __inline__ __m512i __DEFAULT_FN_ATTRS
261_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
262  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
263                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
264                                             (__v32hi)__W);
265}
266
267static __inline__ __m512i __DEFAULT_FN_ATTRS
268_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
269  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
270                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
271                                             (__v32hi)_mm512_setzero_si512());
272}
273
274static __inline__ __m512i __DEFAULT_FN_ATTRS
275_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
276{
277  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
278              (__v64qi) __W,
279              (__v64qi) __A);
280}
281
282static __inline__ __m512i __DEFAULT_FN_ATTRS
283_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
284{
285  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
286              (__v32hi) __W,
287              (__v32hi) __A);
288}
289
290static __inline__ __m512i __DEFAULT_FN_ATTRS
291_mm512_abs_epi8 (__m512i __A)
292{
293  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
294}
295
296static __inline__ __m512i __DEFAULT_FN_ATTRS
297_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
298{
299  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
300                                             (__v64qi)_mm512_abs_epi8(__A),
301                                             (__v64qi)__W);
302}
303
304static __inline__ __m512i __DEFAULT_FN_ATTRS
305_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
306{
307  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
308                                             (__v64qi)_mm512_abs_epi8(__A),
309                                             (__v64qi)_mm512_setzero_si512());
310}
311
312static __inline__ __m512i __DEFAULT_FN_ATTRS
313_mm512_abs_epi16 (__m512i __A)
314{
315  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
316}
317
318static __inline__ __m512i __DEFAULT_FN_ATTRS
319_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
320{
321  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
322                                             (__v32hi)_mm512_abs_epi16(__A),
323                                             (__v32hi)__W);
324}
325
326static __inline__ __m512i __DEFAULT_FN_ATTRS
327_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
328{
329  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
330                                             (__v32hi)_mm512_abs_epi16(__A),
331                                             (__v32hi)_mm512_setzero_si512());
332}
333
334static __inline__ __m512i __DEFAULT_FN_ATTRS
335_mm512_packs_epi32(__m512i __A, __m512i __B)
336{
337  return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
338}
339
340static __inline__ __m512i __DEFAULT_FN_ATTRS
341_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
342{
343  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
344                                       (__v32hi)_mm512_packs_epi32(__A, __B),
345                                       (__v32hi)_mm512_setzero_si512());
346}
347
348static __inline__ __m512i __DEFAULT_FN_ATTRS
349_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
350{
351  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
352                                       (__v32hi)_mm512_packs_epi32(__A, __B),
353                                       (__v32hi)__W);
354}
355
356static __inline__ __m512i __DEFAULT_FN_ATTRS
357_mm512_packs_epi16(__m512i __A, __m512i __B)
358{
359  return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
360}
361
362static __inline__ __m512i __DEFAULT_FN_ATTRS
363_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
364{
365  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
366                                        (__v64qi)_mm512_packs_epi16(__A, __B),
367                                        (__v64qi)__W);
368}
369
370static __inline__ __m512i __DEFAULT_FN_ATTRS
371_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
372{
373  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
374                                        (__v64qi)_mm512_packs_epi16(__A, __B),
375                                        (__v64qi)_mm512_setzero_si512());
376}
377
378static __inline__ __m512i __DEFAULT_FN_ATTRS
379_mm512_packus_epi32(__m512i __A, __m512i __B)
380{
381  return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
382}
383
384static __inline__ __m512i __DEFAULT_FN_ATTRS
385_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
386{
387  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
388                                       (__v32hi)_mm512_packus_epi32(__A, __B),
389                                       (__v32hi)_mm512_setzero_si512());
390}
391
392static __inline__ __m512i __DEFAULT_FN_ATTRS
393_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
394{
395  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
396                                       (__v32hi)_mm512_packus_epi32(__A, __B),
397                                       (__v32hi)__W);
398}
399
400static __inline__ __m512i __DEFAULT_FN_ATTRS
401_mm512_packus_epi16(__m512i __A, __m512i __B)
402{
403  return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
404}
405
406static __inline__ __m512i __DEFAULT_FN_ATTRS
407_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
408{
409  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
410                                        (__v64qi)_mm512_packus_epi16(__A, __B),
411                                        (__v64qi)__W);
412}
413
414static __inline__ __m512i __DEFAULT_FN_ATTRS
415_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
416{
417  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
418                                        (__v64qi)_mm512_packus_epi16(__A, __B),
419                                        (__v64qi)_mm512_setzero_si512());
420}
421
422static __inline__ __m512i __DEFAULT_FN_ATTRS
423_mm512_adds_epi8 (__m512i __A, __m512i __B)
424{
425  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
426              (__v64qi) __B,
427              (__v64qi) _mm512_setzero_si512(),
428              (__mmask64) -1);
429}
430
431static __inline__ __m512i __DEFAULT_FN_ATTRS
432_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
433           __m512i __B)
434{
435  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
436              (__v64qi) __B,
437              (__v64qi) __W,
438              (__mmask64) __U);
439}
440
441static __inline__ __m512i __DEFAULT_FN_ATTRS
442_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
443{
444  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
445              (__v64qi) __B,
446              (__v64qi) _mm512_setzero_si512(),
447              (__mmask64) __U);
448}
449
450static __inline__ __m512i __DEFAULT_FN_ATTRS
451_mm512_adds_epi16 (__m512i __A, __m512i __B)
452{
453  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
454              (__v32hi) __B,
455              (__v32hi) _mm512_setzero_si512(),
456              (__mmask32) -1);
457}
458
459static __inline__ __m512i __DEFAULT_FN_ATTRS
460_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
461      __m512i __B)
462{
463  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
464              (__v32hi) __B,
465              (__v32hi) __W,
466              (__mmask32) __U);
467}
468
469static __inline__ __m512i __DEFAULT_FN_ATTRS
470_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
471{
472  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
473              (__v32hi) __B,
474              (__v32hi) _mm512_setzero_si512(),
475              (__mmask32) __U);
476}
477
478static __inline__ __m512i __DEFAULT_FN_ATTRS
479_mm512_adds_epu8 (__m512i __A, __m512i __B)
480{
481  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
482              (__v64qi) __B,
483              (__v64qi) _mm512_setzero_si512(),
484              (__mmask64) -1);
485}
486
487static __inline__ __m512i __DEFAULT_FN_ATTRS
488_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
489           __m512i __B)
490{
491  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
492              (__v64qi) __B,
493              (__v64qi) __W,
494              (__mmask64) __U);
495}
496
497static __inline__ __m512i __DEFAULT_FN_ATTRS
498_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
499{
500  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
501              (__v64qi) __B,
502              (__v64qi) _mm512_setzero_si512(),
503              (__mmask64) __U);
504}
505
506static __inline__ __m512i __DEFAULT_FN_ATTRS
507_mm512_adds_epu16 (__m512i __A, __m512i __B)
508{
509  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
510              (__v32hi) __B,
511              (__v32hi) _mm512_setzero_si512(),
512              (__mmask32) -1);
513}
514
515static __inline__ __m512i __DEFAULT_FN_ATTRS
516_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
517      __m512i __B)
518{
519  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
520              (__v32hi) __B,
521              (__v32hi) __W,
522              (__mmask32) __U);
523}
524
525static __inline__ __m512i __DEFAULT_FN_ATTRS
526_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
527{
528  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
529              (__v32hi) __B,
530              (__v32hi) _mm512_setzero_si512(),
531              (__mmask32) __U);
532}
533
534static __inline__ __m512i __DEFAULT_FN_ATTRS
535_mm512_avg_epu8 (__m512i __A, __m512i __B)
536{
537  typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
538  return (__m512i)__builtin_convertvector(
539              ((__builtin_convertvector((__v64qu) __A, __v64hu) +
540                __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
541                >> 1, __v64qu);
542}
543
544static __inline__ __m512i __DEFAULT_FN_ATTRS
545_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
546          __m512i __B)
547{
548  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
549              (__v64qi)_mm512_avg_epu8(__A, __B),
550              (__v64qi)__W);
551}
552
553static __inline__ __m512i __DEFAULT_FN_ATTRS
554_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
555{
556  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
557              (__v64qi)_mm512_avg_epu8(__A, __B),
558              (__v64qi)_mm512_setzero_si512());
559}
560
561static __inline__ __m512i __DEFAULT_FN_ATTRS
562_mm512_avg_epu16 (__m512i __A, __m512i __B)
563{
564  typedef unsigned int __v32su __attribute__((__vector_size__(128)));
565  return (__m512i)__builtin_convertvector(
566              ((__builtin_convertvector((__v32hu) __A, __v32su) +
567                __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
568                >> 1, __v32hu);
569}
570
571static __inline__ __m512i __DEFAULT_FN_ATTRS
572_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
573           __m512i __B)
574{
575  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
576              (__v32hi)_mm512_avg_epu16(__A, __B),
577              (__v32hi)__W);
578}
579
580static __inline__ __m512i __DEFAULT_FN_ATTRS
581_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
582{
583  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
584              (__v32hi)_mm512_avg_epu16(__A, __B),
585              (__v32hi) _mm512_setzero_si512());
586}
587
588static __inline__ __m512i __DEFAULT_FN_ATTRS
589_mm512_max_epi8 (__m512i __A, __m512i __B)
590{
591  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
592}
593
594static __inline__ __m512i __DEFAULT_FN_ATTRS
595_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
596{
597  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
598                                             (__v64qi)_mm512_max_epi8(__A, __B),
599                                             (__v64qi)_mm512_setzero_si512());
600}
601
602static __inline__ __m512i __DEFAULT_FN_ATTRS
603_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
604{
605  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
606                                             (__v64qi)_mm512_max_epi8(__A, __B),
607                                             (__v64qi)__W);
608}
609
610static __inline__ __m512i __DEFAULT_FN_ATTRS
611_mm512_max_epi16 (__m512i __A, __m512i __B)
612{
613  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
614}
615
616static __inline__ __m512i __DEFAULT_FN_ATTRS
617_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
618{
619  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
620                                            (__v32hi)_mm512_max_epi16(__A, __B),
621                                            (__v32hi)_mm512_setzero_si512());
622}
623
624static __inline__ __m512i __DEFAULT_FN_ATTRS
625_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
626           __m512i __B)
627{
628  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
629                                            (__v32hi)_mm512_max_epi16(__A, __B),
630                                            (__v32hi)__W);
631}
632
633static __inline__ __m512i __DEFAULT_FN_ATTRS
634_mm512_max_epu8 (__m512i __A, __m512i __B)
635{
636  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
637}
638
639static __inline__ __m512i __DEFAULT_FN_ATTRS
640_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
641{
642  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
643                                             (__v64qi)_mm512_max_epu8(__A, __B),
644                                             (__v64qi)_mm512_setzero_si512());
645}
646
647static __inline__ __m512i __DEFAULT_FN_ATTRS
648_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
649{
650  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
651                                             (__v64qi)_mm512_max_epu8(__A, __B),
652                                             (__v64qi)__W);
653}
654
655static __inline__ __m512i __DEFAULT_FN_ATTRS
656_mm512_max_epu16 (__m512i __A, __m512i __B)
657{
658  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
659}
660
661static __inline__ __m512i __DEFAULT_FN_ATTRS
662_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
663{
664  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
665                                            (__v32hi)_mm512_max_epu16(__A, __B),
666                                            (__v32hi)_mm512_setzero_si512());
667}
668
669static __inline__ __m512i __DEFAULT_FN_ATTRS
670_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
671{
672  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
673                                            (__v32hi)_mm512_max_epu16(__A, __B),
674                                            (__v32hi)__W);
675}
676
677static __inline__ __m512i __DEFAULT_FN_ATTRS
678_mm512_min_epi8 (__m512i __A, __m512i __B)
679{
680  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
681}
682
683static __inline__ __m512i __DEFAULT_FN_ATTRS
684_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
685{
686  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
687                                             (__v64qi)_mm512_min_epi8(__A, __B),
688                                             (__v64qi)_mm512_setzero_si512());
689}
690
691static __inline__ __m512i __DEFAULT_FN_ATTRS
692_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
693{
694  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
695                                             (__v64qi)_mm512_min_epi8(__A, __B),
696                                             (__v64qi)__W);
697}
698
699static __inline__ __m512i __DEFAULT_FN_ATTRS
700_mm512_min_epi16 (__m512i __A, __m512i __B)
701{
702  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
703}
704
705static __inline__ __m512i __DEFAULT_FN_ATTRS
706_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
707{
708  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
709                                            (__v32hi)_mm512_min_epi16(__A, __B),
710                                            (__v32hi)_mm512_setzero_si512());
711}
712
713static __inline__ __m512i __DEFAULT_FN_ATTRS
714_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
715{
716  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
717                                            (__v32hi)_mm512_min_epi16(__A, __B),
718                                            (__v32hi)__W);
719}
720
721static __inline__ __m512i __DEFAULT_FN_ATTRS
722_mm512_min_epu8 (__m512i __A, __m512i __B)
723{
724  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
725}
726
727static __inline__ __m512i __DEFAULT_FN_ATTRS
728_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
729{
730  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
731                                             (__v64qi)_mm512_min_epu8(__A, __B),
732                                             (__v64qi)_mm512_setzero_si512());
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS
736_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
737{
738  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
739                                             (__v64qi)_mm512_min_epu8(__A, __B),
740                                             (__v64qi)__W);
741}
742
743static __inline__ __m512i __DEFAULT_FN_ATTRS
744_mm512_min_epu16 (__m512i __A, __m512i __B)
745{
746  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS
750_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
751{
752  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
753                                            (__v32hi)_mm512_min_epu16(__A, __B),
754                                            (__v32hi)_mm512_setzero_si512());
755}
756
757static __inline__ __m512i __DEFAULT_FN_ATTRS
758_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
759{
760  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
761                                            (__v32hi)_mm512_min_epu16(__A, __B),
762                                            (__v32hi)__W);
763}
764
765static __inline__ __m512i __DEFAULT_FN_ATTRS
766_mm512_shuffle_epi8(__m512i __A, __m512i __B)
767{
768  return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
769}
770
771static __inline__ __m512i __DEFAULT_FN_ATTRS
772_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
773{
774  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
775                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
776                                         (__v64qi)__W);
777}
778
779static __inline__ __m512i __DEFAULT_FN_ATTRS
780_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
781{
782  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
783                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
784                                         (__v64qi)_mm512_setzero_si512());
785}
786
787static __inline__ __m512i __DEFAULT_FN_ATTRS
788_mm512_subs_epi8 (__m512i __A, __m512i __B)
789{
790  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
791              (__v64qi) __B,
792              (__v64qi) _mm512_setzero_si512(),
793              (__mmask64) -1);
794}
795
796static __inline__ __m512i __DEFAULT_FN_ATTRS
797_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
798           __m512i __B)
799{
800  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
801              (__v64qi) __B,
802              (__v64qi) __W,
803              (__mmask64) __U);
804}
805
806static __inline__ __m512i __DEFAULT_FN_ATTRS
807_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
808{
809  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
810              (__v64qi) __B,
811              (__v64qi) _mm512_setzero_si512(),
812              (__mmask64) __U);
813}
814
815static __inline__ __m512i __DEFAULT_FN_ATTRS
816_mm512_subs_epi16 (__m512i __A, __m512i __B)
817{
818  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
819              (__v32hi) __B,
820              (__v32hi) _mm512_setzero_si512(),
821              (__mmask32) -1);
822}
823
824static __inline__ __m512i __DEFAULT_FN_ATTRS
825_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
826      __m512i __B)
827{
828  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
829              (__v32hi) __B,
830              (__v32hi) __W,
831              (__mmask32) __U);
832}
833
834static __inline__ __m512i __DEFAULT_FN_ATTRS
835_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
836{
837  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
838              (__v32hi) __B,
839              (__v32hi) _mm512_setzero_si512(),
840              (__mmask32) __U);
841}
842
843static __inline__ __m512i __DEFAULT_FN_ATTRS
844_mm512_subs_epu8 (__m512i __A, __m512i __B)
845{
846  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
847              (__v64qi) __B,
848              (__v64qi) _mm512_setzero_si512(),
849              (__mmask64) -1);
850}
851
852static __inline__ __m512i __DEFAULT_FN_ATTRS
853_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
854           __m512i __B)
855{
856  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
857              (__v64qi) __B,
858              (__v64qi) __W,
859              (__mmask64) __U);
860}
861
862static __inline__ __m512i __DEFAULT_FN_ATTRS
863_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
864{
865  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
866              (__v64qi) __B,
867              (__v64qi) _mm512_setzero_si512(),
868              (__mmask64) __U);
869}
870
871static __inline__ __m512i __DEFAULT_FN_ATTRS
872_mm512_subs_epu16 (__m512i __A, __m512i __B)
873{
874  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
875              (__v32hi) __B,
876              (__v32hi) _mm512_setzero_si512(),
877              (__mmask32) -1);
878}
879
880static __inline__ __m512i __DEFAULT_FN_ATTRS
881_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
882      __m512i __B)
883{
884  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
885              (__v32hi) __B,
886              (__v32hi) __W,
887              (__mmask32) __U);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS
891_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
892{
893  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
894              (__v32hi) __B,
895              (__v32hi) _mm512_setzero_si512(),
896              (__mmask32) __U);
897}
898
899static __inline__ __m512i __DEFAULT_FN_ATTRS
900_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
901{
902  return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
903                                                 (__v32hi)__B);
904}
905
906static __inline__ __m512i __DEFAULT_FN_ATTRS
907_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
908                               __m512i __B)
909{
910  return (__m512i)__builtin_ia32_selectw_512(__U,
911                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
912                              (__v32hi)__A);
913}
914
915static __inline__ __m512i __DEFAULT_FN_ATTRS
916_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
917                                __m512i __B)
918{
919  return (__m512i)__builtin_ia32_selectw_512(__U,
920                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
921                              (__v32hi)__I);
922}
923
924static __inline__ __m512i __DEFAULT_FN_ATTRS
925_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
926                                __m512i __B)
927{
928  return (__m512i)__builtin_ia32_selectw_512(__U,
929                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
930                              (__v32hi)_mm512_setzero_si512());
931}
932
933static __inline__ __m512i __DEFAULT_FN_ATTRS
934_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
935{
936  return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
937}
938
939static __inline__ __m512i __DEFAULT_FN_ATTRS
940_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
941{
942  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
943                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
944                                         (__v32hi)__W);
945}
946
947static __inline__ __m512i __DEFAULT_FN_ATTRS
948_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
949{
950  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
951                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
952                                         (__v32hi)_mm512_setzero_si512());
953}
954
955static __inline__ __m512i __DEFAULT_FN_ATTRS
956_mm512_mulhi_epi16(__m512i __A, __m512i __B)
957{
958  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
959}
960
961static __inline__ __m512i __DEFAULT_FN_ATTRS
962_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
963       __m512i __B)
964{
965  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
966                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
967                                          (__v32hi)__W);
968}
969
970static __inline__ __m512i __DEFAULT_FN_ATTRS
971_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
972{
973  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
974                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
975                                          (__v32hi)_mm512_setzero_si512());
976}
977
978static __inline__ __m512i __DEFAULT_FN_ATTRS
979_mm512_mulhi_epu16(__m512i __A, __m512i __B)
980{
981  return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
982}
983
984static __inline__ __m512i __DEFAULT_FN_ATTRS
985_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
986{
987  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
988                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
989                                          (__v32hi)__W);
990}
991
992static __inline__ __m512i __DEFAULT_FN_ATTRS
993_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
994{
995  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
996                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
997                                          (__v32hi)_mm512_setzero_si512());
998}
999
1000static __inline__ __m512i __DEFAULT_FN_ATTRS
1001_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1002  return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1003}
1004
1005static __inline__ __m512i __DEFAULT_FN_ATTRS
1006_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1007                          __m512i __Y) {
1008  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1009                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1010                                        (__v32hi)__W);
1011}
1012
1013static __inline__ __m512i __DEFAULT_FN_ATTRS
1014_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1015  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1016                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1017                                        (__v32hi)_mm512_setzero_si512());
1018}
1019
1020static __inline__ __m512i __DEFAULT_FN_ATTRS
1021_mm512_madd_epi16(__m512i __A, __m512i __B) {
1022  return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1023}
1024
1025static __inline__ __m512i __DEFAULT_FN_ATTRS
1026_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1027  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1028                                           (__v16si)_mm512_madd_epi16(__A, __B),
1029                                           (__v16si)__W);
1030}
1031
1032static __inline__ __m512i __DEFAULT_FN_ATTRS
1033_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1034  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1035                                           (__v16si)_mm512_madd_epi16(__A, __B),
1036                                           (__v16si)_mm512_setzero_si512());
1037}
1038
1039static __inline__ __m256i __DEFAULT_FN_ATTRS
1040_mm512_cvtsepi16_epi8 (__m512i __A) {
1041  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1042               (__v32qi)_mm256_setzero_si256(),
1043               (__mmask32) -1);
1044}
1045
1046static __inline__ __m256i __DEFAULT_FN_ATTRS
1047_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1048  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1049               (__v32qi)__O,
1050               __M);
1051}
1052
1053static __inline__ __m256i __DEFAULT_FN_ATTRS
1054_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1055  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1056               (__v32qi) _mm256_setzero_si256(),
1057               __M);
1058}
1059
1060static __inline__ __m256i __DEFAULT_FN_ATTRS
1061_mm512_cvtusepi16_epi8 (__m512i __A) {
1062  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1063                (__v32qi) _mm256_setzero_si256(),
1064                (__mmask32) -1);
1065}
1066
1067static __inline__ __m256i __DEFAULT_FN_ATTRS
1068_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1069  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1070                (__v32qi) __O,
1071                __M);
1072}
1073
1074static __inline__ __m256i __DEFAULT_FN_ATTRS
1075_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1076  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1077                (__v32qi) _mm256_setzero_si256(),
1078                __M);
1079}
1080
1081static __inline__ __m256i __DEFAULT_FN_ATTRS
1082_mm512_cvtepi16_epi8 (__m512i __A) {
1083  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1084              (__v32qi) _mm256_undefined_si256(),
1085              (__mmask32) -1);
1086}
1087
1088static __inline__ __m256i __DEFAULT_FN_ATTRS
1089_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1090  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1091              (__v32qi) __O,
1092              __M);
1093}
1094
1095static __inline__ __m256i __DEFAULT_FN_ATTRS
1096_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1097  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1098              (__v32qi) _mm256_setzero_si256(),
1099              __M);
1100}
1101
1102static __inline__ void __DEFAULT_FN_ATTRS
1103_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1104{
1105  __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1106}
1107
1108static __inline__ void __DEFAULT_FN_ATTRS
1109_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1110{
1111  __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1112}
1113
1114static __inline__ void __DEFAULT_FN_ATTRS
1115_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1116{
1117  __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1118}
1119
1120static __inline__ __m512i __DEFAULT_FN_ATTRS
1121_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1122  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1123                                          8,  64+8,   9, 64+9,
1124                                          10, 64+10, 11, 64+11,
1125                                          12, 64+12, 13, 64+13,
1126                                          14, 64+14, 15, 64+15,
1127                                          24, 64+24, 25, 64+25,
1128                                          26, 64+26, 27, 64+27,
1129                                          28, 64+28, 29, 64+29,
1130                                          30, 64+30, 31, 64+31,
1131                                          40, 64+40, 41, 64+41,
1132                                          42, 64+42, 43, 64+43,
1133                                          44, 64+44, 45, 64+45,
1134                                          46, 64+46, 47, 64+47,
1135                                          56, 64+56, 57, 64+57,
1136                                          58, 64+58, 59, 64+59,
1137                                          60, 64+60, 61, 64+61,
1138                                          62, 64+62, 63, 64+63);
1139}
1140
1141static __inline__ __m512i __DEFAULT_FN_ATTRS
1142_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1143  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1144                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1145                                        (__v64qi)__W);
1146}
1147
1148static __inline__ __m512i __DEFAULT_FN_ATTRS
1149_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1150  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1151                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1152                                        (__v64qi)_mm512_setzero_si512());
1153}
1154
1155static __inline__ __m512i __DEFAULT_FN_ATTRS
1156_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1157  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1158                                          4,  32+4,   5, 32+5,
1159                                          6,  32+6,   7, 32+7,
1160                                          12, 32+12, 13, 32+13,
1161                                          14, 32+14, 15, 32+15,
1162                                          20, 32+20, 21, 32+21,
1163                                          22, 32+22, 23, 32+23,
1164                                          28, 32+28, 29, 32+29,
1165                                          30, 32+30, 31, 32+31);
1166}
1167
1168static __inline__ __m512i __DEFAULT_FN_ATTRS
1169_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1170  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1171                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1172                                       (__v32hi)__W);
1173}
1174
1175static __inline__ __m512i __DEFAULT_FN_ATTRS
1176_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1177  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1178                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1179                                       (__v32hi)_mm512_setzero_si512());
1180}
1181
1182static __inline__ __m512i __DEFAULT_FN_ATTRS
1183_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1184  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1185                                          0,  64+0,   1, 64+1,
1186                                          2,  64+2,   3, 64+3,
1187                                          4,  64+4,   5, 64+5,
1188                                          6,  64+6,   7, 64+7,
1189                                          16, 64+16, 17, 64+17,
1190                                          18, 64+18, 19, 64+19,
1191                                          20, 64+20, 21, 64+21,
1192                                          22, 64+22, 23, 64+23,
1193                                          32, 64+32, 33, 64+33,
1194                                          34, 64+34, 35, 64+35,
1195                                          36, 64+36, 37, 64+37,
1196                                          38, 64+38, 39, 64+39,
1197                                          48, 64+48, 49, 64+49,
1198                                          50, 64+50, 51, 64+51,
1199                                          52, 64+52, 53, 64+53,
1200                                          54, 64+54, 55, 64+55);
1201}
1202
1203static __inline__ __m512i __DEFAULT_FN_ATTRS
1204_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1205  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1206                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1207                                        (__v64qi)__W);
1208}
1209
1210static __inline__ __m512i __DEFAULT_FN_ATTRS
1211_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1212  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1213                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1214                                        (__v64qi)_mm512_setzero_si512());
1215}
1216
1217static __inline__ __m512i __DEFAULT_FN_ATTRS
1218_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1219  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1220                                          0,  32+0,   1, 32+1,
1221                                          2,  32+2,   3, 32+3,
1222                                          8,  32+8,   9, 32+9,
1223                                          10, 32+10, 11, 32+11,
1224                                          16, 32+16, 17, 32+17,
1225                                          18, 32+18, 19, 32+19,
1226                                          24, 32+24, 25, 32+25,
1227                                          26, 32+26, 27, 32+27);
1228}
1229
1230static __inline__ __m512i __DEFAULT_FN_ATTRS
1231_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1232  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1233                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1234                                       (__v32hi)__W);
1235}
1236
1237static __inline__ __m512i __DEFAULT_FN_ATTRS
1238_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1239  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1240                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1241                                       (__v32hi)_mm512_setzero_si512());
1242}
1243
1244static __inline__ __m512i __DEFAULT_FN_ATTRS
1245_mm512_cvtepi8_epi16(__m256i __A)
1246{
1247  /* This function always performs a signed extension, but __v32qi is a char
1248     which may be signed or unsigned, so use __v32qs. */
1249  return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1250}
1251
1252static __inline__ __m512i __DEFAULT_FN_ATTRS
1253_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1254{
1255  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1256                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1257                                             (__v32hi)__W);
1258}
1259
1260static __inline__ __m512i __DEFAULT_FN_ATTRS
1261_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1262{
1263  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1264                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1265                                             (__v32hi)_mm512_setzero_si512());
1266}
1267
1268static __inline__ __m512i __DEFAULT_FN_ATTRS
1269_mm512_cvtepu8_epi16(__m256i __A)
1270{
1271  return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1272}
1273
1274static __inline__ __m512i __DEFAULT_FN_ATTRS
1275_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1276{
1277  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1278                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1279                                             (__v32hi)__W);
1280}
1281
1282static __inline__ __m512i __DEFAULT_FN_ATTRS
1283_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1284{
1285  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1286                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1287                                             (__v32hi)_mm512_setzero_si512());
1288}
1289
1290
/* Unmasked form: passes A (as __v32hi) and imm (as int) to the pshufhw512
   builtin. The whole expansion is parenthesized so that a postfix operator
   written after the macro call applies to the cast result rather than
   binding to the builtin call before the cast. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1293
/* Merge-masked form of _mm512_shufflehi_epi16: the selectw_512 builtin
   chooses, under mask U, between the unmasked result and W. The whole
   expansion is parenthesized so that a postfix operator written after the
   macro call applies to the cast result. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1299
/* Zero-masked form of _mm512_shufflehi_epi16: the selectw_512 builtin
   chooses, under mask U, between the unmasked result and an all-zero
   vector. The whole expansion is parenthesized so that a postfix operator
   written after the macro call applies to the cast result. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1305
/* Unmasked form: passes A (as __v32hi) and imm (as int) to the pshuflw512
   builtin. The whole expansion is parenthesized so that a postfix operator
   written after the macro call applies to the cast result rather than
   binding to the builtin call before the cast. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1308
1309
/* Merge-masked form of _mm512_shufflelo_epi16: the selectw_512 builtin
   chooses, under mask U, between the unmasked result and W. The whole
   expansion is parenthesized so that a postfix operator written after the
   macro call applies to the cast result. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1315
1316
/* Zero-masked form of _mm512_shufflelo_epi16: the selectw_512 builtin
   chooses, under mask U, between the unmasked result and an all-zero
   vector. The whole expansion is parenthesized so that a postfix operator
   written after the macro call applies to the cast result. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1322
1323static __inline__ __m512i __DEFAULT_FN_ATTRS
1324_mm512_sllv_epi16(__m512i __A, __m512i __B)
1325{
1326  return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1327}
1328
1329static __inline__ __m512i __DEFAULT_FN_ATTRS
1330_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1331{
1332  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1333                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1334                                           (__v32hi)__W);
1335}
1336
1337static __inline__ __m512i __DEFAULT_FN_ATTRS
1338_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1339{
1340  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1341                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1342                                           (__v32hi)_mm512_setzero_si512());
1343}
1344
1345static __inline__ __m512i __DEFAULT_FN_ATTRS
1346_mm512_sll_epi16(__m512i __A, __m128i __B)
1347{
1348  return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1349}
1350
1351static __inline__ __m512i __DEFAULT_FN_ATTRS
1352_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1353{
1354  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1355                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1356                                          (__v32hi)__W);
1357}
1358
1359static __inline__ __m512i __DEFAULT_FN_ATTRS
1360_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1361{
1362  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1363                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1364                                          (__v32hi)_mm512_setzero_si512());
1365}
1366
1367static __inline__ __m512i __DEFAULT_FN_ATTRS
1368_mm512_slli_epi16(__m512i __A, int __B)
1369{
1370  return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
1371}
1372
1373static __inline__ __m512i __DEFAULT_FN_ATTRS
1374_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1375{
1376  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1377                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1378                                         (__v32hi)__W);
1379}
1380
1381static __inline__ __m512i __DEFAULT_FN_ATTRS
1382_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1383{
1384  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1385                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1386                                         (__v32hi)_mm512_setzero_si512());
1387}
1388
/* Passes a (as __v8di) and imm (as int) to the pslldqi512_byteshift builtin.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call applies to the cast result rather than binding to
   the builtin call before the cast. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1391
1392static __inline__ __m512i __DEFAULT_FN_ATTRS
1393_mm512_srlv_epi16(__m512i __A, __m512i __B)
1394{
1395  return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1396}
1397
1398static __inline__ __m512i __DEFAULT_FN_ATTRS
1399_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1400{
1401  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1402                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1403                                           (__v32hi)__W);
1404}
1405
1406static __inline__ __m512i __DEFAULT_FN_ATTRS
1407_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1408{
1409  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1410                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1411                                           (__v32hi)_mm512_setzero_si512());
1412}
1413
1414static __inline__ __m512i __DEFAULT_FN_ATTRS
1415_mm512_srav_epi16(__m512i __A, __m512i __B)
1416{
1417  return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1418}
1419
1420static __inline__ __m512i __DEFAULT_FN_ATTRS
1421_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1422{
1423  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1424                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1425                                           (__v32hi)__W);
1426}
1427
1428static __inline__ __m512i __DEFAULT_FN_ATTRS
1429_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1430{
1431  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1432                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1433                                           (__v32hi)_mm512_setzero_si512());
1434}
1435
1436static __inline__ __m512i __DEFAULT_FN_ATTRS
1437_mm512_sra_epi16(__m512i __A, __m128i __B)
1438{
1439  return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1440}
1441
1442static __inline__ __m512i __DEFAULT_FN_ATTRS
1443_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1444{
1445  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1446                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1447                                          (__v32hi)__W);
1448}
1449
1450static __inline__ __m512i __DEFAULT_FN_ATTRS
1451_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1452{
1453  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1454                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1455                                          (__v32hi)_mm512_setzero_si512());
1456}
1457
1458static __inline__ __m512i __DEFAULT_FN_ATTRS
1459_mm512_srai_epi16(__m512i __A, int __B)
1460{
1461  return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1462}
1463
1464static __inline__ __m512i __DEFAULT_FN_ATTRS
1465_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1466{
1467  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1468                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1469                                         (__v32hi)__W);
1470}
1471
1472static __inline__ __m512i __DEFAULT_FN_ATTRS
1473_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1474{
1475  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1476                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1477                                         (__v32hi)_mm512_setzero_si512());
1478}
1479
1480static __inline__ __m512i __DEFAULT_FN_ATTRS
1481_mm512_srl_epi16(__m512i __A, __m128i __B)
1482{
1483  return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1484}
1485
1486static __inline__ __m512i __DEFAULT_FN_ATTRS
1487_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1488{
1489  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1490                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1491                                          (__v32hi)__W);
1492}
1493
1494static __inline__ __m512i __DEFAULT_FN_ATTRS
1495_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1496{
1497  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1498                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1499                                          (__v32hi)_mm512_setzero_si512());
1500}
1501
1502static __inline__ __m512i __DEFAULT_FN_ATTRS
1503_mm512_srli_epi16(__m512i __A, int __B)
1504{
1505  return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1506}
1507
1508static __inline__ __m512i __DEFAULT_FN_ATTRS
1509_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1510{
1511  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1512                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1513                                         (__v32hi)__W);
1514}
1515
1516static __inline__ __m512i __DEFAULT_FN_ATTRS
1517_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1518{
1519  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1520                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1521                                         (__v32hi)_mm512_setzero_si512());
1522}
1523
/* Forwards to the psrldqi512 byte-shift builtin.  `a` is converted to
 * __m512i (then viewed as __v8di) and `imm` to int before the call.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site (for example a subscript) acts on the __m512i result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), \
                                                (int)(imm)))
1526
1527static __inline__ __m512i __DEFAULT_FN_ATTRS
1528_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1529{
1530  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1531                (__v32hi) __A,
1532                (__v32hi) __W);
1533}
1534
1535static __inline__ __m512i __DEFAULT_FN_ATTRS
1536_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1537{
1538  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1539                (__v32hi) __A,
1540                (__v32hi) _mm512_setzero_si512 ());
1541}
1542
1543static __inline__ __m512i __DEFAULT_FN_ATTRS
1544_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1545{
1546  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1547                (__v64qi) __A,
1548                (__v64qi) __W);
1549}
1550
1551static __inline__ __m512i __DEFAULT_FN_ATTRS
1552_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1553{
1554  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1555                (__v64qi) __A,
1556                (__v64qi) _mm512_setzero_si512 ());
1557}
1558
1559static __inline__ __m512i __DEFAULT_FN_ATTRS
1560_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1561{
1562  return (__m512i) __builtin_ia32_selectb_512(__M,
1563                                              (__v64qi)_mm512_set1_epi8(__A),
1564                                              (__v64qi) __O);
1565}
1566
1567static __inline__ __m512i __DEFAULT_FN_ATTRS
1568_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1569{
1570  return (__m512i) __builtin_ia32_selectb_512(__M,
1571                                              (__v64qi) _mm512_set1_epi8(__A),
1572                                              (__v64qi) _mm512_setzero_si512());
1573}
1574
1575static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1576_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1577{
1578  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1579                (__mmask64) __B);
1580}
1581
1582static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1583_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1584{
1585  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1586                (__mmask32) __B);
1587}
1588
1589static __inline__ __m512i __DEFAULT_FN_ATTRS
1590_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1591{
1592  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1593                 (__v32hi) __W,
1594                 (__mmask32) __U);
1595}
1596
1597static __inline__ __m512i __DEFAULT_FN_ATTRS
1598_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1599{
1600  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1601                 (__v32hi)
1602                 _mm512_setzero_si512 (),
1603                 (__mmask32) __U);
1604}
1605
1606static __inline__ __m512i __DEFAULT_FN_ATTRS
1607_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1608{
1609  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1610                 (__v64qi) __W,
1611                 (__mmask64) __U);
1612}
1613
1614static __inline__ __m512i __DEFAULT_FN_ATTRS
1615_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1616{
1617  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1618                 (__v64qi)
1619                 _mm512_setzero_si512 (),
1620                 (__mmask64) __U);
1621}
1622static __inline__ void __DEFAULT_FN_ATTRS
1623_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1624{
1625  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1626             (__v32hi) __A,
1627             (__mmask32) __U);
1628}
1629
1630static __inline__ void __DEFAULT_FN_ATTRS
1631_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1632{
1633  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1634             (__v64qi) __A,
1635             (__mmask64) __U);
1636}
1637
1638static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1639_mm512_test_epi8_mask (__m512i __A, __m512i __B)
1640{
1641  return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1642                                  _mm512_setzero_si512());
1643}
1644
1645static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1646_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1647{
1648  return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1649                                       _mm512_setzero_si512());
1650}
1651
1652static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1653_mm512_test_epi16_mask (__m512i __A, __m512i __B)
1654{
1655  return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1656                                   _mm512_setzero_si512());
1657}
1658
1659static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1660_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1661{
1662  return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1663                                        _mm512_setzero_si512());
1664}
1665
1666static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1667_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1668{
1669  return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1670}
1671
1672static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1673_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1674{
1675  return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1676                                      _mm512_setzero_si512());
1677}
1678
1679static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1680_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1681{
1682  return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1683                                  _mm512_setzero_si512());
1684}
1685
1686static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1687_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1688{
1689  return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1690                                       _mm512_setzero_si512());
1691}
1692
1693static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1694_mm512_movepi8_mask (__m512i __A)
1695{
1696  return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1697}
1698
1699static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1700_mm512_movepi16_mask (__m512i __A)
1701{
1702  return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1703}
1704
1705static __inline__ __m512i __DEFAULT_FN_ATTRS
1706_mm512_movm_epi8 (__mmask64 __A)
1707{
1708  return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1709}
1710
1711static __inline__ __m512i __DEFAULT_FN_ATTRS
1712_mm512_movm_epi16 (__mmask32 __A)
1713{
1714  return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1715}
1716
1717static __inline__ __m512i __DEFAULT_FN_ATTRS
1718_mm512_broadcastb_epi8 (__m128i __A)
1719{
1720  return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1721                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1722                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1723                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1724                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1725}
1726
1727static __inline__ __m512i __DEFAULT_FN_ATTRS
1728_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1729{
1730  return (__m512i)__builtin_ia32_selectb_512(__M,
1731                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1732                                             (__v64qi) __O);
1733}
1734
1735static __inline__ __m512i __DEFAULT_FN_ATTRS
1736_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1737{
1738  return (__m512i)__builtin_ia32_selectb_512(__M,
1739                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1740                                             (__v64qi) _mm512_setzero_si512());
1741}
1742
1743static __inline__ __m512i __DEFAULT_FN_ATTRS
1744_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1745{
1746  return (__m512i) __builtin_ia32_selectw_512(__M,
1747                                              (__v32hi) _mm512_set1_epi16(__A),
1748                                              (__v32hi) __O);
1749}
1750
1751static __inline__ __m512i __DEFAULT_FN_ATTRS
1752_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1753{
1754  return (__m512i) __builtin_ia32_selectw_512(__M,
1755                                              (__v32hi) _mm512_set1_epi16(__A),
1756                                              (__v32hi) _mm512_setzero_si512());
1757}
1758
1759static __inline__ __m512i __DEFAULT_FN_ATTRS
1760_mm512_broadcastw_epi16 (__m128i __A)
1761{
1762  return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1763                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1764                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1765}
1766
1767static __inline__ __m512i __DEFAULT_FN_ATTRS
1768_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1769{
1770  return (__m512i)__builtin_ia32_selectw_512(__M,
1771                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1772                                             (__v32hi) __O);
1773}
1774
1775static __inline__ __m512i __DEFAULT_FN_ATTRS
1776_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1777{
1778  return (__m512i)__builtin_ia32_selectw_512(__M,
1779                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1780                                             (__v32hi) _mm512_setzero_si512());
1781}
1782
1783static __inline__ __m512i __DEFAULT_FN_ATTRS
1784_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1785{
1786  return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1787}
1788
1789static __inline__ __m512i __DEFAULT_FN_ATTRS
1790_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1791        __m512i __B)
1792{
1793  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1794                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1795                                    (__v32hi)_mm512_setzero_si512());
1796}
1797
1798static __inline__ __m512i __DEFAULT_FN_ATTRS
1799_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1800             __m512i __B)
1801{
1802  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1803                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1804                                    (__v32hi)__W);
1805}
1806
/* Forwards to the palignr512 builtin.  A and B are converted to __m512i and
 * viewed as __v64qi; N is converted to int.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site (for example a subscript) acts on the __m512i result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
1810
/* Merge-masked form of _mm512_alignr_epi8: the byte-select builtin chooses,
 * under mask U, between the alignr result and the pass-through vector W.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site acts on the __m512i result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)(W)))
1815
/* Zero-masked form of _mm512_alignr_epi8: the byte-select builtin chooses,
 * under mask U, between the alignr result and an all-zero vector.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site acts on the __m512i result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)_mm512_setzero_si512()))
1820
/* Forwards to the dbpsadbw512 builtin.  A and B are converted to __m512i and
 * viewed as __v64qi; imm is converted to int.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site (for example a subscript) acts on the __m512i result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
1824
/* Merge-masked form of _mm512_dbsad_epu8: the word-select builtin chooses,
 * under mask U, between the dbsad result and the pass-through vector W.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site acts on the __m512i result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)(__m512i)(W)))
1829
/* Zero-masked form of _mm512_dbsad_epu8: the word-select builtin chooses,
 * under mask U, between the dbsad result and an all-zero vector.
 * The whole expansion is parenthesized so that an operator applied at the
 * use site acts on the __m512i result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)_mm512_setzero_si512()))
1834
1835static __inline__ __m512i __DEFAULT_FN_ATTRS
1836_mm512_sad_epu8 (__m512i __A, __m512i __B)
1837{
1838 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
1839               (__v64qi) __B);
1840}
1841
1842
1843
1844#undef __DEFAULT_FN_ATTRS
1845
1846#endif
1847