1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *
8 *===-----------------------------------------------------------------------===
9 */
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512BWINTRIN_H
15#define __AVX512BWINTRIN_H
16
/* Integer types used for the mask operands and results of the intrinsics in
 * this header: __mmask32 is an unsigned int, __mmask64 an unsigned long long.
 */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
19
/* Define the default attributes for the functions in this file.
 * Both sets force inlining, suppress debug info and enable the "avx512bw"
 * target feature; the 512 variant additionally declares a minimum vector
 * width of 512 and is used by the functions that take or return __m512i.
 */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
23
24static __inline __mmask32 __DEFAULT_FN_ATTRS
25_knot_mask32(__mmask32 __M)
26{
27  return __builtin_ia32_knotsi(__M);
28}
29
30static __inline __mmask64 __DEFAULT_FN_ATTRS
31_knot_mask64(__mmask64 __M)
32{
33  return __builtin_ia32_knotdi(__M);
34}
35
36static __inline__ __mmask32 __DEFAULT_FN_ATTRS
37_kand_mask32(__mmask32 __A, __mmask32 __B)
38{
39  return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
40}
41
42static __inline__ __mmask64 __DEFAULT_FN_ATTRS
43_kand_mask64(__mmask64 __A, __mmask64 __B)
44{
45  return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
46}
47
48static __inline__ __mmask32 __DEFAULT_FN_ATTRS
49_kandn_mask32(__mmask32 __A, __mmask32 __B)
50{
51  return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
52}
53
54static __inline__ __mmask64 __DEFAULT_FN_ATTRS
55_kandn_mask64(__mmask64 __A, __mmask64 __B)
56{
57  return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
58}
59
60static __inline__ __mmask32 __DEFAULT_FN_ATTRS
61_kor_mask32(__mmask32 __A, __mmask32 __B)
62{
63  return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
64}
65
66static __inline__ __mmask64 __DEFAULT_FN_ATTRS
67_kor_mask64(__mmask64 __A, __mmask64 __B)
68{
69  return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
70}
71
72static __inline__ __mmask32 __DEFAULT_FN_ATTRS
73_kxnor_mask32(__mmask32 __A, __mmask32 __B)
74{
75  return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
76}
77
78static __inline__ __mmask64 __DEFAULT_FN_ATTRS
79_kxnor_mask64(__mmask64 __A, __mmask64 __B)
80{
81  return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
82}
83
84static __inline__ __mmask32 __DEFAULT_FN_ATTRS
85_kxor_mask32(__mmask32 __A, __mmask32 __B)
86{
87  return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
88}
89
90static __inline__ __mmask64 __DEFAULT_FN_ATTRS
91_kxor_mask64(__mmask64 __A, __mmask64 __B)
92{
93  return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
94}
95
96static __inline__ unsigned char __DEFAULT_FN_ATTRS
97_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
98{
99  return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
100}
101
102static __inline__ unsigned char __DEFAULT_FN_ATTRS
103_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
104{
105  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
106}
107
108static __inline__ unsigned char __DEFAULT_FN_ATTRS
109_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
110  *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
111  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
112}
113
114static __inline__ unsigned char __DEFAULT_FN_ATTRS
115_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
116{
117  return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
118}
119
120static __inline__ unsigned char __DEFAULT_FN_ATTRS
121_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
122{
123  return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
124}
125
126static __inline__ unsigned char __DEFAULT_FN_ATTRS
127_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
128  *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
129  return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
130}
131
132static __inline__ unsigned char __DEFAULT_FN_ATTRS
133_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
134{
135  return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
136}
137
138static __inline__ unsigned char __DEFAULT_FN_ATTRS
139_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
140{
141  return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
142}
143
144static __inline__ unsigned char __DEFAULT_FN_ATTRS
145_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
146  *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
147  return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
148}
149
150static __inline__ unsigned char __DEFAULT_FN_ATTRS
151_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
152{
153  return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
154}
155
156static __inline__ unsigned char __DEFAULT_FN_ATTRS
157_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
158{
159  return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
160}
161
162static __inline__ unsigned char __DEFAULT_FN_ATTRS
163_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
164  *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
165  return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
166}
167
168static __inline__ __mmask32 __DEFAULT_FN_ATTRS
169_kadd_mask32(__mmask32 __A, __mmask32 __B)
170{
171  return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
172}
173
174static __inline__ __mmask64 __DEFAULT_FN_ATTRS
175_kadd_mask64(__mmask64 __A, __mmask64 __B)
176{
177  return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
178}
179
/* Passes the 32-bit mask A and the count I to __builtin_ia32_kshiftlisi.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to any surrounding operator.
 */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))
182
/* Passes the 32-bit mask A and the count I to __builtin_ia32_kshiftrisi.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to any surrounding operator.
 */
#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))
185
/* Passes the 64-bit mask A and the count I to __builtin_ia32_kshiftlidi.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to any surrounding operator.
 */
#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))
188
/* Passes the 64-bit mask A and the count I to __builtin_ia32_kshiftridi.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to any surrounding operator.
 */
#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
191
192static __inline__ unsigned int __DEFAULT_FN_ATTRS
193_cvtmask32_u32(__mmask32 __A) {
194  return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
195}
196
197static __inline__ unsigned long long __DEFAULT_FN_ATTRS
198_cvtmask64_u64(__mmask64 __A) {
199  return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
200}
201
202static __inline__ __mmask32 __DEFAULT_FN_ATTRS
203_cvtu32_mask32(unsigned int __A) {
204  return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
205}
206
207static __inline__ __mmask64 __DEFAULT_FN_ATTRS
208_cvtu64_mask64(unsigned long long __A) {
209  return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
210}
211
212static __inline__ __mmask32 __DEFAULT_FN_ATTRS
213_load_mask32(__mmask32 *__A) {
214  return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
215}
216
217static __inline__ __mmask64 __DEFAULT_FN_ATTRS
218_load_mask64(__mmask64 *__A) {
219  return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
220}
221
222static __inline__ void __DEFAULT_FN_ATTRS
223_store_mask32(__mmask32 *__A, __mmask32 __B) {
224  *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
225}
226
227static __inline__ void __DEFAULT_FN_ATTRS
228_store_mask64(__mmask64 *__A, __mmask64 __B) {
229  *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
230}
231
232/* Integer compare */
233
/* Calls __builtin_ia32_cmpb512_mask on a and b (viewed as __v64qi) with
 * predicate p and an all-ones input mask. The whole expansion is
 * parenthesized so it is a single operand wherever the macro is used.
 */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))
238
/* Calls __builtin_ia32_cmpb512_mask on a and b (viewed as __v64qi) with
 * predicate p and input mask m. The whole expansion is parenthesized so it
 * is a single operand wherever the macro is used.
 */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))
243
/* Calls __builtin_ia32_ucmpb512_mask on a and b (viewed as __v64qi) with
 * predicate p and an all-ones input mask. The whole expansion is
 * parenthesized so it is a single operand wherever the macro is used.
 */
#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))
248
/* Calls __builtin_ia32_ucmpb512_mask on a and b (viewed as __v64qi) with
 * predicate p and input mask m. The whole expansion is parenthesized so it
 * is a single operand wherever the macro is used.
 */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))
253
/* Calls __builtin_ia32_cmpw512_mask on a and b (viewed as __v32hi) with
 * predicate p and an all-ones input mask. The whole expansion is
 * parenthesized so it is a single operand wherever the macro is used.
 */
#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))
258
/* Calls __builtin_ia32_cmpw512_mask on a and b (viewed as __v32hi) with
 * predicate p and input mask m. The whole expansion is parenthesized so it
 * is a single operand wherever the macro is used.
 */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))
263
/* Calls __builtin_ia32_ucmpw512_mask on a and b (viewed as __v32hi) with
 * predicate p and an all-ones input mask. The whole expansion is
 * parenthesized so it is a single operand wherever the macro is used.
 */
#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))
268
/* Calls __builtin_ia32_ucmpw512_mask on a and b (viewed as __v32hi) with
 * predicate p and input mask m. The whole expansion is parenthesized so it
 * is a single operand wherever the macro is used.
 */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
273
/* Fixed-predicate shorthands for _mm512_cmp_epi8_mask and
 * _mm512_mask_cmp_epi8_mask: each supplies one _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). Every expansion is parenthesized so it is a
 * single operand regardless of how the underlying macro is defined.
 */
#define _mm512_cmpeq_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi8_mask(A, B) \
    (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
    (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE))
298
/* Fixed-predicate shorthands for _mm512_cmp_epu8_mask and
 * _mm512_mask_cmp_epu8_mask: each supplies one _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). Every expansion is parenthesized so it is a
 * single operand regardless of how the underlying macro is defined.
 */
#define _mm512_cmpeq_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu8_mask(A, B) \
    (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
    (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE))
323
/* Fixed-predicate shorthands for _mm512_cmp_epi16_mask and
 * _mm512_mask_cmp_epi16_mask: each supplies one _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). Every expansion is parenthesized so it is a
 * single operand regardless of how the underlying macro is defined.
 */
#define _mm512_cmpeq_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi16_mask(A, B) \
    (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
    (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE))
348
/* Fixed-predicate shorthands for _mm512_cmp_epu16_mask and
 * _mm512_mask_cmp_epu16_mask: each supplies one _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). Every expansion is parenthesized so it is a
 * single operand regardless of how the underlying macro is defined.
 */
#define _mm512_cmpeq_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu16_mask(A, B) \
    (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
    (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE))
373
374static __inline__ __m512i __DEFAULT_FN_ATTRS512
375_mm512_add_epi8 (__m512i __A, __m512i __B) {
376  return (__m512i) ((__v64qu) __A + (__v64qu) __B);
377}
378
379static __inline__ __m512i __DEFAULT_FN_ATTRS512
380_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
381  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
382                                             (__v64qi)_mm512_add_epi8(__A, __B),
383                                             (__v64qi)__W);
384}
385
386static __inline__ __m512i __DEFAULT_FN_ATTRS512
387_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
388  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
389                                             (__v64qi)_mm512_add_epi8(__A, __B),
390                                             (__v64qi)_mm512_setzero_si512());
391}
392
393static __inline__ __m512i __DEFAULT_FN_ATTRS512
394_mm512_sub_epi8 (__m512i __A, __m512i __B) {
395  return (__m512i) ((__v64qu) __A - (__v64qu) __B);
396}
397
398static __inline__ __m512i __DEFAULT_FN_ATTRS512
399_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
400  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
401                                             (__v64qi)_mm512_sub_epi8(__A, __B),
402                                             (__v64qi)__W);
403}
404
405static __inline__ __m512i __DEFAULT_FN_ATTRS512
406_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
407  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
408                                             (__v64qi)_mm512_sub_epi8(__A, __B),
409                                             (__v64qi)_mm512_setzero_si512());
410}
411
412static __inline__ __m512i __DEFAULT_FN_ATTRS512
413_mm512_add_epi16 (__m512i __A, __m512i __B) {
414  return (__m512i) ((__v32hu) __A + (__v32hu) __B);
415}
416
417static __inline__ __m512i __DEFAULT_FN_ATTRS512
418_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
420                                             (__v32hi)_mm512_add_epi16(__A, __B),
421                                             (__v32hi)__W);
422}
423
424static __inline__ __m512i __DEFAULT_FN_ATTRS512
425_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
426  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
427                                             (__v32hi)_mm512_add_epi16(__A, __B),
428                                             (__v32hi)_mm512_setzero_si512());
429}
430
431static __inline__ __m512i __DEFAULT_FN_ATTRS512
432_mm512_sub_epi16 (__m512i __A, __m512i __B) {
433  return (__m512i) ((__v32hu) __A - (__v32hu) __B);
434}
435
436static __inline__ __m512i __DEFAULT_FN_ATTRS512
437_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
438  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
439                                             (__v32hi)_mm512_sub_epi16(__A, __B),
440                                             (__v32hi)__W);
441}
442
443static __inline__ __m512i __DEFAULT_FN_ATTRS512
444_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
445  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
446                                             (__v32hi)_mm512_sub_epi16(__A, __B),
447                                             (__v32hi)_mm512_setzero_si512());
448}
449
450static __inline__ __m512i __DEFAULT_FN_ATTRS512
451_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
452  return (__m512i) ((__v32hu) __A * (__v32hu) __B);
453}
454
455static __inline__ __m512i __DEFAULT_FN_ATTRS512
456_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
457  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
458                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
459                                             (__v32hi)__W);
460}
461
462static __inline__ __m512i __DEFAULT_FN_ATTRS512
463_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
464  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
465                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
466                                             (__v32hi)_mm512_setzero_si512());
467}
468
469static __inline__ __m512i __DEFAULT_FN_ATTRS512
470_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
471{
472  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
473              (__v64qi) __W,
474              (__v64qi) __A);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
478_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
479{
480  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
481              (__v32hi) __W,
482              (__v32hi) __A);
483}
484
485static __inline__ __m512i __DEFAULT_FN_ATTRS512
486_mm512_abs_epi8 (__m512i __A)
487{
488  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
489}
490
491static __inline__ __m512i __DEFAULT_FN_ATTRS512
492_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
493{
494  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
495                                             (__v64qi)_mm512_abs_epi8(__A),
496                                             (__v64qi)__W);
497}
498
499static __inline__ __m512i __DEFAULT_FN_ATTRS512
500_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
501{
502  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
503                                             (__v64qi)_mm512_abs_epi8(__A),
504                                             (__v64qi)_mm512_setzero_si512());
505}
506
507static __inline__ __m512i __DEFAULT_FN_ATTRS512
508_mm512_abs_epi16 (__m512i __A)
509{
510  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
511}
512
513static __inline__ __m512i __DEFAULT_FN_ATTRS512
514_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
515{
516  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
517                                             (__v32hi)_mm512_abs_epi16(__A),
518                                             (__v32hi)__W);
519}
520
521static __inline__ __m512i __DEFAULT_FN_ATTRS512
522_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
523{
524  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
525                                             (__v32hi)_mm512_abs_epi16(__A),
526                                             (__v32hi)_mm512_setzero_si512());
527}
528
529static __inline__ __m512i __DEFAULT_FN_ATTRS512
530_mm512_packs_epi32(__m512i __A, __m512i __B)
531{
532  return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
533}
534
535static __inline__ __m512i __DEFAULT_FN_ATTRS512
536_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
537{
538  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
539                                       (__v32hi)_mm512_packs_epi32(__A, __B),
540                                       (__v32hi)_mm512_setzero_si512());
541}
542
543static __inline__ __m512i __DEFAULT_FN_ATTRS512
544_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
545{
546  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
547                                       (__v32hi)_mm512_packs_epi32(__A, __B),
548                                       (__v32hi)__W);
549}
550
551static __inline__ __m512i __DEFAULT_FN_ATTRS512
552_mm512_packs_epi16(__m512i __A, __m512i __B)
553{
554  return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
555}
556
557static __inline__ __m512i __DEFAULT_FN_ATTRS512
558_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
559{
560  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
561                                        (__v64qi)_mm512_packs_epi16(__A, __B),
562                                        (__v64qi)__W);
563}
564
565static __inline__ __m512i __DEFAULT_FN_ATTRS512
566_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
567{
568  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
569                                        (__v64qi)_mm512_packs_epi16(__A, __B),
570                                        (__v64qi)_mm512_setzero_si512());
571}
572
573static __inline__ __m512i __DEFAULT_FN_ATTRS512
574_mm512_packus_epi32(__m512i __A, __m512i __B)
575{
576  return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
577}
578
579static __inline__ __m512i __DEFAULT_FN_ATTRS512
580_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
581{
582  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
583                                       (__v32hi)_mm512_packus_epi32(__A, __B),
584                                       (__v32hi)_mm512_setzero_si512());
585}
586
587static __inline__ __m512i __DEFAULT_FN_ATTRS512
588_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
589{
590  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
591                                       (__v32hi)_mm512_packus_epi32(__A, __B),
592                                       (__v32hi)__W);
593}
594
595static __inline__ __m512i __DEFAULT_FN_ATTRS512
596_mm512_packus_epi16(__m512i __A, __m512i __B)
597{
598  return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
599}
600
601static __inline__ __m512i __DEFAULT_FN_ATTRS512
602_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
603{
604  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
605                                        (__v64qi)_mm512_packus_epi16(__A, __B),
606                                        (__v64qi)__W);
607}
608
609static __inline__ __m512i __DEFAULT_FN_ATTRS512
610_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
611{
612  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
613                                        (__v64qi)_mm512_packus_epi16(__A, __B),
614                                        (__v64qi)_mm512_setzero_si512());
615}
616
617static __inline__ __m512i __DEFAULT_FN_ATTRS512
618_mm512_adds_epi8 (__m512i __A, __m512i __B)
619{
620  return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
621}
622
623static __inline__ __m512i __DEFAULT_FN_ATTRS512
624_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
625{
626  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
627                                        (__v64qi)_mm512_adds_epi8(__A, __B),
628                                        (__v64qi)__W);
629}
630
631static __inline__ __m512i __DEFAULT_FN_ATTRS512
632_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
633{
634  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
635                                        (__v64qi)_mm512_adds_epi8(__A, __B),
636                                        (__v64qi)_mm512_setzero_si512());
637}
638
639static __inline__ __m512i __DEFAULT_FN_ATTRS512
640_mm512_adds_epi16 (__m512i __A, __m512i __B)
641{
642  return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
643}
644
645static __inline__ __m512i __DEFAULT_FN_ATTRS512
646_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
647{
648  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
649                                        (__v32hi)_mm512_adds_epi16(__A, __B),
650                                        (__v32hi)__W);
651}
652
653static __inline__ __m512i __DEFAULT_FN_ATTRS512
654_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
655{
656  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
657                                        (__v32hi)_mm512_adds_epi16(__A, __B),
658                                        (__v32hi)_mm512_setzero_si512());
659}
660
661static __inline__ __m512i __DEFAULT_FN_ATTRS512
662_mm512_adds_epu8 (__m512i __A, __m512i __B)
663{
664  return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
669{
670  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
671                                        (__v64qi)_mm512_adds_epu8(__A, __B),
672                                        (__v64qi)__W);
673}
674
675static __inline__ __m512i __DEFAULT_FN_ATTRS512
676_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
677{
678  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
679                                        (__v64qi)_mm512_adds_epu8(__A, __B),
680                                        (__v64qi)_mm512_setzero_si512());
681}
682
683static __inline__ __m512i __DEFAULT_FN_ATTRS512
684_mm512_adds_epu16 (__m512i __A, __m512i __B)
685{
686  return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
687}
688
689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
691{
692  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
693                                        (__v32hi)_mm512_adds_epu16(__A, __B),
694                                        (__v32hi)__W);
695}
696
697static __inline__ __m512i __DEFAULT_FN_ATTRS512
698_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
699{
700  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
701                                        (__v32hi)_mm512_adds_epu16(__A, __B),
702                                        (__v32hi)_mm512_setzero_si512());
703}
704
705static __inline__ __m512i __DEFAULT_FN_ATTRS512
706_mm512_avg_epu8 (__m512i __A, __m512i __B)
707{
708  return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
709}
710
711static __inline__ __m512i __DEFAULT_FN_ATTRS512
712_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
713          __m512i __B)
714{
715  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
716              (__v64qi)_mm512_avg_epu8(__A, __B),
717              (__v64qi)__W);
718}
719
720static __inline__ __m512i __DEFAULT_FN_ATTRS512
721_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
722{
723  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
724              (__v64qi)_mm512_avg_epu8(__A, __B),
725              (__v64qi)_mm512_setzero_si512());
726}
727
728static __inline__ __m512i __DEFAULT_FN_ATTRS512
729_mm512_avg_epu16 (__m512i __A, __m512i __B)
730{
731  return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
732}
733
734static __inline__ __m512i __DEFAULT_FN_ATTRS512
735_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
736           __m512i __B)
737{
738  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
739              (__v32hi)_mm512_avg_epu16(__A, __B),
740              (__v32hi)__W);
741}
742
743static __inline__ __m512i __DEFAULT_FN_ATTRS512
744_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
745{
746  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
747              (__v32hi)_mm512_avg_epu16(__A, __B),
748              (__v32hi) _mm512_setzero_si512());
749}
750
751static __inline__ __m512i __DEFAULT_FN_ATTRS512
752_mm512_max_epi8 (__m512i __A, __m512i __B)
753{
754  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
755}
756
757static __inline__ __m512i __DEFAULT_FN_ATTRS512
758_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
759{
760  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
761                                             (__v64qi)_mm512_max_epi8(__A, __B),
762                                             (__v64qi)_mm512_setzero_si512());
763}
764
765static __inline__ __m512i __DEFAULT_FN_ATTRS512
766_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
767{
768  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
769                                             (__v64qi)_mm512_max_epi8(__A, __B),
770                                             (__v64qi)__W);
771}
772
773static __inline__ __m512i __DEFAULT_FN_ATTRS512
774_mm512_max_epi16 (__m512i __A, __m512i __B)
775{
776  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
777}
778
779static __inline__ __m512i __DEFAULT_FN_ATTRS512
780_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
781{
782  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
783                                            (__v32hi)_mm512_max_epi16(__A, __B),
784                                            (__v32hi)_mm512_setzero_si512());
785}
786
787static __inline__ __m512i __DEFAULT_FN_ATTRS512
788_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
789           __m512i __B)
790{
791  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
792                                            (__v32hi)_mm512_max_epi16(__A, __B),
793                                            (__v32hi)__W);
794}
795
796static __inline__ __m512i __DEFAULT_FN_ATTRS512
797_mm512_max_epu8 (__m512i __A, __m512i __B)
798{
799  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
800}
801
802static __inline__ __m512i __DEFAULT_FN_ATTRS512
803_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
804{
805  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
806                                             (__v64qi)_mm512_max_epu8(__A, __B),
807                                             (__v64qi)_mm512_setzero_si512());
808}
809
810static __inline__ __m512i __DEFAULT_FN_ATTRS512
811_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
812{
813  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
814                                             (__v64qi)_mm512_max_epu8(__A, __B),
815                                             (__v64qi)__W);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS512
819_mm512_max_epu16 (__m512i __A, __m512i __B)
820{
821  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
822}
823
824static __inline__ __m512i __DEFAULT_FN_ATTRS512
825_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
826{
827  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
828                                            (__v32hi)_mm512_max_epu16(__A, __B),
829                                            (__v32hi)_mm512_setzero_si512());
830}
831
832static __inline__ __m512i __DEFAULT_FN_ATTRS512
833_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
834{
835  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
836                                            (__v32hi)_mm512_max_epu16(__A, __B),
837                                            (__v32hi)__W);
838}
839
840static __inline__ __m512i __DEFAULT_FN_ATTRS512
841_mm512_min_epi8 (__m512i __A, __m512i __B)
842{
843  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
844}
845
846static __inline__ __m512i __DEFAULT_FN_ATTRS512
847_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
848{
849  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
850                                             (__v64qi)_mm512_min_epi8(__A, __B),
851                                             (__v64qi)_mm512_setzero_si512());
852}
853
854static __inline__ __m512i __DEFAULT_FN_ATTRS512
855_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
856{
857  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
858                                             (__v64qi)_mm512_min_epi8(__A, __B),
859                                             (__v64qi)__W);
860}
861
862static __inline__ __m512i __DEFAULT_FN_ATTRS512
863_mm512_min_epi16 (__m512i __A, __m512i __B)
864{
865  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
866}
867
868static __inline__ __m512i __DEFAULT_FN_ATTRS512
869_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
870{
871  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
872                                            (__v32hi)_mm512_min_epi16(__A, __B),
873                                            (__v32hi)_mm512_setzero_si512());
874}
875
876static __inline__ __m512i __DEFAULT_FN_ATTRS512
877_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
878{
879  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
880                                            (__v32hi)_mm512_min_epi16(__A, __B),
881                                            (__v32hi)__W);
882}
883
884static __inline__ __m512i __DEFAULT_FN_ATTRS512
885_mm512_min_epu8 (__m512i __A, __m512i __B)
886{
887  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS512
891_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
892{
893  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
894                                             (__v64qi)_mm512_min_epu8(__A, __B),
895                                             (__v64qi)_mm512_setzero_si512());
896}
897
898static __inline__ __m512i __DEFAULT_FN_ATTRS512
899_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
900{
901  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
902                                             (__v64qi)_mm512_min_epu8(__A, __B),
903                                             (__v64qi)__W);
904}
905
906static __inline__ __m512i __DEFAULT_FN_ATTRS512
907_mm512_min_epu16 (__m512i __A, __m512i __B)
908{
909  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
910}
911
912static __inline__ __m512i __DEFAULT_FN_ATTRS512
913_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
914{
915  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
916                                            (__v32hi)_mm512_min_epu16(__A, __B),
917                                            (__v32hi)_mm512_setzero_si512());
918}
919
920static __inline__ __m512i __DEFAULT_FN_ATTRS512
921_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
922{
923  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
924                                            (__v32hi)_mm512_min_epu16(__A, __B),
925                                            (__v32hi)__W);
926}
927
928static __inline__ __m512i __DEFAULT_FN_ATTRS512
929_mm512_shuffle_epi8(__m512i __A, __m512i __B)
930{
931  return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
932}
933
934static __inline__ __m512i __DEFAULT_FN_ATTRS512
935_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
936{
937  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
938                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
939                                         (__v64qi)__W);
940}
941
942static __inline__ __m512i __DEFAULT_FN_ATTRS512
943_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
944{
945  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
946                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
947                                         (__v64qi)_mm512_setzero_si512());
948}
949
950static __inline__ __m512i __DEFAULT_FN_ATTRS512
951_mm512_subs_epi8 (__m512i __A, __m512i __B)
952{
953  return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
954}
955
956static __inline__ __m512i __DEFAULT_FN_ATTRS512
957_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
958{
959  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960                                        (__v64qi)_mm512_subs_epi8(__A, __B),
961                                        (__v64qi)__W);
962}
963
964static __inline__ __m512i __DEFAULT_FN_ATTRS512
965_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
966{
967  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968                                        (__v64qi)_mm512_subs_epi8(__A, __B),
969                                        (__v64qi)_mm512_setzero_si512());
970}
971
972static __inline__ __m512i __DEFAULT_FN_ATTRS512
973_mm512_subs_epi16 (__m512i __A, __m512i __B)
974{
975  return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
976}
977
978static __inline__ __m512i __DEFAULT_FN_ATTRS512
979_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
980{
981  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
982                                        (__v32hi)_mm512_subs_epi16(__A, __B),
983                                        (__v32hi)__W);
984}
985
986static __inline__ __m512i __DEFAULT_FN_ATTRS512
987_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
988{
989  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
990                                        (__v32hi)_mm512_subs_epi16(__A, __B),
991                                        (__v32hi)_mm512_setzero_si512());
992}
993
994static __inline__ __m512i __DEFAULT_FN_ATTRS512
995_mm512_subs_epu8 (__m512i __A, __m512i __B)
996{
997  return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
998}
999
1000static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
1002{
1003  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1004                                        (__v64qi)_mm512_subs_epu8(__A, __B),
1005                                        (__v64qi)__W);
1006}
1007
1008static __inline__ __m512i __DEFAULT_FN_ATTRS512
1009_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1010{
1011  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1012                                        (__v64qi)_mm512_subs_epu8(__A, __B),
1013                                        (__v64qi)_mm512_setzero_si512());
1014}
1015
1016static __inline__ __m512i __DEFAULT_FN_ATTRS512
1017_mm512_subs_epu16 (__m512i __A, __m512i __B)
1018{
1019  return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
1020}
1021
1022static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1024{
1025  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1026                                        (__v32hi)_mm512_subs_epu16(__A, __B),
1027                                        (__v32hi)__W);
1028}
1029
1030static __inline__ __m512i __DEFAULT_FN_ATTRS512
1031_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1032{
1033  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1034                                        (__v32hi)_mm512_subs_epu16(__A, __B),
1035                                        (__v32hi)_mm512_setzero_si512());
1036}
1037
1038static __inline__ __m512i __DEFAULT_FN_ATTRS512
1039_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1040{
1041  return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1042                                                 (__v32hi)__B);
1043}
1044
1045static __inline__ __m512i __DEFAULT_FN_ATTRS512
1046_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1047                               __m512i __B)
1048{
1049  return (__m512i)__builtin_ia32_selectw_512(__U,
1050                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1051                              (__v32hi)__A);
1052}
1053
1054static __inline__ __m512i __DEFAULT_FN_ATTRS512
1055_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1056                                __m512i __B)
1057{
1058  return (__m512i)__builtin_ia32_selectw_512(__U,
1059                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1060                              (__v32hi)__I);
1061}
1062
1063static __inline__ __m512i __DEFAULT_FN_ATTRS512
1064_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1065                                __m512i __B)
1066{
1067  return (__m512i)__builtin_ia32_selectw_512(__U,
1068                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1069                              (__v32hi)_mm512_setzero_si512());
1070}
1071
1072static __inline__ __m512i __DEFAULT_FN_ATTRS512
1073_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1074{
1075  return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
1076}
1077
1078static __inline__ __m512i __DEFAULT_FN_ATTRS512
1079_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1080{
1081  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1082                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1083                                         (__v32hi)__W);
1084}
1085
1086static __inline__ __m512i __DEFAULT_FN_ATTRS512
1087_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1088{
1089  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1090                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1091                                         (__v32hi)_mm512_setzero_si512());
1092}
1093
1094static __inline__ __m512i __DEFAULT_FN_ATTRS512
1095_mm512_mulhi_epi16(__m512i __A, __m512i __B)
1096{
1097  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1102       __m512i __B)
1103{
1104  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1105                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
1106                                          (__v32hi)__W);
1107}
1108
1109static __inline__ __m512i __DEFAULT_FN_ATTRS512
1110_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1111{
1112  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1113                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
1114                                          (__v32hi)_mm512_setzero_si512());
1115}
1116
1117static __inline__ __m512i __DEFAULT_FN_ATTRS512
1118_mm512_mulhi_epu16(__m512i __A, __m512i __B)
1119{
1120  return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1121}
1122
1123static __inline__ __m512i __DEFAULT_FN_ATTRS512
1124_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1125{
1126  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
1128                                          (__v32hi)__W);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512
1132_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1133{
1134  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
1136                                          (__v32hi)_mm512_setzero_si512());
1137}
1138
1139static __inline__ __m512i __DEFAULT_FN_ATTRS512
1140_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1141  return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1142}
1143
1144static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1146                          __m512i __Y) {
1147  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1148                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1149                                        (__v32hi)__W);
1150}
1151
1152static __inline__ __m512i __DEFAULT_FN_ATTRS512
1153_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1154  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1155                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1156                                        (__v32hi)_mm512_setzero_si512());
1157}
1158
1159static __inline__ __m512i __DEFAULT_FN_ATTRS512
1160_mm512_madd_epi16(__m512i __A, __m512i __B) {
1161  return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1162}
1163
1164static __inline__ __m512i __DEFAULT_FN_ATTRS512
1165_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1166  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1167                                           (__v16si)_mm512_madd_epi16(__A, __B),
1168                                           (__v16si)__W);
1169}
1170
1171static __inline__ __m512i __DEFAULT_FN_ATTRS512
1172_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1173  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1174                                           (__v16si)_mm512_madd_epi16(__A, __B),
1175                                           (__v16si)_mm512_setzero_si512());
1176}
1177
1178static __inline__ __m256i __DEFAULT_FN_ATTRS512
1179_mm512_cvtsepi16_epi8 (__m512i __A) {
1180  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1181               (__v32qi)_mm256_setzero_si256(),
1182               (__mmask32) -1);
1183}
1184
1185static __inline__ __m256i __DEFAULT_FN_ATTRS512
1186_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1187  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1188               (__v32qi)__O,
1189               __M);
1190}
1191
1192static __inline__ __m256i __DEFAULT_FN_ATTRS512
1193_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1194  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1195               (__v32qi) _mm256_setzero_si256(),
1196               __M);
1197}
1198
1199static __inline__ __m256i __DEFAULT_FN_ATTRS512
1200_mm512_cvtusepi16_epi8 (__m512i __A) {
1201  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1202                (__v32qi) _mm256_setzero_si256(),
1203                (__mmask32) -1);
1204}
1205
1206static __inline__ __m256i __DEFAULT_FN_ATTRS512
1207_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1208  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1209                (__v32qi) __O,
1210                __M);
1211}
1212
1213static __inline__ __m256i __DEFAULT_FN_ATTRS512
1214_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1215  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1216                (__v32qi) _mm256_setzero_si256(),
1217                __M);
1218}
1219
1220static __inline__ __m256i __DEFAULT_FN_ATTRS512
1221_mm512_cvtepi16_epi8 (__m512i __A) {
1222  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1223              (__v32qi) _mm256_undefined_si256(),
1224              (__mmask32) -1);
1225}
1226
1227static __inline__ __m256i __DEFAULT_FN_ATTRS512
1228_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1229  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1230              (__v32qi) __O,
1231              __M);
1232}
1233
1234static __inline__ __m256i __DEFAULT_FN_ATTRS512
1235_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1236  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1237              (__v32qi) _mm256_setzero_si256(),
1238              __M);
1239}
1240
1241static __inline__ void __DEFAULT_FN_ATTRS512
1242_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1243{
1244  __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1245}
1246
1247static __inline__ void __DEFAULT_FN_ATTRS512
1248_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1249{
1250  __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1251}
1252
1253static __inline__ void __DEFAULT_FN_ATTRS512
1254_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1255{
1256  __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1257}
1258
1259static __inline__ __m512i __DEFAULT_FN_ATTRS512
1260_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1261  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1262                                          8,  64+8,   9, 64+9,
1263                                          10, 64+10, 11, 64+11,
1264                                          12, 64+12, 13, 64+13,
1265                                          14, 64+14, 15, 64+15,
1266                                          24, 64+24, 25, 64+25,
1267                                          26, 64+26, 27, 64+27,
1268                                          28, 64+28, 29, 64+29,
1269                                          30, 64+30, 31, 64+31,
1270                                          40, 64+40, 41, 64+41,
1271                                          42, 64+42, 43, 64+43,
1272                                          44, 64+44, 45, 64+45,
1273                                          46, 64+46, 47, 64+47,
1274                                          56, 64+56, 57, 64+57,
1275                                          58, 64+58, 59, 64+59,
1276                                          60, 64+60, 61, 64+61,
1277                                          62, 64+62, 63, 64+63);
1278}
1279
1280static __inline__ __m512i __DEFAULT_FN_ATTRS512
1281_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1282  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1283                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1284                                        (__v64qi)__W);
1285}
1286
1287static __inline__ __m512i __DEFAULT_FN_ATTRS512
1288_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1289  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1290                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1291                                        (__v64qi)_mm512_setzero_si512());
1292}
1293
1294static __inline__ __m512i __DEFAULT_FN_ATTRS512
1295_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1296  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1297                                          4,  32+4,   5, 32+5,
1298                                          6,  32+6,   7, 32+7,
1299                                          12, 32+12, 13, 32+13,
1300                                          14, 32+14, 15, 32+15,
1301                                          20, 32+20, 21, 32+21,
1302                                          22, 32+22, 23, 32+23,
1303                                          28, 32+28, 29, 32+29,
1304                                          30, 32+30, 31, 32+31);
1305}
1306
1307static __inline__ __m512i __DEFAULT_FN_ATTRS512
1308_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1309  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1310                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1311                                       (__v32hi)__W);
1312}
1313
1314static __inline__ __m512i __DEFAULT_FN_ATTRS512
1315_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1316  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1317                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1318                                       (__v32hi)_mm512_setzero_si512());
1319}
1320
1321static __inline__ __m512i __DEFAULT_FN_ATTRS512
1322_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1323  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1324                                          0,  64+0,   1, 64+1,
1325                                          2,  64+2,   3, 64+3,
1326                                          4,  64+4,   5, 64+5,
1327                                          6,  64+6,   7, 64+7,
1328                                          16, 64+16, 17, 64+17,
1329                                          18, 64+18, 19, 64+19,
1330                                          20, 64+20, 21, 64+21,
1331                                          22, 64+22, 23, 64+23,
1332                                          32, 64+32, 33, 64+33,
1333                                          34, 64+34, 35, 64+35,
1334                                          36, 64+36, 37, 64+37,
1335                                          38, 64+38, 39, 64+39,
1336                                          48, 64+48, 49, 64+49,
1337                                          50, 64+50, 51, 64+51,
1338                                          52, 64+52, 53, 64+53,
1339                                          54, 64+54, 55, 64+55);
1340}
1341
1342static __inline__ __m512i __DEFAULT_FN_ATTRS512
1343_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1344  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1345                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1346                                        (__v64qi)__W);
1347}
1348
1349static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1351  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1352                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1353                                        (__v64qi)_mm512_setzero_si512());
1354}
1355
1356static __inline__ __m512i __DEFAULT_FN_ATTRS512
1357_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1358  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1359                                          0,  32+0,   1, 32+1,
1360                                          2,  32+2,   3, 32+3,
1361                                          8,  32+8,   9, 32+9,
1362                                          10, 32+10, 11, 32+11,
1363                                          16, 32+16, 17, 32+17,
1364                                          18, 32+18, 19, 32+19,
1365                                          24, 32+24, 25, 32+25,
1366                                          26, 32+26, 27, 32+27);
1367}
1368
1369static __inline__ __m512i __DEFAULT_FN_ATTRS512
1370_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1371  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1372                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1373                                       (__v32hi)__W);
1374}
1375
1376static __inline__ __m512i __DEFAULT_FN_ATTRS512
1377_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1378  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1379                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1380                                       (__v32hi)_mm512_setzero_si512());
1381}
1382
1383static __inline__ __m512i __DEFAULT_FN_ATTRS512
1384_mm512_cvtepi8_epi16(__m256i __A)
1385{
1386  /* This function always performs a signed extension, but __v32qi is a char
1387     which may be signed or unsigned, so use __v32qs. */
1388  return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1389}
1390
1391static __inline__ __m512i __DEFAULT_FN_ATTRS512
1392_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1393{
1394  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1395                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1396                                             (__v32hi)__W);
1397}
1398
1399static __inline__ __m512i __DEFAULT_FN_ATTRS512
1400_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1401{
1402  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1403                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1404                                             (__v32hi)_mm512_setzero_si512());
1405}
1406
1407static __inline__ __m512i __DEFAULT_FN_ATTRS512
1408_mm512_cvtepu8_epi16(__m256i __A)
1409{
1410  return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1411}
1412
1413static __inline__ __m512i __DEFAULT_FN_ATTRS512
1414_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1415{
1416  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1418                                             (__v32hi)__W);
1419}
1420
1421static __inline__ __m512i __DEFAULT_FN_ATTRS512
1422_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1423{
1424  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1426                                             (__v32hi)_mm512_setzero_si512());
1427}
1428
1429
/* Expands to a __builtin_ia32_pshufhw512 call on A (viewed as __v32hi) with
   the control value imm.  The whole expansion is parenthesized so that an
   operator written next to the invocation (a trailing subscript, a leading
   sizeof) applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1432
/* Merge-masking form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle result and W.  The whole
   expansion is parenthesized so that an operator written next to the
   invocation applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1438
/* Zero-masking form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle result and zero.  The whole
   expansion is parenthesized so that an operator written next to the
   invocation applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1444
/* Expands to a __builtin_ia32_pshuflw512 call on A (viewed as __v32hi) with
   the control value imm.  The whole expansion is parenthesized so that an
   operator written next to the invocation (a trailing subscript, a leading
   sizeof) applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1447
1448
/* Merge-masking form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle result and W.  The whole
   expansion is parenthesized so that an operator written next to the
   invocation applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1454
1455
/* Zero-masking form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle result and zero.  The whole
   expansion is parenthesized so that an operator written next to the
   invocation applies to the finished __m512i value rather than splitting the
   leading cast from the call. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1461
1462static __inline__ __m512i __DEFAULT_FN_ATTRS512
1463_mm512_sllv_epi16(__m512i __A, __m512i __B)
1464{
1465  return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1466}
1467
1468static __inline__ __m512i __DEFAULT_FN_ATTRS512
1469_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1470{
1471  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1472                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1473                                           (__v32hi)__W);
1474}
1475
1476static __inline__ __m512i __DEFAULT_FN_ATTRS512
1477_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1478{
1479  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1480                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1481                                           (__v32hi)_mm512_setzero_si512());
1482}
1483
1484static __inline__ __m512i __DEFAULT_FN_ATTRS512
1485_mm512_sll_epi16(__m512i __A, __m128i __B)
1486{
1487  return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1488}
1489
1490static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1492{
1493  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1495                                          (__v32hi)__W);
1496}
1497
1498static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1500{
1501  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1503                                          (__v32hi)_mm512_setzero_si512());
1504}
1505
1506static __inline__ __m512i __DEFAULT_FN_ATTRS512
1507_mm512_slli_epi16(__m512i __A, int __B)
1508{
1509  return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
1510}
1511
1512static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1514{
1515  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1517                                         (__v32hi)__W);
1518}
1519
1520static __inline__ __m512i __DEFAULT_FN_ATTRS512
1521_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1522{
1523  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1524                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1525                                         (__v32hi)_mm512_setzero_si512());
1526}
1527
/* Expands to a call of the pslldqi512_byteshift builtin with `a` viewed as
 * __v8di and `imm` converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so that an operator applied to the
 * macro at the use site (e.g. a subscript) acts on the cast result rather
 * than binding to the builtin call ahead of the cast. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), \
                                                (int)(imm)))
1530
1531static __inline__ __m512i __DEFAULT_FN_ATTRS512
1532_mm512_srlv_epi16(__m512i __A, __m512i __B)
1533{
1534  return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1535}
1536
1537static __inline__ __m512i __DEFAULT_FN_ATTRS512
1538_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1539{
1540  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1541                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1542                                           (__v32hi)__W);
1543}
1544
1545static __inline__ __m512i __DEFAULT_FN_ATTRS512
1546_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1547{
1548  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1549                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1550                                           (__v32hi)_mm512_setzero_si512());
1551}
1552
1553static __inline__ __m512i __DEFAULT_FN_ATTRS512
1554_mm512_srav_epi16(__m512i __A, __m512i __B)
1555{
1556  return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1557}
1558
1559static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1561{
1562  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1564                                           (__v32hi)__W);
1565}
1566
1567static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1569{
1570  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1571                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1572                                           (__v32hi)_mm512_setzero_si512());
1573}
1574
1575static __inline__ __m512i __DEFAULT_FN_ATTRS512
1576_mm512_sra_epi16(__m512i __A, __m128i __B)
1577{
1578  return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1579}
1580
1581static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1583{
1584  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1586                                          (__v32hi)__W);
1587}
1588
1589static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1591{
1592  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1593                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1594                                          (__v32hi)_mm512_setzero_si512());
1595}
1596
1597static __inline__ __m512i __DEFAULT_FN_ATTRS512
1598_mm512_srai_epi16(__m512i __A, int __B)
1599{
1600  return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1601}
1602
1603static __inline__ __m512i __DEFAULT_FN_ATTRS512
1604_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1605{
1606  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1607                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1608                                         (__v32hi)__W);
1609}
1610
1611static __inline__ __m512i __DEFAULT_FN_ATTRS512
1612_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1613{
1614  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1615                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1616                                         (__v32hi)_mm512_setzero_si512());
1617}
1618
1619static __inline__ __m512i __DEFAULT_FN_ATTRS512
1620_mm512_srl_epi16(__m512i __A, __m128i __B)
1621{
1622  return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1623}
1624
1625static __inline__ __m512i __DEFAULT_FN_ATTRS512
1626_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1627{
1628  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1629                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1630                                          (__v32hi)__W);
1631}
1632
1633static __inline__ __m512i __DEFAULT_FN_ATTRS512
1634_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1635{
1636  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1637                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1638                                          (__v32hi)_mm512_setzero_si512());
1639}
1640
1641static __inline__ __m512i __DEFAULT_FN_ATTRS512
1642_mm512_srli_epi16(__m512i __A, int __B)
1643{
1644  return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1645}
1646
1647static __inline__ __m512i __DEFAULT_FN_ATTRS512
1648_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1649{
1650  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1651                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1652                                         (__v32hi)__W);
1653}
1654
1655static __inline__ __m512i __DEFAULT_FN_ATTRS512
1656_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1657{
1658  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1659                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1660                                         (__v32hi)_mm512_setzero_si512());
1661}
1662
/* Expands to a call of the psrldqi512_byteshift builtin with `a` viewed as
 * __v8di and `imm` converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so that an operator applied to the
 * macro at the use site (e.g. a subscript) acts on the cast result rather
 * than binding to the builtin call ahead of the cast. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), \
                                                (int)(imm)))
1665
1666static __inline__ __m512i __DEFAULT_FN_ATTRS512
1667_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1668{
1669  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1670                (__v32hi) __A,
1671                (__v32hi) __W);
1672}
1673
1674static __inline__ __m512i __DEFAULT_FN_ATTRS512
1675_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1676{
1677  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1678                (__v32hi) __A,
1679                (__v32hi) _mm512_setzero_si512 ());
1680}
1681
1682static __inline__ __m512i __DEFAULT_FN_ATTRS512
1683_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1684{
1685  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1686                (__v64qi) __A,
1687                (__v64qi) __W);
1688}
1689
1690static __inline__ __m512i __DEFAULT_FN_ATTRS512
1691_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1692{
1693  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1694                (__v64qi) __A,
1695                (__v64qi) _mm512_setzero_si512 ());
1696}
1697
1698static __inline__ __m512i __DEFAULT_FN_ATTRS512
1699_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1700{
1701  return (__m512i) __builtin_ia32_selectb_512(__M,
1702                                              (__v64qi)_mm512_set1_epi8(__A),
1703                                              (__v64qi) __O);
1704}
1705
1706static __inline__ __m512i __DEFAULT_FN_ATTRS512
1707_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1708{
1709  return (__m512i) __builtin_ia32_selectb_512(__M,
1710                                              (__v64qi) _mm512_set1_epi8(__A),
1711                                              (__v64qi) _mm512_setzero_si512());
1712}
1713
1714static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1715_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1716{
1717  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1718                (__mmask64) __B);
1719}
1720
1721static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1722_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1723{
1724  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1725                (__mmask32) __B);
1726}
1727
1728static __inline __m512i __DEFAULT_FN_ATTRS512
1729_mm512_loadu_epi16 (void const *__P)
1730{
1731  struct __loadu_epi16 {
1732    __m512i_u __v;
1733  } __attribute__((__packed__, __may_alias__));
1734  return ((const struct __loadu_epi16*)__P)->__v;
1735}
1736
1737static __inline__ __m512i __DEFAULT_FN_ATTRS512
1738_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1739{
1740  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
1741                 (__v32hi) __W,
1742                 (__mmask32) __U);
1743}
1744
1745static __inline__ __m512i __DEFAULT_FN_ATTRS512
1746_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1747{
1748  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
1749                 (__v32hi)
1750                 _mm512_setzero_si512 (),
1751                 (__mmask32) __U);
1752}
1753
1754static __inline __m512i __DEFAULT_FN_ATTRS512
1755_mm512_loadu_epi8 (void const *__P)
1756{
1757  struct __loadu_epi8 {
1758    __m512i_u __v;
1759  } __attribute__((__packed__, __may_alias__));
1760  return ((const struct __loadu_epi8*)__P)->__v;
1761}
1762
1763static __inline__ __m512i __DEFAULT_FN_ATTRS512
1764_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1765{
1766  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
1767                 (__v64qi) __W,
1768                 (__mmask64) __U);
1769}
1770
1771static __inline__ __m512i __DEFAULT_FN_ATTRS512
1772_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1773{
1774  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
1775                 (__v64qi)
1776                 _mm512_setzero_si512 (),
1777                 (__mmask64) __U);
1778}
1779
1780static __inline void __DEFAULT_FN_ATTRS512
1781_mm512_storeu_epi16 (void *__P, __m512i __A)
1782{
1783  struct __storeu_epi16 {
1784    __m512i_u __v;
1785  } __attribute__((__packed__, __may_alias__));
1786  ((struct __storeu_epi16*)__P)->__v = __A;
1787}
1788
1789static __inline__ void __DEFAULT_FN_ATTRS512
1790_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1791{
1792  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1793             (__v32hi) __A,
1794             (__mmask32) __U);
1795}
1796
1797static __inline void __DEFAULT_FN_ATTRS512
1798_mm512_storeu_epi8 (void *__P, __m512i __A)
1799{
1800  struct __storeu_epi8 {
1801    __m512i_u __v;
1802  } __attribute__((__packed__, __may_alias__));
1803  ((struct __storeu_epi8*)__P)->__v = __A;
1804}
1805
1806static __inline__ void __DEFAULT_FN_ATTRS512
1807_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1808{
1809  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1810             (__v64qi) __A,
1811             (__mmask64) __U);
1812}
1813
1814static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1815_mm512_test_epi8_mask (__m512i __A, __m512i __B)
1816{
1817  return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1818                                  _mm512_setzero_si512());
1819}
1820
1821static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1822_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1823{
1824  return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1825                                       _mm512_setzero_si512());
1826}
1827
1828static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1829_mm512_test_epi16_mask (__m512i __A, __m512i __B)
1830{
1831  return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1832                                   _mm512_setzero_si512());
1833}
1834
1835static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1836_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1837{
1838  return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1839                                        _mm512_setzero_si512());
1840}
1841
1842static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1843_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1844{
1845  return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1846}
1847
1848static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1849_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1850{
1851  return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1852                                      _mm512_setzero_si512());
1853}
1854
1855static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1856_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1857{
1858  return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1859                                  _mm512_setzero_si512());
1860}
1861
1862static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1863_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1864{
1865  return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1866                                       _mm512_setzero_si512());
1867}
1868
1869static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1870_mm512_movepi8_mask (__m512i __A)
1871{
1872  return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1873}
1874
1875static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1876_mm512_movepi16_mask (__m512i __A)
1877{
1878  return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1879}
1880
1881static __inline__ __m512i __DEFAULT_FN_ATTRS512
1882_mm512_movm_epi8 (__mmask64 __A)
1883{
1884  return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1885}
1886
1887static __inline__ __m512i __DEFAULT_FN_ATTRS512
1888_mm512_movm_epi16 (__mmask32 __A)
1889{
1890  return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1891}
1892
1893static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894_mm512_broadcastb_epi8 (__m128i __A)
1895{
1896  return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1897                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1898                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1899                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1900                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1901}
1902
1903static __inline__ __m512i __DEFAULT_FN_ATTRS512
1904_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1905{
1906  return (__m512i)__builtin_ia32_selectb_512(__M,
1907                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1908                                             (__v64qi) __O);
1909}
1910
1911static __inline__ __m512i __DEFAULT_FN_ATTRS512
1912_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1913{
1914  return (__m512i)__builtin_ia32_selectb_512(__M,
1915                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1916                                             (__v64qi) _mm512_setzero_si512());
1917}
1918
1919static __inline__ __m512i __DEFAULT_FN_ATTRS512
1920_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1921{
1922  return (__m512i) __builtin_ia32_selectw_512(__M,
1923                                              (__v32hi) _mm512_set1_epi16(__A),
1924                                              (__v32hi) __O);
1925}
1926
1927static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1929{
1930  return (__m512i) __builtin_ia32_selectw_512(__M,
1931                                              (__v32hi) _mm512_set1_epi16(__A),
1932                                              (__v32hi) _mm512_setzero_si512());
1933}
1934
1935static __inline__ __m512i __DEFAULT_FN_ATTRS512
1936_mm512_broadcastw_epi16 (__m128i __A)
1937{
1938  return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1939                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1940                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1941}
1942
1943static __inline__ __m512i __DEFAULT_FN_ATTRS512
1944_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1945{
1946  return (__m512i)__builtin_ia32_selectw_512(__M,
1947                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1948                                             (__v32hi) __O);
1949}
1950
1951static __inline__ __m512i __DEFAULT_FN_ATTRS512
1952_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1953{
1954  return (__m512i)__builtin_ia32_selectw_512(__M,
1955                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1956                                             (__v32hi) _mm512_setzero_si512());
1957}
1958
1959static __inline__ __m512i __DEFAULT_FN_ATTRS512
1960_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1961{
1962  return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1963}
1964
1965static __inline__ __m512i __DEFAULT_FN_ATTRS512
1966_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1967        __m512i __B)
1968{
1969  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1970                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1971                                    (__v32hi)_mm512_setzero_si512());
1972}
1973
1974static __inline__ __m512i __DEFAULT_FN_ATTRS512
1975_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1976             __m512i __B)
1977{
1978  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1979                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1980                                    (__v32hi)__W);
1981}
1982
/* Expands to a call of the palignr512 builtin with A and B viewed as __v64qi
 * and N converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so that an operator applied to the
 * macro at the use site (e.g. a subscript) acts on the cast result rather
 * than binding to the builtin call ahead of the cast. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
1986
/* Expands to a call of the selectb_512 builtin with the mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and W, in that order; the result is cast to
 * __m512i. The whole expansion is parenthesized so that an operator applied
 * to the macro at the use site acts on the cast result rather than binding to
 * the builtin call ahead of the cast. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)(W)))
1991
/* Expands to a call of the selectb_512 builtin with the mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and an all-zero vector, in that order; the
 * result is cast to __m512i. The whole expansion is parenthesized so that an
 * operator applied to the macro at the use site acts on the cast result
 * rather than binding to the builtin call ahead of the cast. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)_mm512_setzero_si512()))
1996
/* Expands to a call of the dbpsadbw512 builtin with A and B viewed as __v64qi
 * and imm converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so that an operator applied to the
 * macro at the use site (e.g. a subscript) acts on the cast result rather
 * than binding to the builtin call ahead of the cast. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
2000
/* Expands to a call of the selectw_512 builtin with the mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and W, in that order; the result is cast to
 * __m512i. The whole expansion is parenthesized so that an operator applied
 * to the macro at the use site acts on the cast result rather than binding to
 * the builtin call ahead of the cast. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                   (__v32hi)(__m512i)(W)))
2005
/* Expands to a call of the selectw_512 builtin with the mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and an all-zero vector, in that order; the
 * result is cast to __m512i. The whole expansion is parenthesized so that an
 * operator applied to the macro at the use site acts on the cast result
 * rather than binding to the builtin call ahead of the cast. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                   (__v32hi)_mm512_setzero_si512()))
2010
2011static __inline__ __m512i __DEFAULT_FN_ATTRS512
2012_mm512_sad_epu8 (__m512i __A, __m512i __B)
2013{
2014 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2015               (__v64qi) __B);
2016}
2017
/* Remove the attribute helper macros defined near the top of this header so
 * they are no longer defined after it has been included. */
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512BWINTRIN_H */
2022