avx512bwintrin.h revision 344779
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* Mask types used by the intrinsics in this header: one unsigned integer
 * wide enough for 32 lanes and one wide enough for 64 lanes. */
typedef unsigned int       __mmask32;
typedef unsigned long long __mmask64;
33
/* Attribute sets applied to every function defined in this file.
 * __DEFAULT_FN_ATTRS512 is used by the functions operating on __m512i and
 * additionally carries __min_vector_width__(512); __DEFAULT_FN_ATTRS is
 * used by the mask-only helpers. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),      \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
37
38static __inline __mmask32 __DEFAULT_FN_ATTRS
39_knot_mask32(__mmask32 __M)
40{
41  return __builtin_ia32_knotsi(__M);
42}
43
44static __inline __mmask64 __DEFAULT_FN_ATTRS
45_knot_mask64(__mmask64 __M)
46{
47  return __builtin_ia32_knotdi(__M);
48}
49
50static __inline__ __mmask32 __DEFAULT_FN_ATTRS
51_kand_mask32(__mmask32 __A, __mmask32 __B)
52{
53  return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
54}
55
56static __inline__ __mmask64 __DEFAULT_FN_ATTRS
57_kand_mask64(__mmask64 __A, __mmask64 __B)
58{
59  return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
60}
61
62static __inline__ __mmask32 __DEFAULT_FN_ATTRS
63_kandn_mask32(__mmask32 __A, __mmask32 __B)
64{
65  return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
66}
67
68static __inline__ __mmask64 __DEFAULT_FN_ATTRS
69_kandn_mask64(__mmask64 __A, __mmask64 __B)
70{
71  return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
72}
73
74static __inline__ __mmask32 __DEFAULT_FN_ATTRS
75_kor_mask32(__mmask32 __A, __mmask32 __B)
76{
77  return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
78}
79
80static __inline__ __mmask64 __DEFAULT_FN_ATTRS
81_kor_mask64(__mmask64 __A, __mmask64 __B)
82{
83  return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
84}
85
86static __inline__ __mmask32 __DEFAULT_FN_ATTRS
87_kxnor_mask32(__mmask32 __A, __mmask32 __B)
88{
89  return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
90}
91
92static __inline__ __mmask64 __DEFAULT_FN_ATTRS
93_kxnor_mask64(__mmask64 __A, __mmask64 __B)
94{
95  return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
96}
97
98static __inline__ __mmask32 __DEFAULT_FN_ATTRS
99_kxor_mask32(__mmask32 __A, __mmask32 __B)
100{
101  return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
102}
103
104static __inline__ __mmask64 __DEFAULT_FN_ATTRS
105_kxor_mask64(__mmask64 __A, __mmask64 __B)
106{
107  return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
108}
109
110static __inline__ unsigned char __DEFAULT_FN_ATTRS
111_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
112{
113  return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
114}
115
116static __inline__ unsigned char __DEFAULT_FN_ATTRS
117_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
118{
119  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
120}
121
122static __inline__ unsigned char __DEFAULT_FN_ATTRS
123_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
124  *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
125  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
126}
127
128static __inline__ unsigned char __DEFAULT_FN_ATTRS
129_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
130{
131  return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
132}
133
134static __inline__ unsigned char __DEFAULT_FN_ATTRS
135_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
136{
137  return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
138}
139
140static __inline__ unsigned char __DEFAULT_FN_ATTRS
141_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
142  *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
143  return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
144}
145
146static __inline__ unsigned char __DEFAULT_FN_ATTRS
147_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
148{
149  return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
150}
151
152static __inline__ unsigned char __DEFAULT_FN_ATTRS
153_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
154{
155  return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
156}
157
158static __inline__ unsigned char __DEFAULT_FN_ATTRS
159_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
160  *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
161  return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
162}
163
164static __inline__ unsigned char __DEFAULT_FN_ATTRS
165_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
166{
167  return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
168}
169
170static __inline__ unsigned char __DEFAULT_FN_ATTRS
171_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
172{
173  return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
174}
175
176static __inline__ unsigned char __DEFAULT_FN_ATTRS
177_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
178  *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
179  return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
180}
181
182static __inline__ __mmask32 __DEFAULT_FN_ATTRS
183_kadd_mask32(__mmask32 __A, __mmask32 __B)
184{
185  return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
186}
187
188static __inline__ __mmask64 __DEFAULT_FN_ATTRS
189_kadd_mask64(__mmask64 __A, __mmask64 __B)
190{
191  return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
192}
193
/* Expands to the kshiftlisi builtin on mask A with count I (cast to
 * unsigned int), yielding a __mmask32. The whole expansion is wrapped in
 * parentheses so the leading cast stays bound to the call whatever
 * operator (e.g. sizeof) surrounds the macro at the use site. */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))
196
/* Expands to the kshiftrisi builtin on mask A with count I (cast to
 * unsigned int), yielding a __mmask32. The whole expansion is wrapped in
 * parentheses so the leading cast stays bound to the call whatever
 * operator (e.g. sizeof) surrounds the macro at the use site. */
#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))
199
/* Expands to the kshiftlidi builtin on mask A with count I (cast to
 * unsigned int), yielding a __mmask64. The whole expansion is wrapped in
 * parentheses so the leading cast stays bound to the call whatever
 * operator (e.g. sizeof) surrounds the macro at the use site. */
#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))
202
/* Expands to the kshiftridi builtin on mask A with count I (cast to
 * unsigned int), yielding a __mmask64. The whole expansion is wrapped in
 * parentheses so the leading cast stays bound to the call whatever
 * operator (e.g. sizeof) surrounds the macro at the use site. */
#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
205
206static __inline__ unsigned int __DEFAULT_FN_ATTRS
207_cvtmask32_u32(__mmask32 __A) {
208  return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
209}
210
211static __inline__ unsigned long long __DEFAULT_FN_ATTRS
212_cvtmask64_u64(__mmask64 __A) {
213  return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
214}
215
216static __inline__ __mmask32 __DEFAULT_FN_ATTRS
217_cvtu32_mask32(unsigned int __A) {
218  return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
219}
220
221static __inline__ __mmask64 __DEFAULT_FN_ATTRS
222_cvtu64_mask64(unsigned long long __A) {
223  return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
224}
225
226static __inline__ __mmask32 __DEFAULT_FN_ATTRS
227_load_mask32(__mmask32 *__A) {
228  return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
229}
230
231static __inline__ __mmask64 __DEFAULT_FN_ATTRS
232_load_mask64(__mmask64 *__A) {
233  return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
234}
235
236static __inline__ void __DEFAULT_FN_ATTRS
237_store_mask32(__mmask32 *__A, __mmask32 __B) {
238  *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
239}
240
241static __inline__ void __DEFAULT_FN_ATTRS
242_store_mask64(__mmask64 *__A, __mmask64 __B) {
243  *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
244}
245
246/* Integer compare */
247
/* Expands to the cmpb512_mask builtin on a and b viewed as __v64qi, with
 * predicate p and an all-ones mask operand ((__mmask64)-1); the result is
 * a __mmask64. The expansion is fully parenthesised so the leading cast
 * cannot be split from the call by operators at the use site. */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))
252
/* Expands to the cmpb512_mask builtin on a and b viewed as __v64qi, with
 * predicate p and m as the mask operand; the result is a __mmask64. The
 * expansion is fully parenthesised so the leading cast cannot be split
 * from the call by operators at the use site. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))
257
/* Expands to the ucmpb512_mask builtin on a and b viewed as __v64qi, with
 * predicate p and an all-ones mask operand ((__mmask64)-1); the result is
 * a __mmask64. The expansion is fully parenthesised so the leading cast
 * cannot be split from the call by operators at the use site. */
#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))
262
/* Expands to the ucmpb512_mask builtin on a and b viewed as __v64qi, with
 * predicate p and m as the mask operand; the result is a __mmask64. The
 * expansion is fully parenthesised so the leading cast cannot be split
 * from the call by operators at the use site. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))
267
/* Expands to the cmpw512_mask builtin on a and b viewed as __v32hi, with
 * predicate p and an all-ones mask operand ((__mmask32)-1); the result is
 * a __mmask32. The expansion is fully parenthesised so the leading cast
 * cannot be split from the call by operators at the use site. */
#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))
272
/* Expands to the cmpw512_mask builtin on a and b viewed as __v32hi, with
 * predicate p and m as the mask operand; the result is a __mmask32. The
 * expansion is fully parenthesised so the leading cast cannot be split
 * from the call by operators at the use site. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))
277
/* Expands to the ucmpw512_mask builtin on a and b viewed as __v32hi, with
 * predicate p and an all-ones mask operand ((__mmask32)-1); the result is
 * a __mmask32. The expansion is fully parenthesised so the leading cast
 * cannot be split from the call by operators at the use site. */
#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))
282
/* Expands to the ucmpw512_mask builtin on a and b viewed as __v32hi, with
 * predicate p and m as the mask operand; the result is a __mmask32. The
 * expansion is fully parenthesised so the leading cast cannot be split
 * from the call by operators at the use site. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
287
/* Fixed-predicate shorthands for the epi8 comparison macros: each one
 * forwards its operands to _mm512_cmp_epi8_mask (or, for the mask_
 * variants, _mm512_mask_cmp_epi8_mask with m first) together with the
 * _MM_CMPINT_* constant named in the macro. */
#define _mm512_cmpeq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
312
/* Fixed-predicate shorthands for the epu8 comparison macros: each one
 * forwards its operands to _mm512_cmp_epu8_mask (or, for the mask_
 * variants, _mm512_mask_cmp_epu8_mask with m first) together with the
 * _MM_CMPINT_* constant named in the macro. */
#define _mm512_cmpeq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
337
/* Fixed-predicate shorthands for the epi16 comparison macros: each one
 * forwards its operands to _mm512_cmp_epi16_mask (or, for the mask_
 * variants, _mm512_mask_cmp_epi16_mask with m first) together with the
 * _MM_CMPINT_* constant named in the macro. */
#define _mm512_cmpeq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
362
/* Fixed-predicate shorthands for the epu16 comparison macros: each one
 * forwards its operands to _mm512_cmp_epu16_mask (or, for the mask_
 * variants, _mm512_mask_cmp_epu16_mask with m first) together with the
 * _MM_CMPINT_* constant named in the macro. */
#define _mm512_cmpeq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
387
388static __inline__ __m512i __DEFAULT_FN_ATTRS512
389_mm512_add_epi8 (__m512i __A, __m512i __B) {
390  return (__m512i) ((__v64qu) __A + (__v64qu) __B);
391}
392
393static __inline__ __m512i __DEFAULT_FN_ATTRS512
394_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
395  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
396                                             (__v64qi)_mm512_add_epi8(__A, __B),
397                                             (__v64qi)__W);
398}
399
400static __inline__ __m512i __DEFAULT_FN_ATTRS512
401_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
402  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
403                                             (__v64qi)_mm512_add_epi8(__A, __B),
404                                             (__v64qi)_mm512_setzero_si512());
405}
406
407static __inline__ __m512i __DEFAULT_FN_ATTRS512
408_mm512_sub_epi8 (__m512i __A, __m512i __B) {
409  return (__m512i) ((__v64qu) __A - (__v64qu) __B);
410}
411
412static __inline__ __m512i __DEFAULT_FN_ATTRS512
413_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
414  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
415                                             (__v64qi)_mm512_sub_epi8(__A, __B),
416                                             (__v64qi)__W);
417}
418
419static __inline__ __m512i __DEFAULT_FN_ATTRS512
420_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
421  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
422                                             (__v64qi)_mm512_sub_epi8(__A, __B),
423                                             (__v64qi)_mm512_setzero_si512());
424}
425
426static __inline__ __m512i __DEFAULT_FN_ATTRS512
427_mm512_add_epi16 (__m512i __A, __m512i __B) {
428  return (__m512i) ((__v32hu) __A + (__v32hu) __B);
429}
430
431static __inline__ __m512i __DEFAULT_FN_ATTRS512
432_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
433  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
434                                             (__v32hi)_mm512_add_epi16(__A, __B),
435                                             (__v32hi)__W);
436}
437
438static __inline__ __m512i __DEFAULT_FN_ATTRS512
439_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
440  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
441                                             (__v32hi)_mm512_add_epi16(__A, __B),
442                                             (__v32hi)_mm512_setzero_si512());
443}
444
445static __inline__ __m512i __DEFAULT_FN_ATTRS512
446_mm512_sub_epi16 (__m512i __A, __m512i __B) {
447  return (__m512i) ((__v32hu) __A - (__v32hu) __B);
448}
449
450static __inline__ __m512i __DEFAULT_FN_ATTRS512
451_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
452  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
453                                             (__v32hi)_mm512_sub_epi16(__A, __B),
454                                             (__v32hi)__W);
455}
456
457static __inline__ __m512i __DEFAULT_FN_ATTRS512
458_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
459  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
460                                             (__v32hi)_mm512_sub_epi16(__A, __B),
461                                             (__v32hi)_mm512_setzero_si512());
462}
463
464static __inline__ __m512i __DEFAULT_FN_ATTRS512
465_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
466  return (__m512i) ((__v32hu) __A * (__v32hu) __B);
467}
468
469static __inline__ __m512i __DEFAULT_FN_ATTRS512
470_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
471  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
472                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
473                                             (__v32hi)__W);
474}
475
476static __inline__ __m512i __DEFAULT_FN_ATTRS512
477_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
478  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
479                                             (__v32hi)_mm512_mullo_epi16(__A, __B),
480                                             (__v32hi)_mm512_setzero_si512());
481}
482
483static __inline__ __m512i __DEFAULT_FN_ATTRS512
484_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
485{
486  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
487              (__v64qi) __W,
488              (__v64qi) __A);
489}
490
491static __inline__ __m512i __DEFAULT_FN_ATTRS512
492_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
493{
494  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
495              (__v32hi) __W,
496              (__v32hi) __A);
497}
498
499static __inline__ __m512i __DEFAULT_FN_ATTRS512
500_mm512_abs_epi8 (__m512i __A)
501{
502  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
503}
504
505static __inline__ __m512i __DEFAULT_FN_ATTRS512
506_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
507{
508  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
509                                             (__v64qi)_mm512_abs_epi8(__A),
510                                             (__v64qi)__W);
511}
512
513static __inline__ __m512i __DEFAULT_FN_ATTRS512
514_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
515{
516  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
517                                             (__v64qi)_mm512_abs_epi8(__A),
518                                             (__v64qi)_mm512_setzero_si512());
519}
520
521static __inline__ __m512i __DEFAULT_FN_ATTRS512
522_mm512_abs_epi16 (__m512i __A)
523{
524  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
525}
526
527static __inline__ __m512i __DEFAULT_FN_ATTRS512
528_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
529{
530  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
531                                             (__v32hi)_mm512_abs_epi16(__A),
532                                             (__v32hi)__W);
533}
534
535static __inline__ __m512i __DEFAULT_FN_ATTRS512
536_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
537{
538  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
539                                             (__v32hi)_mm512_abs_epi16(__A),
540                                             (__v32hi)_mm512_setzero_si512());
541}
542
543static __inline__ __m512i __DEFAULT_FN_ATTRS512
544_mm512_packs_epi32(__m512i __A, __m512i __B)
545{
546  return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
547}
548
549static __inline__ __m512i __DEFAULT_FN_ATTRS512
550_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
551{
552  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
553                                       (__v32hi)_mm512_packs_epi32(__A, __B),
554                                       (__v32hi)_mm512_setzero_si512());
555}
556
557static __inline__ __m512i __DEFAULT_FN_ATTRS512
558_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
559{
560  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
561                                       (__v32hi)_mm512_packs_epi32(__A, __B),
562                                       (__v32hi)__W);
563}
564
565static __inline__ __m512i __DEFAULT_FN_ATTRS512
566_mm512_packs_epi16(__m512i __A, __m512i __B)
567{
568  return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
569}
570
571static __inline__ __m512i __DEFAULT_FN_ATTRS512
572_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
573{
574  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
575                                        (__v64qi)_mm512_packs_epi16(__A, __B),
576                                        (__v64qi)__W);
577}
578
579static __inline__ __m512i __DEFAULT_FN_ATTRS512
580_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
581{
582  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
583                                        (__v64qi)_mm512_packs_epi16(__A, __B),
584                                        (__v64qi)_mm512_setzero_si512());
585}
586
587static __inline__ __m512i __DEFAULT_FN_ATTRS512
588_mm512_packus_epi32(__m512i __A, __m512i __B)
589{
590  return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
591}
592
593static __inline__ __m512i __DEFAULT_FN_ATTRS512
594_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
595{
596  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
597                                       (__v32hi)_mm512_packus_epi32(__A, __B),
598                                       (__v32hi)_mm512_setzero_si512());
599}
600
601static __inline__ __m512i __DEFAULT_FN_ATTRS512
602_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
603{
604  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
605                                       (__v32hi)_mm512_packus_epi32(__A, __B),
606                                       (__v32hi)__W);
607}
608
609static __inline__ __m512i __DEFAULT_FN_ATTRS512
610_mm512_packus_epi16(__m512i __A, __m512i __B)
611{
612  return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
613}
614
615static __inline__ __m512i __DEFAULT_FN_ATTRS512
616_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
617{
618  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
619                                        (__v64qi)_mm512_packus_epi16(__A, __B),
620                                        (__v64qi)__W);
621}
622
623static __inline__ __m512i __DEFAULT_FN_ATTRS512
624_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
625{
626  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
627                                        (__v64qi)_mm512_packus_epi16(__A, __B),
628                                        (__v64qi)_mm512_setzero_si512());
629}
630
631static __inline__ __m512i __DEFAULT_FN_ATTRS512
632_mm512_adds_epi8 (__m512i __A, __m512i __B)
633{
634  return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
635}
636
637static __inline__ __m512i __DEFAULT_FN_ATTRS512
638_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
639{
640  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
641                                        (__v64qi)_mm512_adds_epi8(__A, __B),
642                                        (__v64qi)__W);
643}
644
645static __inline__ __m512i __DEFAULT_FN_ATTRS512
646_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
647{
648  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
649                                        (__v64qi)_mm512_adds_epi8(__A, __B),
650                                        (__v64qi)_mm512_setzero_si512());
651}
652
653static __inline__ __m512i __DEFAULT_FN_ATTRS512
654_mm512_adds_epi16 (__m512i __A, __m512i __B)
655{
656  return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
657}
658
659static __inline__ __m512i __DEFAULT_FN_ATTRS512
660_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
661{
662  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
663                                        (__v32hi)_mm512_adds_epi16(__A, __B),
664                                        (__v32hi)__W);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
669{
670  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
671                                        (__v32hi)_mm512_adds_epi16(__A, __B),
672                                        (__v32hi)_mm512_setzero_si512());
673}
674
675static __inline__ __m512i __DEFAULT_FN_ATTRS512
676_mm512_adds_epu8 (__m512i __A, __m512i __B)
677{
678  return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
679}
680
681static __inline__ __m512i __DEFAULT_FN_ATTRS512
682_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
683{
684  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
685                                        (__v64qi)_mm512_adds_epu8(__A, __B),
686                                        (__v64qi)__W);
687}
688
689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
691{
692  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
693                                        (__v64qi)_mm512_adds_epu8(__A, __B),
694                                        (__v64qi)_mm512_setzero_si512());
695}
696
697static __inline__ __m512i __DEFAULT_FN_ATTRS512
698_mm512_adds_epu16 (__m512i __A, __m512i __B)
699{
700  return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
701}
702
703static __inline__ __m512i __DEFAULT_FN_ATTRS512
704_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
705{
706  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
707                                        (__v32hi)_mm512_adds_epu16(__A, __B),
708                                        (__v32hi)__W);
709}
710
711static __inline__ __m512i __DEFAULT_FN_ATTRS512
712_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
713{
714  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
715                                        (__v32hi)_mm512_adds_epu16(__A, __B),
716                                        (__v32hi)_mm512_setzero_si512());
717}
718
719static __inline__ __m512i __DEFAULT_FN_ATTRS512
720_mm512_avg_epu8 (__m512i __A, __m512i __B)
721{
722  typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
723  return (__m512i)__builtin_convertvector(
724              ((__builtin_convertvector((__v64qu) __A, __v64hu) +
725                __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
726                >> 1, __v64qu);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
730_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
731          __m512i __B)
732{
733  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
734              (__v64qi)_mm512_avg_epu8(__A, __B),
735              (__v64qi)__W);
736}
737
738static __inline__ __m512i __DEFAULT_FN_ATTRS512
739_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
740{
741  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
742              (__v64qi)_mm512_avg_epu8(__A, __B),
743              (__v64qi)_mm512_setzero_si512());
744}
745
746static __inline__ __m512i __DEFAULT_FN_ATTRS512
747_mm512_avg_epu16 (__m512i __A, __m512i __B)
748{
749  typedef unsigned int __v32su __attribute__((__vector_size__(128)));
750  return (__m512i)__builtin_convertvector(
751              ((__builtin_convertvector((__v32hu) __A, __v32su) +
752                __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
753                >> 1, __v32hu);
754}
755
756static __inline__ __m512i __DEFAULT_FN_ATTRS512
757_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
758           __m512i __B)
759{
760  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
761              (__v32hi)_mm512_avg_epu16(__A, __B),
762              (__v32hi)__W);
763}
764
765static __inline__ __m512i __DEFAULT_FN_ATTRS512
766_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
767{
768  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
769              (__v32hi)_mm512_avg_epu16(__A, __B),
770              (__v32hi) _mm512_setzero_si512());
771}
772
773static __inline__ __m512i __DEFAULT_FN_ATTRS512
774_mm512_max_epi8 (__m512i __A, __m512i __B)
775{
776  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
777}
778
779static __inline__ __m512i __DEFAULT_FN_ATTRS512
780_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
781{
782  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
783                                             (__v64qi)_mm512_max_epi8(__A, __B),
784                                             (__v64qi)_mm512_setzero_si512());
785}
786
787static __inline__ __m512i __DEFAULT_FN_ATTRS512
788_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
789{
790  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
791                                             (__v64qi)_mm512_max_epi8(__A, __B),
792                                             (__v64qi)__W);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512
796_mm512_max_epi16 (__m512i __A, __m512i __B)
797{
798  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
799}
800
801static __inline__ __m512i __DEFAULT_FN_ATTRS512
802_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
803{
804  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
805                                            (__v32hi)_mm512_max_epi16(__A, __B),
806                                            (__v32hi)_mm512_setzero_si512());
807}
808
809static __inline__ __m512i __DEFAULT_FN_ATTRS512
810_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
811           __m512i __B)
812{
813  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
814                                            (__v32hi)_mm512_max_epi16(__A, __B),
815                                            (__v32hi)__W);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS512
819_mm512_max_epu8 (__m512i __A, __m512i __B)
820{
821  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
822}
823
824static __inline__ __m512i __DEFAULT_FN_ATTRS512
825_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
826{
827  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
828                                             (__v64qi)_mm512_max_epu8(__A, __B),
829                                             (__v64qi)_mm512_setzero_si512());
830}
831
832static __inline__ __m512i __DEFAULT_FN_ATTRS512
833_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
834{
835  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
836                                             (__v64qi)_mm512_max_epu8(__A, __B),
837                                             (__v64qi)__W);
838}
839
840static __inline__ __m512i __DEFAULT_FN_ATTRS512
841_mm512_max_epu16 (__m512i __A, __m512i __B)
842{
843  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
844}
845
846static __inline__ __m512i __DEFAULT_FN_ATTRS512
847_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
848{
849  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
850                                            (__v32hi)_mm512_max_epu16(__A, __B),
851                                            (__v32hi)_mm512_setzero_si512());
852}
853
854static __inline__ __m512i __DEFAULT_FN_ATTRS512
855_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
856{
857  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
858                                            (__v32hi)_mm512_max_epu16(__A, __B),
859                                            (__v32hi)__W);
860}
861
862static __inline__ __m512i __DEFAULT_FN_ATTRS512
863_mm512_min_epi8 (__m512i __A, __m512i __B)
864{
865  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
866}
867
868static __inline__ __m512i __DEFAULT_FN_ATTRS512
869_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
870{
871  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
872                                             (__v64qi)_mm512_min_epi8(__A, __B),
873                                             (__v64qi)_mm512_setzero_si512());
874}
875
876static __inline__ __m512i __DEFAULT_FN_ATTRS512
877_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
878{
879  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
880                                             (__v64qi)_mm512_min_epi8(__A, __B),
881                                             (__v64qi)__W);
882}
883
884static __inline__ __m512i __DEFAULT_FN_ATTRS512
885_mm512_min_epi16 (__m512i __A, __m512i __B)
886{
887  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS512
891_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
892{
893  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
894                                            (__v32hi)_mm512_min_epi16(__A, __B),
895                                            (__v32hi)_mm512_setzero_si512());
896}
897
898static __inline__ __m512i __DEFAULT_FN_ATTRS512
899_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
900{
901  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
902                                            (__v32hi)_mm512_min_epi16(__A, __B),
903                                            (__v32hi)__W);
904}
905
906static __inline__ __m512i __DEFAULT_FN_ATTRS512
907_mm512_min_epu8 (__m512i __A, __m512i __B)
908{
909  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
910}
911
912static __inline__ __m512i __DEFAULT_FN_ATTRS512
913_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
914{
915  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
916                                             (__v64qi)_mm512_min_epu8(__A, __B),
917                                             (__v64qi)_mm512_setzero_si512());
918}
919
920static __inline__ __m512i __DEFAULT_FN_ATTRS512
921_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
922{
923  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
924                                             (__v64qi)_mm512_min_epu8(__A, __B),
925                                             (__v64qi)__W);
926}
927
928static __inline__ __m512i __DEFAULT_FN_ATTRS512
929_mm512_min_epu16 (__m512i __A, __m512i __B)
930{
931  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
932}
933
934static __inline__ __m512i __DEFAULT_FN_ATTRS512
935_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
936{
937  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
938                                            (__v32hi)_mm512_min_epu16(__A, __B),
939                                            (__v32hi)_mm512_setzero_si512());
940}
941
942static __inline__ __m512i __DEFAULT_FN_ATTRS512
943_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
944{
945  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
946                                            (__v32hi)_mm512_min_epu16(__A, __B),
947                                            (__v32hi)__W);
948}
949
950static __inline__ __m512i __DEFAULT_FN_ATTRS512
951_mm512_shuffle_epi8(__m512i __A, __m512i __B)
952{
953  return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
954}
955
956static __inline__ __m512i __DEFAULT_FN_ATTRS512
957_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
958{
959  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
961                                         (__v64qi)__W);
962}
963
964static __inline__ __m512i __DEFAULT_FN_ATTRS512
965_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
966{
967  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968                                         (__v64qi)_mm512_shuffle_epi8(__A, __B),
969                                         (__v64qi)_mm512_setzero_si512());
970}
971
972static __inline__ __m512i __DEFAULT_FN_ATTRS512
973_mm512_subs_epi8 (__m512i __A, __m512i __B)
974{
975  return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
976}
977
978static __inline__ __m512i __DEFAULT_FN_ATTRS512
979_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
980{
981  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
982                                        (__v64qi)_mm512_subs_epi8(__A, __B),
983                                        (__v64qi)__W);
984}
985
986static __inline__ __m512i __DEFAULT_FN_ATTRS512
987_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
988{
989  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
990                                        (__v64qi)_mm512_subs_epi8(__A, __B),
991                                        (__v64qi)_mm512_setzero_si512());
992}
993
994static __inline__ __m512i __DEFAULT_FN_ATTRS512
995_mm512_subs_epi16 (__m512i __A, __m512i __B)
996{
997  return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
998}
999
1000static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1002{
1003  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1004                                        (__v32hi)_mm512_subs_epi16(__A, __B),
1005                                        (__v32hi)__W);
1006}
1007
1008static __inline__ __m512i __DEFAULT_FN_ATTRS512
1009_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
1010{
1011  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1012                                        (__v32hi)_mm512_subs_epi16(__A, __B),
1013                                        (__v32hi)_mm512_setzero_si512());
1014}
1015
1016static __inline__ __m512i __DEFAULT_FN_ATTRS512
1017_mm512_subs_epu8 (__m512i __A, __m512i __B)
1018{
1019  return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
1020}
1021
1022static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
1024{
1025  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1026                                        (__v64qi)_mm512_subs_epu8(__A, __B),
1027                                        (__v64qi)__W);
1028}
1029
1030static __inline__ __m512i __DEFAULT_FN_ATTRS512
1031_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1032{
1033  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1034                                        (__v64qi)_mm512_subs_epu8(__A, __B),
1035                                        (__v64qi)_mm512_setzero_si512());
1036}
1037
1038static __inline__ __m512i __DEFAULT_FN_ATTRS512
1039_mm512_subs_epu16 (__m512i __A, __m512i __B)
1040{
1041  return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
1042}
1043
1044static __inline__ __m512i __DEFAULT_FN_ATTRS512
1045_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1046{
1047  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1048                                        (__v32hi)_mm512_subs_epu16(__A, __B),
1049                                        (__v32hi)__W);
1050}
1051
1052static __inline__ __m512i __DEFAULT_FN_ATTRS512
1053_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1054{
1055  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1056                                        (__v32hi)_mm512_subs_epu16(__A, __B),
1057                                        (__v32hi)_mm512_setzero_si512());
1058}
1059
1060static __inline__ __m512i __DEFAULT_FN_ATTRS512
1061_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1062{
1063  return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1064                                                 (__v32hi)__B);
1065}
1066
1067static __inline__ __m512i __DEFAULT_FN_ATTRS512
1068_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1069                               __m512i __B)
1070{
1071  return (__m512i)__builtin_ia32_selectw_512(__U,
1072                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1073                              (__v32hi)__A);
1074}
1075
1076static __inline__ __m512i __DEFAULT_FN_ATTRS512
1077_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1078                                __m512i __B)
1079{
1080  return (__m512i)__builtin_ia32_selectw_512(__U,
1081                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1082                              (__v32hi)__I);
1083}
1084
1085static __inline__ __m512i __DEFAULT_FN_ATTRS512
1086_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1087                                __m512i __B)
1088{
1089  return (__m512i)__builtin_ia32_selectw_512(__U,
1090                              (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1091                              (__v32hi)_mm512_setzero_si512());
1092}
1093
1094static __inline__ __m512i __DEFAULT_FN_ATTRS512
1095_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1096{
1097  return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1102{
1103  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1104                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1105                                         (__v32hi)__W);
1106}
1107
1108static __inline__ __m512i __DEFAULT_FN_ATTRS512
1109_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1110{
1111  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1112                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1113                                         (__v32hi)_mm512_setzero_si512());
1114}
1115
1116static __inline__ __m512i __DEFAULT_FN_ATTRS512
1117_mm512_mulhi_epi16(__m512i __A, __m512i __B)
1118{
1119  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
1120}
1121
1122static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1124       __m512i __B)
1125{
1126  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
1128                                          (__v32hi)__W);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512
1132_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1133{
1134  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
1136                                          (__v32hi)_mm512_setzero_si512());
1137}
1138
1139static __inline__ __m512i __DEFAULT_FN_ATTRS512
1140_mm512_mulhi_epu16(__m512i __A, __m512i __B)
1141{
1142  return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1143}
1144
1145static __inline__ __m512i __DEFAULT_FN_ATTRS512
1146_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1147{
1148  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1149                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
1150                                          (__v32hi)__W);
1151}
1152
1153static __inline__ __m512i __DEFAULT_FN_ATTRS512
1154_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1155{
1156  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1157                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
1158                                          (__v32hi)_mm512_setzero_si512());
1159}
1160
1161static __inline__ __m512i __DEFAULT_FN_ATTRS512
1162_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1163  return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1164}
1165
1166static __inline__ __m512i __DEFAULT_FN_ATTRS512
1167_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1168                          __m512i __Y) {
1169  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1170                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1171                                        (__v32hi)__W);
1172}
1173
1174static __inline__ __m512i __DEFAULT_FN_ATTRS512
1175_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1176  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1177                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1178                                        (__v32hi)_mm512_setzero_si512());
1179}
1180
1181static __inline__ __m512i __DEFAULT_FN_ATTRS512
1182_mm512_madd_epi16(__m512i __A, __m512i __B) {
1183  return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1184}
1185
1186static __inline__ __m512i __DEFAULT_FN_ATTRS512
1187_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1188  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1189                                           (__v16si)_mm512_madd_epi16(__A, __B),
1190                                           (__v16si)__W);
1191}
1192
1193static __inline__ __m512i __DEFAULT_FN_ATTRS512
1194_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1195  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1196                                           (__v16si)_mm512_madd_epi16(__A, __B),
1197                                           (__v16si)_mm512_setzero_si512());
1198}
1199
1200static __inline__ __m256i __DEFAULT_FN_ATTRS512
1201_mm512_cvtsepi16_epi8 (__m512i __A) {
1202  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1203               (__v32qi)_mm256_setzero_si256(),
1204               (__mmask32) -1);
1205}
1206
1207static __inline__ __m256i __DEFAULT_FN_ATTRS512
1208_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1209  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1210               (__v32qi)__O,
1211               __M);
1212}
1213
1214static __inline__ __m256i __DEFAULT_FN_ATTRS512
1215_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1216  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1217               (__v32qi) _mm256_setzero_si256(),
1218               __M);
1219}
1220
1221static __inline__ __m256i __DEFAULT_FN_ATTRS512
1222_mm512_cvtusepi16_epi8 (__m512i __A) {
1223  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1224                (__v32qi) _mm256_setzero_si256(),
1225                (__mmask32) -1);
1226}
1227
1228static __inline__ __m256i __DEFAULT_FN_ATTRS512
1229_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1230  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1231                (__v32qi) __O,
1232                __M);
1233}
1234
1235static __inline__ __m256i __DEFAULT_FN_ATTRS512
1236_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1237  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1238                (__v32qi) _mm256_setzero_si256(),
1239                __M);
1240}
1241
1242static __inline__ __m256i __DEFAULT_FN_ATTRS512
1243_mm512_cvtepi16_epi8 (__m512i __A) {
1244  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1245              (__v32qi) _mm256_undefined_si256(),
1246              (__mmask32) -1);
1247}
1248
1249static __inline__ __m256i __DEFAULT_FN_ATTRS512
1250_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1251  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1252              (__v32qi) __O,
1253              __M);
1254}
1255
1256static __inline__ __m256i __DEFAULT_FN_ATTRS512
1257_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1258  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1259              (__v32qi) _mm256_setzero_si256(),
1260              __M);
1261}
1262
1263static __inline__ void __DEFAULT_FN_ATTRS512
1264_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1265{
1266  __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1267}
1268
1269static __inline__ void __DEFAULT_FN_ATTRS512
1270_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1271{
1272  __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1273}
1274
1275static __inline__ void __DEFAULT_FN_ATTRS512
1276_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1277{
1278  __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1279}
1280
1281static __inline__ __m512i __DEFAULT_FN_ATTRS512
1282_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1283  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1284                                          8,  64+8,   9, 64+9,
1285                                          10, 64+10, 11, 64+11,
1286                                          12, 64+12, 13, 64+13,
1287                                          14, 64+14, 15, 64+15,
1288                                          24, 64+24, 25, 64+25,
1289                                          26, 64+26, 27, 64+27,
1290                                          28, 64+28, 29, 64+29,
1291                                          30, 64+30, 31, 64+31,
1292                                          40, 64+40, 41, 64+41,
1293                                          42, 64+42, 43, 64+43,
1294                                          44, 64+44, 45, 64+45,
1295                                          46, 64+46, 47, 64+47,
1296                                          56, 64+56, 57, 64+57,
1297                                          58, 64+58, 59, 64+59,
1298                                          60, 64+60, 61, 64+61,
1299                                          62, 64+62, 63, 64+63);
1300}
1301
1302static __inline__ __m512i __DEFAULT_FN_ATTRS512
1303_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1304  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1305                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1306                                        (__v64qi)__W);
1307}
1308
1309static __inline__ __m512i __DEFAULT_FN_ATTRS512
1310_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1311  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1312                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1313                                        (__v64qi)_mm512_setzero_si512());
1314}
1315
1316static __inline__ __m512i __DEFAULT_FN_ATTRS512
1317_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1318  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1319                                          4,  32+4,   5, 32+5,
1320                                          6,  32+6,   7, 32+7,
1321                                          12, 32+12, 13, 32+13,
1322                                          14, 32+14, 15, 32+15,
1323                                          20, 32+20, 21, 32+21,
1324                                          22, 32+22, 23, 32+23,
1325                                          28, 32+28, 29, 32+29,
1326                                          30, 32+30, 31, 32+31);
1327}
1328
1329static __inline__ __m512i __DEFAULT_FN_ATTRS512
1330_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1331  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1332                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1333                                       (__v32hi)__W);
1334}
1335
1336static __inline__ __m512i __DEFAULT_FN_ATTRS512
1337_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1338  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1339                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1340                                       (__v32hi)_mm512_setzero_si512());
1341}
1342
1343static __inline__ __m512i __DEFAULT_FN_ATTRS512
1344_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1345  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1346                                          0,  64+0,   1, 64+1,
1347                                          2,  64+2,   3, 64+3,
1348                                          4,  64+4,   5, 64+5,
1349                                          6,  64+6,   7, 64+7,
1350                                          16, 64+16, 17, 64+17,
1351                                          18, 64+18, 19, 64+19,
1352                                          20, 64+20, 21, 64+21,
1353                                          22, 64+22, 23, 64+23,
1354                                          32, 64+32, 33, 64+33,
1355                                          34, 64+34, 35, 64+35,
1356                                          36, 64+36, 37, 64+37,
1357                                          38, 64+38, 39, 64+39,
1358                                          48, 64+48, 49, 64+49,
1359                                          50, 64+50, 51, 64+51,
1360                                          52, 64+52, 53, 64+53,
1361                                          54, 64+54, 55, 64+55);
1362}
1363
1364static __inline__ __m512i __DEFAULT_FN_ATTRS512
1365_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1366  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1367                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1368                                        (__v64qi)__W);
1369}
1370
1371static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1373  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1374                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1375                                        (__v64qi)_mm512_setzero_si512());
1376}
1377
1378static __inline__ __m512i __DEFAULT_FN_ATTRS512
1379_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1380  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1381                                          0,  32+0,   1, 32+1,
1382                                          2,  32+2,   3, 32+3,
1383                                          8,  32+8,   9, 32+9,
1384                                          10, 32+10, 11, 32+11,
1385                                          16, 32+16, 17, 32+17,
1386                                          18, 32+18, 19, 32+19,
1387                                          24, 32+24, 25, 32+25,
1388                                          26, 32+26, 27, 32+27);
1389}
1390
1391static __inline__ __m512i __DEFAULT_FN_ATTRS512
1392_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1393  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1394                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1395                                       (__v32hi)__W);
1396}
1397
1398static __inline__ __m512i __DEFAULT_FN_ATTRS512
1399_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1400  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1401                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1402                                       (__v32hi)_mm512_setzero_si512());
1403}
1404
1405static __inline__ __m512i __DEFAULT_FN_ATTRS512
1406_mm512_cvtepi8_epi16(__m256i __A)
1407{
1408  /* This function always performs a signed extension, but __v32qi is a char
1409     which may be signed or unsigned, so use __v32qs. */
1410  return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1411}
1412
1413static __inline__ __m512i __DEFAULT_FN_ATTRS512
1414_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1415{
1416  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1418                                             (__v32hi)__W);
1419}
1420
1421static __inline__ __m512i __DEFAULT_FN_ATTRS512
1422_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1423{
1424  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425                                             (__v32hi)_mm512_cvtepi8_epi16(__A),
1426                                             (__v32hi)_mm512_setzero_si512());
1427}
1428
1429static __inline__ __m512i __DEFAULT_FN_ATTRS512
1430_mm512_cvtepu8_epi16(__m256i __A)
1431{
1432  return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1433}
1434
1435static __inline__ __m512i __DEFAULT_FN_ATTRS512
1436_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1437{
1438  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1439                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1440                                             (__v32hi)__W);
1441}
1442
1443static __inline__ __m512i __DEFAULT_FN_ATTRS512
1444_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1445{
1446  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1447                                             (__v32hi)_mm512_cvtepu8_epi16(__A),
1448                                             (__v32hi)_mm512_setzero_si512());
1449}
1450
1451
/* Expands to __builtin_ia32_pshufhw512 on A (viewed as __v32hi) with imm cast
   to int. Kept as a macro rather than a function so imm reaches the builtin
   unchanged. The whole expansion is parenthesized so the leading cast stays
   attached to the call when the macro is followed by a postfix operator. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1454
/* Masked form of _mm512_shufflehi_epi16: hands U, the unmasked result and W
   to __builtin_ia32_selectw_512. The whole expansion is parenthesized so the
   leading cast stays attached to the call when the macro is followed by a
   postfix operator. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1460
/* Masked form of _mm512_shufflehi_epi16: hands U, the unmasked result and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512. The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1466
/* Expands to __builtin_ia32_pshuflw512 on A (viewed as __v32hi) with imm cast
   to int. Kept as a macro rather than a function so imm reaches the builtin
   unchanged. The whole expansion is parenthesized so the leading cast stays
   attached to the call when the macro is followed by a postfix operator. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1469
1470
/* Masked form of _mm512_shufflelo_epi16: hands U, the unmasked result and W
   to __builtin_ia32_selectw_512. The whole expansion is parenthesized so the
   leading cast stays attached to the call when the macro is followed by a
   postfix operator. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1476
1477
/* Masked form of _mm512_shufflelo_epi16: hands U, the unmasked result and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512. The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1483
1484static __inline__ __m512i __DEFAULT_FN_ATTRS512
1485_mm512_sllv_epi16(__m512i __A, __m512i __B)
1486{
1487  return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1488}
1489
1490static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1492{
1493  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1495                                           (__v32hi)__W);
1496}
1497
1498static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1500{
1501  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502                                           (__v32hi)_mm512_sllv_epi16(__A, __B),
1503                                           (__v32hi)_mm512_setzero_si512());
1504}
1505
1506static __inline__ __m512i __DEFAULT_FN_ATTRS512
1507_mm512_sll_epi16(__m512i __A, __m128i __B)
1508{
1509  return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1510}
1511
1512static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1514{
1515  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1517                                          (__v32hi)__W);
1518}
1519
1520static __inline__ __m512i __DEFAULT_FN_ATTRS512
1521_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1522{
1523  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1524                                          (__v32hi)_mm512_sll_epi16(__A, __B),
1525                                          (__v32hi)_mm512_setzero_si512());
1526}
1527
1528static __inline__ __m512i __DEFAULT_FN_ATTRS512
1529_mm512_slli_epi16(__m512i __A, int __B)
1530{
1531  return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
1532}
1533
1534static __inline__ __m512i __DEFAULT_FN_ATTRS512
1535_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1536{
1537  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1538                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1539                                         (__v32hi)__W);
1540}
1541
1542static __inline__ __m512i __DEFAULT_FN_ATTRS512
1543_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1544{
1545  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1546                                         (__v32hi)_mm512_slli_epi16(__A, __B),
1547                                         (__v32hi)_mm512_setzero_si512());
1548}
1549
/* Expands to __builtin_ia32_pslldqi512_byteshift applied to a (converted to
 * __m512i and then __v8di) and imm (converted to int); the result is cast to
 * __m512i.  The expansion is fully parenthesized so the leading cast cannot
 * be re-associated with an operator written after the macro invocation. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1552
1553static __inline__ __m512i __DEFAULT_FN_ATTRS512
1554_mm512_srlv_epi16(__m512i __A, __m512i __B)
1555{
1556  return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1557}
1558
1559static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1561{
1562  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1564                                           (__v32hi)__W);
1565}
1566
1567static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1569{
1570  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1571                                           (__v32hi)_mm512_srlv_epi16(__A, __B),
1572                                           (__v32hi)_mm512_setzero_si512());
1573}
1574
1575static __inline__ __m512i __DEFAULT_FN_ATTRS512
1576_mm512_srav_epi16(__m512i __A, __m512i __B)
1577{
1578  return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1579}
1580
1581static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1583{
1584  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1586                                           (__v32hi)__W);
1587}
1588
1589static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1591{
1592  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1593                                           (__v32hi)_mm512_srav_epi16(__A, __B),
1594                                           (__v32hi)_mm512_setzero_si512());
1595}
1596
1597static __inline__ __m512i __DEFAULT_FN_ATTRS512
1598_mm512_sra_epi16(__m512i __A, __m128i __B)
1599{
1600  return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1601}
1602
1603static __inline__ __m512i __DEFAULT_FN_ATTRS512
1604_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1605{
1606  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1607                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1608                                          (__v32hi)__W);
1609}
1610
1611static __inline__ __m512i __DEFAULT_FN_ATTRS512
1612_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1613{
1614  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1615                                          (__v32hi)_mm512_sra_epi16(__A, __B),
1616                                          (__v32hi)_mm512_setzero_si512());
1617}
1618
1619static __inline__ __m512i __DEFAULT_FN_ATTRS512
1620_mm512_srai_epi16(__m512i __A, int __B)
1621{
1622  return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1623}
1624
1625static __inline__ __m512i __DEFAULT_FN_ATTRS512
1626_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1627{
1628  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1629                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1630                                         (__v32hi)__W);
1631}
1632
1633static __inline__ __m512i __DEFAULT_FN_ATTRS512
1634_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1635{
1636  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1637                                         (__v32hi)_mm512_srai_epi16(__A, __B),
1638                                         (__v32hi)_mm512_setzero_si512());
1639}
1640
1641static __inline__ __m512i __DEFAULT_FN_ATTRS512
1642_mm512_srl_epi16(__m512i __A, __m128i __B)
1643{
1644  return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1645}
1646
1647static __inline__ __m512i __DEFAULT_FN_ATTRS512
1648_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1649{
1650  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1651                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1652                                          (__v32hi)__W);
1653}
1654
1655static __inline__ __m512i __DEFAULT_FN_ATTRS512
1656_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1657{
1658  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1659                                          (__v32hi)_mm512_srl_epi16(__A, __B),
1660                                          (__v32hi)_mm512_setzero_si512());
1661}
1662
1663static __inline__ __m512i __DEFAULT_FN_ATTRS512
1664_mm512_srli_epi16(__m512i __A, int __B)
1665{
1666  return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1667}
1668
1669static __inline__ __m512i __DEFAULT_FN_ATTRS512
1670_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1671{
1672  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1673                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1674                                         (__v32hi)__W);
1675}
1676
1677static __inline__ __m512i __DEFAULT_FN_ATTRS512
1678_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1679{
1680  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1681                                         (__v32hi)_mm512_srli_epi16(__A, __B),
1682                                         (__v32hi)_mm512_setzero_si512());
1683}
1684
/* Expands to __builtin_ia32_psrldqi512_byteshift applied to a (converted to
 * __m512i and then __v8di) and imm (converted to int); the result is cast to
 * __m512i.  The expansion is fully parenthesized so the leading cast cannot
 * be re-associated with an operator written after the macro invocation. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1687
1688static __inline__ __m512i __DEFAULT_FN_ATTRS512
1689_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1690{
1691  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1692                (__v32hi) __A,
1693                (__v32hi) __W);
1694}
1695
1696static __inline__ __m512i __DEFAULT_FN_ATTRS512
1697_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1698{
1699  return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1700                (__v32hi) __A,
1701                (__v32hi) _mm512_setzero_si512 ());
1702}
1703
1704static __inline__ __m512i __DEFAULT_FN_ATTRS512
1705_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1706{
1707  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1708                (__v64qi) __A,
1709                (__v64qi) __W);
1710}
1711
1712static __inline__ __m512i __DEFAULT_FN_ATTRS512
1713_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1714{
1715  return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1716                (__v64qi) __A,
1717                (__v64qi) _mm512_setzero_si512 ());
1718}
1719
1720static __inline__ __m512i __DEFAULT_FN_ATTRS512
1721_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1722{
1723  return (__m512i) __builtin_ia32_selectb_512(__M,
1724                                              (__v64qi)_mm512_set1_epi8(__A),
1725                                              (__v64qi) __O);
1726}
1727
1728static __inline__ __m512i __DEFAULT_FN_ATTRS512
1729_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1730{
1731  return (__m512i) __builtin_ia32_selectb_512(__M,
1732                                              (__v64qi) _mm512_set1_epi8(__A),
1733                                              (__v64qi) _mm512_setzero_si512());
1734}
1735
1736static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1737_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1738{
1739  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1740                (__mmask64) __B);
1741}
1742
1743static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1744_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1745{
1746  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1747                (__mmask32) __B);
1748}
1749
1750static __inline __m512i __DEFAULT_FN_ATTRS512
1751_mm512_loadu_epi16 (void const *__P)
1752{
1753  struct __loadu_epi16 {
1754    __m512i __v;
1755  } __attribute__((__packed__, __may_alias__));
1756  return ((struct __loadu_epi16*)__P)->__v;
1757}
1758
1759static __inline__ __m512i __DEFAULT_FN_ATTRS512
1760_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1761{
1762  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1763                 (__v32hi) __W,
1764                 (__mmask32) __U);
1765}
1766
1767static __inline__ __m512i __DEFAULT_FN_ATTRS512
1768_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1769{
1770  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1771                 (__v32hi)
1772                 _mm512_setzero_si512 (),
1773                 (__mmask32) __U);
1774}
1775
1776static __inline __m512i __DEFAULT_FN_ATTRS512
1777_mm512_loadu_epi8 (void const *__P)
1778{
1779  struct __loadu_epi8 {
1780    __m512i __v;
1781  } __attribute__((__packed__, __may_alias__));
1782  return ((struct __loadu_epi8*)__P)->__v;
1783}
1784
1785static __inline__ __m512i __DEFAULT_FN_ATTRS512
1786_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1787{
1788  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1789                 (__v64qi) __W,
1790                 (__mmask64) __U);
1791}
1792
1793static __inline__ __m512i __DEFAULT_FN_ATTRS512
1794_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1795{
1796  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1797                 (__v64qi)
1798                 _mm512_setzero_si512 (),
1799                 (__mmask64) __U);
1800}
1801
1802static __inline void __DEFAULT_FN_ATTRS512
1803_mm512_storeu_epi16 (void *__P, __m512i __A)
1804{
1805  struct __storeu_epi16 {
1806    __m512i __v;
1807  } __attribute__((__packed__, __may_alias__));
1808  ((struct __storeu_epi16*)__P)->__v = __A;
1809}
1810
1811static __inline__ void __DEFAULT_FN_ATTRS512
1812_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1813{
1814  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1815             (__v32hi) __A,
1816             (__mmask32) __U);
1817}
1818
1819static __inline void __DEFAULT_FN_ATTRS512
1820_mm512_storeu_epi8 (void *__P, __m512i __A)
1821{
1822  struct __storeu_epi8 {
1823    __m512i __v;
1824  } __attribute__((__packed__, __may_alias__));
1825  ((struct __storeu_epi8*)__P)->__v = __A;
1826}
1827
1828static __inline__ void __DEFAULT_FN_ATTRS512
1829_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1830{
1831  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1832             (__v64qi) __A,
1833             (__mmask64) __U);
1834}
1835
1836static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1837_mm512_test_epi8_mask (__m512i __A, __m512i __B)
1838{
1839  return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1840                                  _mm512_setzero_si512());
1841}
1842
1843static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1844_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1845{
1846  return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1847                                       _mm512_setzero_si512());
1848}
1849
1850static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1851_mm512_test_epi16_mask (__m512i __A, __m512i __B)
1852{
1853  return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1854                                   _mm512_setzero_si512());
1855}
1856
1857static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1858_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1859{
1860  return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1861                                        _mm512_setzero_si512());
1862}
1863
1864static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1865_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1866{
1867  return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1868}
1869
1870static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1871_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1872{
1873  return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1874                                      _mm512_setzero_si512());
1875}
1876
1877static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1878_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1879{
1880  return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1881                                  _mm512_setzero_si512());
1882}
1883
1884static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1885_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1886{
1887  return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1888                                       _mm512_setzero_si512());
1889}
1890
1891static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1892_mm512_movepi8_mask (__m512i __A)
1893{
1894  return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1895}
1896
1897static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1898_mm512_movepi16_mask (__m512i __A)
1899{
1900  return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1901}
1902
1903static __inline__ __m512i __DEFAULT_FN_ATTRS512
1904_mm512_movm_epi8 (__mmask64 __A)
1905{
1906  return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1907}
1908
1909static __inline__ __m512i __DEFAULT_FN_ATTRS512
1910_mm512_movm_epi16 (__mmask32 __A)
1911{
1912  return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1913}
1914
1915static __inline__ __m512i __DEFAULT_FN_ATTRS512
1916_mm512_broadcastb_epi8 (__m128i __A)
1917{
1918  return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1919                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1920                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1921                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1922                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1923}
1924
1925static __inline__ __m512i __DEFAULT_FN_ATTRS512
1926_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1927{
1928  return (__m512i)__builtin_ia32_selectb_512(__M,
1929                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1930                                             (__v64qi) __O);
1931}
1932
1933static __inline__ __m512i __DEFAULT_FN_ATTRS512
1934_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1935{
1936  return (__m512i)__builtin_ia32_selectb_512(__M,
1937                                             (__v64qi) _mm512_broadcastb_epi8(__A),
1938                                             (__v64qi) _mm512_setzero_si512());
1939}
1940
1941static __inline__ __m512i __DEFAULT_FN_ATTRS512
1942_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1943{
1944  return (__m512i) __builtin_ia32_selectw_512(__M,
1945                                              (__v32hi) _mm512_set1_epi16(__A),
1946                                              (__v32hi) __O);
1947}
1948
1949static __inline__ __m512i __DEFAULT_FN_ATTRS512
1950_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1951{
1952  return (__m512i) __builtin_ia32_selectw_512(__M,
1953                                              (__v32hi) _mm512_set1_epi16(__A),
1954                                              (__v32hi) _mm512_setzero_si512());
1955}
1956
1957static __inline__ __m512i __DEFAULT_FN_ATTRS512
1958_mm512_broadcastw_epi16 (__m128i __A)
1959{
1960  return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1961                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1962                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1963}
1964
1965static __inline__ __m512i __DEFAULT_FN_ATTRS512
1966_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1967{
1968  return (__m512i)__builtin_ia32_selectw_512(__M,
1969                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1970                                             (__v32hi) __O);
1971}
1972
1973static __inline__ __m512i __DEFAULT_FN_ATTRS512
1974_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1975{
1976  return (__m512i)__builtin_ia32_selectw_512(__M,
1977                                             (__v32hi) _mm512_broadcastw_epi16(__A),
1978                                             (__v32hi) _mm512_setzero_si512());
1979}
1980
1981static __inline__ __m512i __DEFAULT_FN_ATTRS512
1982_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1983{
1984  return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1985}
1986
1987static __inline__ __m512i __DEFAULT_FN_ATTRS512
1988_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1989        __m512i __B)
1990{
1991  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1992                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1993                                    (__v32hi)_mm512_setzero_si512());
1994}
1995
1996static __inline__ __m512i __DEFAULT_FN_ATTRS512
1997_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1998             __m512i __B)
1999{
2000  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
2001                                    (__v32hi)_mm512_permutexvar_epi16(__A, __B),
2002                                    (__v32hi)__W);
2003}
2004
/* Expands to __builtin_ia32_palignr512 applied to A and B (each converted to
 * __m512i and then __v64qi) and N (converted to int); the result is cast to
 * __m512i.  The expansion is fully parenthesized so the leading cast cannot
 * be re-associated with an operator written after the macro invocation. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
2008
/* Expands to __builtin_ia32_selectb_512 called with mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and W; the result is cast to __m512i.  The
 * expansion is parenthesized as a whole so that an operator following the
 * invocation applies to the cast result. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)(W)))
2013
/* Expands to __builtin_ia32_selectb_512 called with mask U, the result of
 * _mm512_alignr_epi8(A, B, N), and the vector from _mm512_setzero_si512();
 * the result is cast to __m512i.  The expansion is parenthesized as a whole
 * so that an operator following the invocation applies to the cast result. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)_mm512_setzero_si512()))
2018
/* Expands to __builtin_ia32_dbpsadbw512 applied to A and B (each converted to
 * __m512i and then __v64qi) and imm (converted to int); the result is cast to
 * __m512i.  The expansion is fully parenthesized so the leading cast cannot
 * be re-associated with an operator written after the macro invocation. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
2022
/* Expands to __builtin_ia32_selectw_512 called with mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and W; the result is cast to __m512i.  The
 * expansion is parenthesized as a whole so that an operator following the
 * invocation applies to the cast result. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)(__m512i)(W)))
2027
/* Expands to __builtin_ia32_selectw_512 called with mask U, the result of
 * _mm512_dbsad_epu8(A, B, imm), and the vector from _mm512_setzero_si512();
 * the result is cast to __m512i.  The expansion is parenthesized as a whole
 * so that an operator following the invocation applies to the cast result. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)_mm512_setzero_si512()))
2032
2033static __inline__ __m512i __DEFAULT_FN_ATTRS512
2034_mm512_sad_epu8 (__m512i __A, __m512i __B)
2035{
2036 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2037               (__v64qi) __B);
2038}
2039
2040#undef __DEFAULT_FN_ATTRS512
2041#undef __DEFAULT_FN_ATTRS
2042
2043#endif
2044