1/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLBWINTRIN_H_INCLUDED
29#define _AVX512VLBWINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
32#pragma GCC push_options
33#pragma GCC target("avx512vl,avx512bw")
34#define __DISABLE_AVX512VLBW__
35#endif /* __AVX512VLBW__ */
36
37
38extern __inline __m256i
39__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
40_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
41{
42  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
43						    (__v32qi) __W,
44						    (__mmask32) __U);
45}
46
47extern __inline __m256i
48__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
50{
51  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
52						    (__v32qi)
53						    _mm256_setzero_si256 (),
54						    (__mmask32) __U);
55}
56
57extern __inline __m128i
58__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
60{
61  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
62						    (__v16qi) __W,
63						    (__mmask16) __U);
64}
65
66extern __inline __m128i
67__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
69{
70  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
71						    (__v16qi)
72						    _mm_setzero_hi (),
73						    (__mmask16) __U);
74}
75
76extern __inline void
77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
79{
80  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
81				     (__v32qi) __A,
82				     (__mmask32) __U);
83}
84
85extern __inline void
86__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
88{
89  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
90				     (__v16qi) __A,
91				     (__mmask16) __U);
92}
93
94extern __inline __m256i
95__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
96_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
97{
98  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
99						     (__v16hi) __W,
100						     (__mmask16) __U);
101}
102
103extern __inline __m256i
104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
106{
107  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
108						     (__v16hi)
109						     _mm256_setzero_si256 (),
110						     (__mmask16) __U);
111}
112
113extern __inline __m128i
114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
115_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
116{
117  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
118						     (__v8hi) __W,
119						     (__mmask8) __U);
120}
121
122extern __inline __m128i
123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
124_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
125{
126  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
127						     (__v8hi)
128						     _mm_setzero_hi (),
129						     (__mmask8) __U);
130}
131
132
133extern __inline __m256i
134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
136{
137  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
138						    (__v16hi) __W,
139						    (__mmask16) __U);
140}
141
142extern __inline __m256i
143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
145{
146  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
147						    (__v16hi)
148						    _mm256_setzero_si256 (),
149						    (__mmask16) __U);
150}
151
152extern __inline __m128i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
155{
156  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
157						    (__v8hi) __W,
158						    (__mmask8) __U);
159}
160
161extern __inline __m128i
162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
164{
165  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
166						    (__v8hi)
167						    _mm_setzero_hi (),
168						    (__mmask8) __U);
169}
170
171extern __inline __m256i
172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
174{
175  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
176						     (__v32qi) __W,
177						     (__mmask32) __U);
178}
179
180extern __inline __m256i
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
183{
184  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
185						     (__v32qi)
186						     _mm256_setzero_si256 (),
187						     (__mmask32) __U);
188}
189
190extern __inline __m128i
191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
193{
194  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
195						     (__v16qi) __W,
196						     (__mmask16) __U);
197}
198
199extern __inline __m128i
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
202{
203  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
204						     (__v16qi)
205						     _mm_setzero_hi (),
206						     (__mmask16) __U);
207}
208
209extern __inline __m128i
210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
211_mm256_cvtepi16_epi8 (__m256i __A)
212{
213
214  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
215						  (__v16qi)_mm_undefined_si128(),
216						  (__mmask16) -1);
217}
218
219extern __inline __m128i
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
222{
223  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
224						  (__v16qi) __O, __M);
225}
226
227extern __inline __m128i
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
230{
231  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
232						  (__v16qi)
233						  _mm_setzero_si128 (),
234						  __M);
235}
236
237extern __inline __m128i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm_cvtsepi16_epi8 (__m128i __A)
240{
241
242  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
243						   (__v16qi)_mm_undefined_si128(),
244						   (__mmask8) -1);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
250{
251  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
252						   (__v16qi) __O, __M);
253}
254
255extern __inline __m128i
256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
258{
259  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
260						   (__v16qi)
261						   _mm_setzero_si128 (),
262						   __M);
263}
264
265extern __inline __m128i
266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267_mm256_cvtsepi16_epi8 (__m256i __A)
268{
269
270  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
271						   (__v16qi)_mm_undefined_si128(),
272						   (__mmask16) -1);
273}
274
275extern __inline __m128i
276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
277_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
278{
279  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
280						   (__v16qi) __O, __M);
281}
282
283extern __inline __m128i
284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
286{
287  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
288						   (__v16qi)
289						   _mm_setzero_si128 (),
290						   __M);
291}
292
293extern __inline __m128i
294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
295_mm_cvtusepi16_epi8 (__m128i __A)
296{
297
298  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
299						    (__v16qi)_mm_undefined_si128(),
300						    (__mmask8) -1);
301}
302
303extern __inline __m128i
304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
305_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
306{
307  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
308						    (__v16qi) __O,
309						    __M);
310}
311
312extern __inline __m128i
313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
314_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
315{
316  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
317						    (__v16qi)
318						    _mm_setzero_si128 (),
319						    __M);
320}
321
322extern __inline __m128i
323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
324_mm256_cvtusepi16_epi8 (__m256i __A)
325{
326
327  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
328						    (__v16qi)_mm_undefined_si128(),
329						    (__mmask16) -1);
330}
331
332extern __inline __m128i
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
335{
336  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
337						    (__v16qi) __O,
338						    __M);
339}
340
341extern __inline __m128i
342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
344{
345  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
346						    (__v16qi)
347						    _mm_setzero_si128 (),
348						    __M);
349}
350
351extern __inline __m256i
352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
354{
355  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
356						       (__v32qi) __O,
357						       __M);
358}
359
360extern __inline __m256i
361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
363{
364  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
365						       (__v32qi)
366						       _mm256_setzero_si256 (),
367						       __M);
368}
369
370extern __inline __m256i
371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
372_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
373{
374  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
375							   (__v32qi) __O,
376							   __M);
377}
378
379extern __inline __m256i
380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
381_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
382{
383  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
384							   (__v32qi)
385							   _mm256_setzero_si256 (),
386							   __M);
387}
388
389extern __inline __m128i
390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
392{
393  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
394						       (__v16qi) __O,
395						       __M);
396}
397
398extern __inline __m128i
399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
400_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
401{
402  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
403						       (__v16qi)
404						       _mm_setzero_si128 (),
405						       __M);
406}
407
408extern __inline __m128i
409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
410_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
411{
412  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
413							   (__v16qi) __O,
414							   __M);
415}
416
417extern __inline __m128i
418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
420{
421  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
422							   (__v16qi)
423							   _mm_setzero_si128 (),
424							   __M);
425}
426
427extern __inline __m256i
428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
429_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
430{
431  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
432						       (__v16hi) __O,
433						       __M);
434}
435
436extern __inline __m256i
437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
439{
440  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
441						       (__v16hi)
442						       _mm256_setzero_si256 (),
443						       __M);
444}
445
446extern __inline __m256i
447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
448_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
449{
450  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
451							   (__v16hi) __O,
452							   __M);
453}
454
455extern __inline __m256i
456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
457_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
458{
459  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
460							   (__v16hi)
461							   _mm256_setzero_si256 (),
462							   __M);
463}
464
465extern __inline __m128i
466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
468{
469  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
470						       (__v8hi) __O,
471						       __M);
472}
473
474extern __inline __m128i
475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
477{
478  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
479						       (__v8hi)
480						       _mm_setzero_si128 (),
481						       __M);
482}
483
484extern __inline __m128i
485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
487{
488  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
489							   (__v8hi) __O,
490							   __M);
491}
492
493extern __inline __m128i
494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
496{
497  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
498							   (__v8hi)
499							   _mm_setzero_si128 (),
500							   __M);
501}
502
503extern __inline __m256i
504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
506{
507  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
508						     (__v16hi) __A,
509						     (__v16hi)
510						     _mm256_setzero_si256 (),
511						     (__mmask16) -1);
512}
513
514extern __inline __m256i
515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
516_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
517				__m256i __B)
518{
519  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
520						     (__v16hi) __A,
521						     (__v16hi)
522						     _mm256_setzero_si256 (),
523						     (__mmask16) __M);
524}
525
526extern __inline __m256i
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
529			       __m256i __B)
530{
531  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
532						     (__v16hi) __A,
533						     (__v16hi) __W,
534						     (__mmask16) __M);
535}
536
537extern __inline __m128i
538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
540{
541  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
542						     (__v8hi) __A,
543						     (__v8hi)
544						     _mm_setzero_hi (),
545						     (__mmask8) -1);
546}
547
548extern __inline __m128i
549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
551{
552  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
553						     (__v8hi) __A,
554						     (__v8hi)
555						     _mm_setzero_si128 (),
556						     (__mmask8) __M);
557}
558
559extern __inline __m128i
560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
562			    __m128i __B)
563{
564  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
565						     (__v8hi) __A,
566						     (__v8hi) __W,
567						     (__mmask8) __M);
568}
569
570extern __inline __m256i
571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
572_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
573{
574  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
575							/* idx */ ,
576							(__v16hi) __A,
577							(__v16hi) __B,
578							(__mmask16) -
579							1);
580}
581
582extern __inline __m256i
583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
584_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
585				__m256i __I, __m256i __B)
586{
587  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
588							/* idx */ ,
589							(__v16hi) __A,
590							(__v16hi) __B,
591							(__mmask16)
592							__U);
593}
594
595extern __inline __m256i
596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
598				 __mmask16 __U, __m256i __B)
599{
600  return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
601							(__v16hi) __I
602							/* idx */ ,
603							(__v16hi) __B,
604							(__mmask16)
605							__U);
606}
607
608extern __inline __m256i
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
611				 __m256i __I, __m256i __B)
612{
613  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
614							 /* idx */ ,
615							 (__v16hi) __A,
616							 (__v16hi) __B,
617							 (__mmask16)
618							 __U);
619}
620
621extern __inline __m128i
622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
624{
625  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
626							/* idx */ ,
627							(__v8hi) __A,
628							(__v8hi) __B,
629							(__mmask8) -
630							1);
631}
632
633extern __inline __m128i
634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
636			     __m128i __B)
637{
638  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
639							/* idx */ ,
640							(__v8hi) __A,
641							(__v8hi) __B,
642							(__mmask8)
643							__U);
644}
645
646extern __inline __m128i
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
649			      __m128i __B)
650{
651  return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
652							(__v8hi) __I
653							/* idx */ ,
654							(__v8hi) __B,
655							(__mmask8)
656							__U);
657}
658
659extern __inline __m128i
660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
661_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
662			      __m128i __B)
663{
664  return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
665							 /* idx */ ,
666							 (__v8hi) __A,
667							 (__v8hi) __B,
668							 (__mmask8)
669							 __U);
670}
671
672extern __inline __m256i
673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
675			   __m256i __Y)
676{
677  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
678						     (__v32qi) __Y,
679						     (__v16hi) __W,
680						     (__mmask16) __U);
681}
682
683extern __inline __m256i
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
686{
687  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
688						     (__v32qi) __Y,
689						     (__v16hi)
690						     _mm256_setzero_si256 (),
691						     (__mmask16) __U);
692}
693
694extern __inline __m128i
695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
696_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
697			__m128i __Y)
698{
699  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
700						     (__v16qi) __Y,
701						     (__v8hi) __W,
702						     (__mmask8) __U);
703}
704
705extern __inline __m128i
706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
707_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
708{
709  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
710						     (__v16qi) __Y,
711						     (__v8hi)
712						     _mm_setzero_hi (),
713						     (__mmask8) __U);
714}
715
716extern __inline __m256i
717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
719			__m256i __B)
720{
721  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
722						   (__v16hi) __B,
723						   (__v8si) __W,
724						   (__mmask8) __U);
725}
726
727extern __inline __m256i
728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
729_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
730{
731  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
732						   (__v16hi) __B,
733						   (__v8si)
734						   _mm256_setzero_si256 (),
735						   (__mmask8) __U);
736}
737
738extern __inline __m128i
739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
740_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
741		     __m128i __B)
742{
743  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
744						   (__v8hi) __B,
745						   (__v4si) __W,
746						   (__mmask8) __U);
747}
748
749extern __inline __m128i
750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
751_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
752{
753  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
754						   (__v8hi) __B,
755						   (__v4si)
756						   _mm_setzero_si128 (),
757						   (__mmask8) __U);
758}
759
760extern __inline __mmask16
761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
762_mm_movepi8_mask (__m128i __A)
763{
764  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
765}
766
767extern __inline __mmask32
768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769_mm256_movepi8_mask (__m256i __A)
770{
771  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
772}
773
774extern __inline __mmask8
775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776_mm_movepi16_mask (__m128i __A)
777{
778  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
779}
780
781extern __inline __mmask16
782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
783_mm256_movepi16_mask (__m256i __A)
784{
785  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
786}
787
788extern __inline __m128i
789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
790_mm_movm_epi8 (__mmask16 __A)
791{
792  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
793}
794
795extern __inline __m256i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm256_movm_epi8 (__mmask32 __A)
798{
799  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
800}
801
802extern __inline __m128i
803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804_mm_movm_epi16 (__mmask8 __A)
805{
806  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
807}
808
809extern __inline __m256i
810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
811_mm256_movm_epi16 (__mmask16 __A)
812{
813  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
814}
815
816extern __inline __mmask16
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm_test_epi8_mask (__m128i __A, __m128i __B)
819{
820  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
821						(__v16qi) __B,
822						(__mmask16) -1);
823}
824
825extern __inline __mmask16
826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
827_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
828{
829  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
830						(__v16qi) __B, __U);
831}
832
833extern __inline __mmask32
834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835_mm256_test_epi8_mask (__m256i __A, __m256i __B)
836{
837  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
838						(__v32qi) __B,
839						(__mmask32) -1);
840}
841
842extern __inline __mmask32
843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
845{
846  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
847						(__v32qi) __B, __U);
848}
849
850extern __inline __mmask8
851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
852_mm_test_epi16_mask (__m128i __A, __m128i __B)
853{
854  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
855					       (__v8hi) __B,
856					       (__mmask8) -1);
857}
858
859extern __inline __mmask8
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
862{
863  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
864					       (__v8hi) __B, __U);
865}
866
867extern __inline __mmask16
868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
869_mm256_test_epi16_mask (__m256i __A, __m256i __B)
870{
871  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
872						(__v16hi) __B,
873						(__mmask16) -1);
874}
875
876extern __inline __mmask16
877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
879{
880  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
881						(__v16hi) __B, __U);
882}
883
884extern __inline __m256i
885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
886_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
887{
888  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
889						  (__v16hi) __B,
890						  (__v16hi)
891						  _mm256_setzero_si256 (),
892						  (__mmask16) __M);
893}
894
895extern __inline __m256i
896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
897_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
898		       __m256i __B)
899{
900  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
901						  (__v16hi) __B,
902						  (__v16hi) __W,
903						  (__mmask16) __M);
904}
905
906extern __inline __m128i
907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
909{
910  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
911						  (__v8hi) __B,
912						  (__v8hi)
913						  _mm_setzero_di (),
914						  (__mmask8) __M);
915}
916
917extern __inline __m128i
918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
919_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
920		    __m128i __B)
921{
922  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
923						  (__v8hi) __B,
924						  (__v8hi) __W,
925						  (__mmask8) __M);
926}
927
928extern __inline __m256i
929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
931{
932  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
933						  (__v16hi) __B,
934						  (__v16hi)
935						  _mm256_setzero_si256 (),
936						  (__mmask16) __M);
937}
938
939extern __inline __m256i
940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
942		       __m256i __B)
943{
944  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
945						  (__v16hi) __B,
946						  (__v16hi) __W,
947						  (__mmask16) __M);
948}
949
950extern __inline __m256i
951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
952_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
953{
954  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
955						  (__v32qi) __B,
956						  (__v32qi)
957						  _mm256_setzero_si256 (),
958						  (__mmask32) __M);
959}
960
961extern __inline __m256i
962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
963_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
964		      __m256i __B)
965{
966  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
967						  (__v32qi) __B,
968						  (__v32qi) __W,
969						  (__mmask32) __M);
970}
971
972extern __inline __m128i
973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
974_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
975{
976  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
977						  (__v16qi) __B,
978						  (__v16qi)
979						  _mm_setzero_di (),
980						  (__mmask16) __M);
981}
982
983extern __inline __m128i
984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
985_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
986		   __m128i __B)
987{
988  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
989						  (__v16qi) __B,
990						  (__v16qi) __W,
991						  (__mmask16) __M);
992}
993
994extern __inline __m256i
995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
997{
998  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
999						  (__v32qi) __B,
1000						  (__v32qi)
1001						  _mm256_setzero_si256 (),
1002						  (__mmask32) __M);
1003}
1004
1005extern __inline __m256i
1006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1008		      __m256i __B)
1009{
1010  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1011						  (__v32qi) __B,
1012						  (__v32qi) __W,
1013						  (__mmask32) __M);
1014}
1015
1016extern __inline __m128i
1017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1018_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1019{
1020  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1021						  (__v16qi) __B,
1022						  (__v16qi)
1023						  _mm_setzero_di (),
1024						  (__mmask16) __M);
1025}
1026
1027extern __inline __m128i
1028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1029_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1030		   __m128i __B)
1031{
1032  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1033						  (__v16qi) __B,
1034						  (__v16qi) __W,
1035						  (__mmask16) __M);
1036}
1037
1038extern __inline __m256i
1039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1041{
1042  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1043						  (__v32qi) __B,
1044						  (__v32qi)
1045						  _mm256_setzero_si256 (),
1046						  (__mmask32) __M);
1047}
1048
1049extern __inline __m256i
1050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1052		      __m256i __B)
1053{
1054  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1055						  (__v32qi) __B,
1056						  (__v32qi) __W,
1057						  (__mmask32) __M);
1058}
1059
1060extern __inline __m128i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1063{
1064  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1065						  (__v16qi) __B,
1066						  (__v16qi)
1067						  _mm_setzero_di (),
1068						  (__mmask16) __M);
1069}
1070
1071extern __inline __m128i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1074		   __m128i __B)
1075{
1076  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1077						  (__v16qi) __B,
1078						  (__v16qi) __W,
1079						  (__mmask16) __M);
1080}
1081
1082extern __inline __m256i
1083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1085{
1086  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1087						  (__v32qi) __B,
1088						  (__v32qi)
1089						  _mm256_setzero_si256 (),
1090						  (__mmask32) __M);
1091}
1092
1093extern __inline __m256i
1094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1096		      __m256i __B)
1097{
1098  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1099						  (__v32qi) __B,
1100						  (__v32qi) __W,
1101						  (__mmask32) __M);
1102}
1103
1104extern __inline __m128i
1105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1106_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1107{
1108  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1109						  (__v16qi) __B,
1110						  (__v16qi)
1111						  _mm_setzero_di (),
1112						  (__mmask16) __M);
1113}
1114
1115extern __inline __m128i
1116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1117_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1118		   __m128i __B)
1119{
1120  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1121						  (__v16qi) __B,
1122						  (__v16qi) __W,
1123						  (__mmask16) __M);
1124}
1125
1126extern __inline __m256i
1127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1129{
1130  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1131						  (__v16hi) __B,
1132						  (__v16hi)
1133						  _mm256_setzero_si256 (),
1134						  (__mmask16) __M);
1135}
1136
1137extern __inline __m256i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1140		       __m256i __B)
1141{
1142  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1143						  (__v16hi) __B,
1144						  (__v16hi) __W,
1145						  (__mmask16) __M);
1146}
1147
1148extern __inline __m128i
1149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1151{
1152  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1153						  (__v8hi) __B,
1154						  (__v8hi)
1155						  _mm_setzero_di (),
1156						  (__mmask8) __M);
1157}
1158
1159extern __inline __m128i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1162		    __m128i __B)
1163{
1164  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1165						  (__v8hi) __B,
1166						  (__v8hi) __W,
1167						  (__mmask8) __M);
1168}
1169
1170extern __inline __m256i
1171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1172_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1173{
1174  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1175						  (__v16hi) __B,
1176						  (__v16hi)
1177						  _mm256_setzero_si256 (),
1178						  (__mmask16) __M);
1179}
1180
1181extern __inline __m256i
1182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1183_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1184		       __m256i __B)
1185{
1186  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1187						  (__v16hi) __B,
1188						  (__v16hi) __W,
1189						  (__mmask16) __M);
1190}
1191
1192extern __inline __m128i
1193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1195{
1196  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1197						  (__v8hi) __B,
1198						  (__v8hi)
1199						  _mm_setzero_di (),
1200						  (__mmask8) __M);
1201}
1202
1203extern __inline __m128i
1204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1206		    __m128i __B)
1207{
1208  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1209						  (__v8hi) __B,
1210						  (__v8hi) __W,
1211						  (__mmask8) __M);
1212}
1213
1214extern __inline __m128i
1215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1217{
1218  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1219						  (__v8hi) __B,
1220						  (__v8hi)
1221						  _mm_setzero_di (),
1222						  (__mmask8) __M);
1223}
1224
1225extern __inline __m128i
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1228		    __m128i __B)
1229{
1230  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1231						  (__v8hi) __B,
1232						  (__v8hi) __W,
1233						  (__mmask8) __M);
1234}
1235
1236#ifdef __OPTIMIZE__
1237extern __inline __m256i
1238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1239_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
1240			 __m256i __B, const int __N)
1241{
1242  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1243						   (__v4di) __B,
1244						   __N * 8,
1245						   (__v4di) __W,
1246						   (__mmask32) __U);
1247}
1248
1249extern __inline __m256i
1250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1251_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
1252			  const int __N)
1253{
1254  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1255						   (__v4di) __B,
1256						   __N * 8,
1257						   (__v4di)
1258						   _mm256_setzero_si256 (),
1259						   (__mmask32) __U);
1260}
1261
1262extern __inline __m128i
1263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1264_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
1265		      __m128i __B, const int __N)
1266{
1267  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1268						   (__v2di) __B,
1269						   __N * 8,
1270						   (__v2di) __W,
1271						   (__mmask16) __U);
1272}
1273
1274extern __inline __m128i
1275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
1277		       const int __N)
1278{
1279  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1280						   (__v2di) __B,
1281						   __N * 8,
1282						   (__v2di)
1283						   _mm_setzero_si128 (),
1284						   (__mmask16) __U);
1285}
1286
1287extern __inline __m256i
1288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
1290{
1291  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1292						    (__v32qi) __B,
1293						    __imm,
1294						    (__v16hi)
1295						    _mm256_setzero_si256 (),
1296						    (__mmask16) -1);
1297}
1298
1299extern __inline __m256i
1300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1301_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
1302			__m256i __B, const int __imm)
1303{
1304  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1305						    (__v32qi) __B,
1306						    __imm,
1307						    (__v16hi) __W,
1308						    (__mmask16) __U);
1309}
1310
1311extern __inline __m256i
1312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1313_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
1314			 const int __imm)
1315{
1316  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1317						    (__v32qi) __B,
1318						    __imm,
1319						    (__v16hi)
1320						    _mm256_setzero_si256 (),
1321						    (__mmask16) __U);
1322}
1323
1324extern __inline __m128i
1325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
1327{
1328  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1329						    (__v16qi) __B,
1330						    __imm,
1331						    (__v8hi)
1332						    _mm_setzero_hi (),
1333						    (__mmask8) -1);
1334}
1335
1336extern __inline __m128i
1337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1338_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
1339		     __m128i __B, const int __imm)
1340{
1341  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1342						    (__v16qi) __B,
1343						    __imm,
1344						    (__v8hi) __W,
1345						    (__mmask8) __U);
1346}
1347
1348extern __inline __m128i
1349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
1351		      const int __imm)
1352{
1353  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1354						    (__v16qi) __B,
1355						    __imm,
1356						    (__v8hi)
1357						    _mm_setzero_si128 (),
1358						    (__mmask8) __U);
1359}
1360
1361extern __inline __m128i
1362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1363_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
1364{
1365  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
1366						    (__v8hi) __W,
1367						    (__mmask8) __U);
1368}
1369
1370extern __inline __m128i
1371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
1373{
1374  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
1375						    (__v16qi) __W,
1376						    (__mmask16) __U);
1377}
1378
1379extern __inline __m256i
1380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
1382{
1383  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
1384						    (__v16hi) __W,
1385						    (__mmask16) __U);
1386}
1387
1388extern __inline __m256i
1389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
1391{
1392  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
1393						    (__v32qi) __W,
1394						    (__mmask32) __U);
1395}
1396
1397extern __inline __mmask8
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1400			 const int __P)
1401{
1402  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1403						 (__v8hi) __Y, __P,
1404						 (__mmask8) __U);
1405}
1406
1407extern __inline __mmask8
1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
1410{
1411  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1412						 (__v8hi) __Y, __P,
1413						 (__mmask8) -1);
1414}
1415
1416extern __inline __mmask16
1417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1418_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1419			    const int __P)
1420{
1421  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1422						  (__v16hi) __Y, __P,
1423						  (__mmask16) __U);
1424}
1425
1426extern __inline __mmask16
1427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
1429{
1430  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1431						  (__v16hi) __Y, __P,
1432						  (__mmask16) -1);
1433}
1434
1435extern __inline __mmask16
1436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1437_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1438			const int __P)
1439{
1440  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1441						  (__v16qi) __Y, __P,
1442						  (__mmask16) __U);
1443}
1444
1445extern __inline __mmask16
1446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1447_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
1448{
1449  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1450						  (__v16qi) __Y, __P,
1451						  (__mmask16) -1);
1452}
1453
1454extern __inline __mmask32
1455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1456_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1457			   const int __P)
1458{
1459  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1460						  (__v32qi) __Y, __P,
1461						  (__mmask32) __U);
1462}
1463
1464extern __inline __mmask16
1465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
1467{
1468  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1469						  (__v32qi) __Y, __P,
1470						  (__mmask32) -1);
1471}
1472
1473extern __inline __mmask8
1474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1476			 const int __P)
1477{
1478  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1479						  (__v8hi) __Y, __P,
1480						  (__mmask8) __U);
1481}
1482
1483extern __inline __mmask8
1484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1485_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
1486{
1487  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1488						  (__v8hi) __Y, __P,
1489						  (__mmask8) -1);
1490}
1491
1492extern __inline __mmask16
1493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1495			    const int __P)
1496{
1497  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1498						   (__v16hi) __Y, __P,
1499						   (__mmask16) __U);
1500}
1501
1502extern __inline __mmask16
1503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1504_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
1505{
1506  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1507						   (__v16hi) __Y, __P,
1508						   (__mmask16) -1);
1509}
1510
1511extern __inline __mmask16
1512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1513_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1514			const int __P)
1515{
1516  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1517						   (__v16qi) __Y, __P,
1518						   (__mmask16) __U);
1519}
1520
1521extern __inline __mmask16
1522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
1524{
1525  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1526						   (__v16qi) __Y, __P,
1527						   (__mmask16) -1);
1528}
1529
1530extern __inline __mmask32
1531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1532_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1533			   const int __P)
1534{
1535  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1536						   (__v32qi) __Y, __P,
1537						   (__mmask32) __U);
1538}
1539
1540extern __inline __mmask16
1541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
1543{
1544  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1545						   (__v32qi) __Y, __P,
1546						   (__mmask32) -1);
1547}
1548
1549extern __inline __m256i
1550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1551_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1552			const int __imm)
1553{
1554  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1555						  (__v16hi) __W,
1556						  (__mmask16) __U);
1557}
1558
1559extern __inline __m256i
1560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1562{
1563  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1564						  (__v16hi)
1565						  _mm256_setzero_si256 (),
1566						  (__mmask16) __U);
1567}
1568
1569extern __inline __m128i
1570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1572		     const int __imm)
1573{
1574  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1575						  (__v8hi) __W,
1576						  (__mmask8) __U);
1577}
1578
1579extern __inline __m128i
1580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1582{
1583  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1584						  (__v8hi)
1585						  _mm_setzero_si128 (),
1586						  (__mmask8) __U);
1587}
1588
1589extern __inline __m256i
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1592			     const int __imm)
1593{
1594  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1595						   __imm,
1596						   (__v16hi) __W,
1597						   (__mmask16) __U);
1598}
1599
1600extern __inline __m256i
1601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
1603			      const int __imm)
1604{
1605  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1606						   __imm,
1607						   (__v16hi)
1608						   _mm256_setzero_si256 (),
1609						   (__mmask16) __U);
1610}
1611
1612extern __inline __m128i
1613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1615			  const int __imm)
1616{
1617  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1618						   (__v8hi) __W,
1619						   (__mmask8) __U);
1620}
1621
1622extern __inline __m128i
1623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1625{
1626  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1627						   (__v8hi)
1628						   _mm_setzero_hi (),
1629						   (__mmask8) __U);
1630}
1631
1632extern __inline __m256i
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1635			     const int __imm)
1636{
1637  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1638						   __imm,
1639						   (__v16hi) __W,
1640						   (__mmask16) __U);
1641}
1642
1643extern __inline __m256i
1644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
1646			      const int __imm)
1647{
1648  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1649						   __imm,
1650						   (__v16hi)
1651						   _mm256_setzero_si256 (),
1652						   (__mmask16) __U);
1653}
1654
1655extern __inline __m128i
1656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1658			  const int __imm)
1659{
1660  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1661						   (__v8hi) __W,
1662						   (__mmask8) __U);
1663}
1664
1665extern __inline __m128i
1666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1668{
1669  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1670						   (__v8hi)
1671						   _mm_setzero_hi (),
1672						   (__mmask8) __U);
1673}
1674
1675extern __inline __m256i
1676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1678			const int __imm)
1679{
1680  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
1681						  (__v16hi) __W,
1682						  (__mmask16) __U);
1683}
1684
1685extern __inline __m256i
1686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1688{
1689  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
1690						  (__v16hi)
1691						  _mm256_setzero_si256 (),
1692						  (__mmask16) __U);
1693}
1694
1695extern __inline __m128i
1696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1698		     const int __imm)
1699{
1700  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
1701						  (__v8hi) __W,
1702						  (__mmask8) __U);
1703}
1704
1705extern __inline __m128i
1706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1708{
1709  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
1710						  (__v8hi)
1711						  _mm_setzero_si128 (),
1712						  (__mmask8) __U);
1713}
1714
1715extern __inline __m256i
1716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1717_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1718			int __B)
1719{
1720  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
1721						  (__v16hi) __W,
1722						  (__mmask16) __U);
1723}
1724
1725extern __inline __m256i
1726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1727_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
1728{
1729  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
1730						  (__v16hi)
1731						  _mm256_setzero_si256 (),
1732						  (__mmask16) __U);
1733}
1734
1735extern __inline __m128i
1736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1737_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
1738{
1739  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
1740						  (__v8hi) __W,
1741						  (__mmask8) __U);
1742}
1743
1744extern __inline __m128i
1745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1746_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
1747{
1748  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
1749						  (__v8hi)
1750						  _mm_setzero_si128 (),
1751						  (__mmask8) __U);
1752}
1753
1754#else
/* Macro form of _mm256_mask_alignr_epi8, used when __OPTIMIZE__ is not
   defined.  (W) is forwarded as the vector operand ahead of the mask,
   as the inline definition does with __W; passing (X) there left W
   unused.  (N) is parenthesized before the multiplication so that an
   expression argument such as `a + b' is scaled as a whole.  */
#define _mm256_mask_alignr_epi8(W, U, X, Y, N)				\
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),	\
					     (__v4di)(__m256i)(Y),	\
					     (int)((N) * 8),		\
					     (__v4di)(__m256i)(W),	\
					     (__mmask32)(U)))
1759
/* Macro form of _mm256_mask_srli_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v16hi, count (B), then (W) and the mask (U).  */
#define _mm256_mask_srli_epi16(W, U, A, B)				\
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),	\
					    (int)(B),			\
					    (__v16hi)(__m256i)(W),	\
					    (__mmask16)(U)))
1763
/* Macro form of _mm256_maskz_srli_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v16hi, count (B), then an all-zero vector and the
   mask (U).  */
#define _mm256_maskz_srli_epi16(U, A, B)				\
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),	\
					    (int)(B),			\
					    (__v16hi)_mm256_setzero_si256 (), \
					    (__mmask16)(U)))
1767
/* Macro form of _mm_mask_srli_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v8hi, count (B), then (W) and the mask (U).  */
#define _mm_mask_srli_epi16(W, U, A, B)					\
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),	\
					    (int)(B),			\
					    (__v8hi)(__m128i)(W),	\
					    (__mmask8)(U)))
1771
/* Macro form of _mm_maskz_srli_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v8hi, count (B), then an all-zero vector and the
   mask (U).  */
#define _mm_maskz_srli_epi16(U, A, B)					\
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),	\
					    (int)(B),			\
					    (__v8hi)_mm_setzero_si128(),	\
					    (__mmask8)(U)))
1775
/* Macro form of _mm256_mask_srai_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v16hi, count (B), then (W) and the mask (U).  */
#define _mm256_mask_srai_epi16(W, U, A, B)				\
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),	\
					    (int)(B),			\
					    (__v16hi)(__m256i)(W),	\
					    (__mmask16)(U)))
1779
/* Macro form of _mm256_maskz_srai_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v16hi, count (B), then an all-zero vector and the
   mask (U).  */
#define _mm256_maskz_srai_epi16(U, A, B)				\
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),	\
					    (int)(B),			\
					    (__v16hi)_mm256_setzero_si256 (), \
					    (__mmask16)(U)))
1783
/* Macro form of _mm_mask_srai_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v8hi, count (B), then (W) and the mask (U).  */
#define _mm_mask_srai_epi16(W, U, A, B)					\
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),	\
					    (int)(B),			\
					    (__v8hi)(__m128i)(W),	\
					    (__mmask8)(U)))
1787
/* Macro form of _mm_maskz_srai_epi16, used when __OPTIMIZE__ is not
   defined: (A) as __v8hi, count (B), then an all-zero vector and the
   mask (U).  */
#define _mm_maskz_srai_epi16(U, A, B)					\
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),	\
					    (int)(B),			\
					    (__v8hi)_mm_setzero_si128(),	\
					    (__mmask8)(U)))
1791
/* Macro form of _mm256_mask_shufflehi_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v16hi, immediate (B), then (W) and the mask
   (U).  */
#define _mm256_mask_shufflehi_epi16(W, U, A, B)				\
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A),	\
					     (int)(B),			\
					     (__v16hi)(__m256i)(W),	\
					     (__mmask16)(U)))
1796
/* Macro form of _mm256_maskz_shufflehi_epi16, used when __OPTIMIZE__
   is not defined: (A) as __v16hi, immediate (B), then an all-zero
   vector and the mask (U).  */
#define _mm256_maskz_shufflehi_epi16(U, A, B)				\
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A),	\
					     (int)(B),			\
					     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
					     (__mmask16)(U)))
1801
/* Macro form of _mm_mask_shufflehi_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v8hi, immediate (B), then (W) and the mask
   (U).  */
#define _mm_mask_shufflehi_epi16(W, U, A, B)				\
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A),	\
					     (int)(B),			\
					     (__v8hi)(__m128i)(W),	\
					     (__mmask8)(U)))
1806
/* Macro form of _mm_maskz_shufflehi_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v8hi, immediate (B), then the result of
   _mm_setzero_hi() and the mask (U).  */
#define _mm_maskz_shufflehi_epi16(U, A, B)				\
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A),	\
					     (int)(B),			\
					     (__v8hi)(__m128i)_mm_setzero_hi(), \
					     (__mmask8)(U)))
1811
/* Macro form of _mm256_mask_shufflelo_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v16hi, immediate (B), then (W) and the mask
   (U).  */
#define _mm256_mask_shufflelo_epi16(W, U, A, B)				\
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A),	\
					     (int)(B),			\
					     (__v16hi)(__m256i)(W),	\
					     (__mmask16)(U)))
1816
/* Macro form of _mm256_maskz_shufflelo_epi16, used when __OPTIMIZE__
   is not defined: (A) as __v16hi, immediate (B), then an all-zero
   vector and the mask (U).  */
#define _mm256_maskz_shufflelo_epi16(U, A, B)				\
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A),	\
					     (int)(B),			\
					     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
					     (__mmask16)(U)))
1821
/* Macro form of _mm_mask_shufflelo_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v8hi, immediate (B), then (W) and the mask
   (U).  */
#define _mm_mask_shufflelo_epi16(W, U, A, B)				\
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A),	\
					     (int)(B),			\
					     (__v8hi)(__m128i)(W),	\
					     (__mmask8)(U)))
1826
/* Macro form of _mm_maskz_shufflelo_epi16, used when __OPTIMIZE__ is
   not defined: (A) as __v8hi, immediate (B), then the result of
   _mm_setzero_hi() and the mask (U).  */
#define _mm_maskz_shufflelo_epi16(U, A, B)				\
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A),	\
					     (int)(B),			\
					     (__v8hi)(__m128i)_mm_setzero_hi(), \
					     (__mmask8)(U)))
1831
/* Macro form of _mm256_maskz_alignr_epi8, used when __OPTIMIZE__ is
   not defined: (X) and (Y) as __v4di, a count of (N) * 8, then an
   all-zero vector and the mask (U).  (N) is parenthesized before the
   multiplication so that an expression argument such as `a + b' is
   scaled as a whole rather than only its last term.  */
#define _mm256_maskz_alignr_epi8(U, X, Y, N)					    \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
					    (__v4di)(__m256i)(Y), (int)((N) * 8),   \
					    (__v4di)(__m256i)_mm256_setzero_si256 (),   \
					    (__mmask32)(U)))
1837
/* Macro form of _mm_mask_alignr_epi8, used when __OPTIMIZE__ is not
   defined.  (W) is forwarded as the vector operand ahead of the mask,
   as the inline definition does with __W; passing (X) there left W
   unused.  (N) is parenthesized before the multiplication so that an
   expression argument such as `a + b' is scaled as a whole.  */
#define _mm_mask_alignr_epi8(W, U, X, Y, N)				\
  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),	\
					     (__v2di)(__m128i)(Y),	\
					     (int)((N) * 8),		\
					     (__v2di)(__m128i)(W),	\
					     (__mmask16)(U)))
1842
/* Macro form of _mm_maskz_alignr_epi8, used when __OPTIMIZE__ is not
   defined: (X) and (Y) as __v2di, a count of (N) * 8, then the result
   of _mm_setzero_di() and the mask (U).  (N) is parenthesized before
   the multiplication so that an expression argument such as `a + b'
   is scaled as a whole rather than only its last term.  */
#define _mm_maskz_alignr_epi8(U, X, Y, N)					    \
  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
					    (__v2di)(__m128i)(Y), (int)((N) * 8),   \
					    (__v2di)(__m128i)_mm_setzero_di(),	    \
					    (__mmask16)(U)))
1848
/* Macro form of _mm_mask_slli_epi16, used when __OPTIMIZE__ is not
   defined: (X) as __v8hi, count (C), then (W) and the mask (U).  */
#define _mm_mask_slli_epi16(W, U, X, C)					\
  ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X),	\
					   (int)(C),			\
					   (__v8hi)(__m128i)(W),	\
					   (__mmask8)(U)))
1853
/* Macro form of _mm_maskz_slli_epi16, used when __OPTIMIZE__ is not
   defined: (X) as __v8hi, count (C), then the result of
   _mm_setzero_hi() and the mask (U).  */
#define _mm_maskz_slli_epi16(U, X, C)					\
  ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X),	\
					   (int)(C),			\
					   (__v8hi)(__m128i)_mm_setzero_hi(), \
					   (__mmask8)(U)))
1858
/* Macro form of _mm256_dbsad_epu8, used when __OPTIMIZE__ is not
   defined: (X) and (Y) as __v32qi, immediate (C), then an all-zero
   __v16hi vector and an all-ones __mmask16.  */
#define _mm256_dbsad_epu8(X, Y, C)					\
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),	\
					      (__v32qi)(__m256i) (Y),	\
					      (int) (C),		\
					      (__v16hi)(__m256i)_mm256_setzero_si256(), \
					      (__mmask16)-1))
1864
/* Expands to __builtin_ia32_psllwi256_mask on X with immediate C, W as the
   third operand (presumably the merge source) and U as the mask.  */
#define _mm256_mask_slli_epi16(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), \
     (int)(C), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
1869
/* Expands to __builtin_ia32_psllwi256_mask on X with immediate C,
   _mm256_setzero_si256 () as the third operand and U as the mask.  */
#define _mm256_maskz_slli_epi16(U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), \
     (int)(C), \
     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
     (__mmask16)(U)))
1874
/* Expands to __builtin_ia32_dbpsadbw256_mask on X and Y with immediate C,
   W as the fourth operand (presumably the merge source) and U as the
   mask.  */
#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
1880
/* Expands to __builtin_ia32_dbpsadbw256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and U as the mask.  */
#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)_mm256_setzero_si256(), \
     (__mmask16)(U)))
1886
/* Unmasked form: expands to __builtin_ia32_dbpsadbw128_mask on X and Y with
   immediate C, a _mm_setzero_si128 () fourth operand and an all-ones
   mask.  */
#define _mm_dbsad_epu8(X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_setzero_si128(), \
     (__mmask8)-1))
1892
/* Expands to __builtin_ia32_dbpsadbw128_mask on X and Y with immediate C,
   W as the fourth operand (presumably the merge source) and U as the
   mask.  */
#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)(W), \
     (__mmask8)(U)))
1898
/* Expands to __builtin_ia32_dbpsadbw128_mask on X and Y with immediate C,
   _mm_setzero_si128 () as the fourth operand and U as the mask.  */
#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_setzero_si128(), \
     (__mmask8)(U)))
1904
/* Expands to __builtin_ia32_blendmw_128_mask on __A and __W (both viewed as
   __v8hi) with __U as the selecting mask.  */
#define _mm_mask_blend_epi16(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmw_128_mask ( \
     (__v8hi) (__A), \
     (__v8hi) (__W), \
     (__mmask8) (__U)))
1909
/* Expands to __builtin_ia32_blendmb_128_mask on __A and __W (both viewed as
   __v16qi) with __U as the selecting mask.  */
#define _mm_mask_blend_epi8(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmb_128_mask ( \
     (__v16qi) (__A), \
     (__v16qi) (__W), \
     (__mmask16) (__U)))
1914
/* Expands to __builtin_ia32_blendmw_256_mask on __A and __W (both viewed as
   __v16hi) with __U as the selecting mask.  */
#define _mm256_mask_blend_epi16(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmw_256_mask ( \
     (__v16hi) (__A), \
     (__v16hi) (__W), \
     (__mmask16) (__U)))
1919
/* Expands to __builtin_ia32_blendmb_256_mask on __A and __W (both viewed as
   __v32qi) with __U as the selecting mask.  */
#define _mm256_mask_blend_epi8(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmb_256_mask ( \
     (__v32qi) (__A), \
     (__v32qi) (__W), \
     (__mmask32) (__U)))
1924
/* Expands to __builtin_ia32_cmpw128_mask on X and Y with immediate P and an
   all-ones input mask; yields an __mmask8.  */
#define _mm_cmp_epi16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
1929
/* Expands to __builtin_ia32_cmpb128_mask on X and Y with immediate P and an
   all-ones input mask; yields an __mmask16.  */
#define _mm_cmp_epi8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1934
/* Expands to __builtin_ia32_cmpw256_mask on X and Y with immediate P and an
   all-ones input mask; yields an __mmask16.  */
#define _mm256_cmp_epi16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1939
/* Expands to __builtin_ia32_cmpb256_mask on X and Y with immediate P and an
   all-ones input mask; yields an __mmask32.  */
#define _mm256_cmp_epi8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(-1)))
1944
/* Expands to __builtin_ia32_ucmpw128_mask on X and Y with immediate P and
   an all-ones input mask; yields an __mmask8.  */
#define _mm_cmp_epu16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
1949
/* Expands to __builtin_ia32_ucmpb128_mask on X and Y with immediate P and
   an all-ones input mask; yields an __mmask16.  */
#define _mm_cmp_epu8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1954
/* Expands to __builtin_ia32_ucmpw256_mask on X and Y with immediate P and
   an all-ones input mask; yields an __mmask16.  */
#define _mm256_cmp_epu16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1959
/* Expands to __builtin_ia32_ucmpb256_mask on X and Y with immediate P and
   an all-ones input mask; yields an __mmask32.  */
#define _mm256_cmp_epu8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)-1))
1964
/* Expands to __builtin_ia32_cmpw128_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask8.  M is cast to __mmask8 so the
   input mask type matches the result type.  */
#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
1969
/* Expands to __builtin_ia32_cmpb128_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask16.  */
#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
1974
/* Expands to __builtin_ia32_cmpw256_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask16.  */
#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
1979
/* Expands to __builtin_ia32_cmpb256_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask32.  */
#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
1984
/* Expands to __builtin_ia32_ucmpw128_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask8.  */
#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
1989
/* Expands to __builtin_ia32_ucmpb128_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask16.  */
#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
1994
/* Expands to __builtin_ia32_ucmpw256_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask16.  */
#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
1999
/* Expands to __builtin_ia32_ucmpb256_mask on X and Y with immediate P and M
   as the input mask; yields an __mmask32.  M is parenthesized before the
   cast so that an expression argument such as `a | b' is converted as a
   whole rather than only its first operand.  */
#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
2004#endif
2005
2006extern __inline __mmask32
2007  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
2009{
2010  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2011						  (__v32qi) __Y, 4,
2012						  (__mmask32) - 1);
2013}
2014
2015extern __inline __mmask32
2016  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2017_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
2018{
2019  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2020						  (__v32qi) __Y, 1,
2021						  (__mmask32) - 1);
2022}
2023
2024extern __inline __mmask32
2025  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2026_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
2027{
2028  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2029						  (__v32qi) __Y, 5,
2030						  (__mmask32) - 1);
2031}
2032
2033extern __inline __mmask32
2034  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2035_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
2036{
2037  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2038						  (__v32qi) __Y, 2,
2039						  (__mmask32) - 1);
2040}
2041
2042extern __inline __mmask16
2043  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
2045{
2046  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2047						  (__v16hi) __Y, 4,
2048						  (__mmask16) - 1);
2049}
2050
2051extern __inline __mmask16
2052  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2053_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
2054{
2055  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2056						  (__v16hi) __Y, 1,
2057						  (__mmask16) - 1);
2058}
2059
2060extern __inline __mmask16
2061  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2062_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
2063{
2064  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2065						  (__v16hi) __Y, 5,
2066						  (__mmask16) - 1);
2067}
2068
2069extern __inline __mmask16
2070  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2071_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
2072{
2073  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2074						  (__v16hi) __Y, 2,
2075						  (__mmask16) - 1);
2076}
2077
2078extern __inline __mmask16
2079  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2080_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
2081{
2082  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2083						   (__v16qi) __Y, 4,
2084						   (__mmask16) - 1);
2085}
2086
2087extern __inline __mmask16
2088  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2089_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
2090{
2091  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2092						   (__v16qi) __Y, 1,
2093						   (__mmask16) - 1);
2094}
2095
2096extern __inline __mmask16
2097  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
2099{
2100  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2101						   (__v16qi) __Y, 5,
2102						   (__mmask16) - 1);
2103}
2104
2105extern __inline __mmask16
2106  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
2108{
2109  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2110						   (__v16qi) __Y, 2,
2111						   (__mmask16) - 1);
2112}
2113
2114extern __inline __mmask8
2115  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2116_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
2117{
2118  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2119						  (__v8hi) __Y, 4,
2120						  (__mmask8) - 1);
2121}
2122
2123extern __inline __mmask8
2124  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2125_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
2126{
2127  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2128						  (__v8hi) __Y, 1,
2129						  (__mmask8) - 1);
2130}
2131
2132extern __inline __mmask8
2133  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
2135{
2136  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2137						  (__v8hi) __Y, 5,
2138						  (__mmask8) - 1);
2139}
2140
2141extern __inline __mmask8
2142  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2143_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
2144{
2145  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2146						  (__v8hi) __Y, 2,
2147						  (__mmask8) - 1);
2148}
2149
2150extern __inline __mmask16
2151  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2152_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
2153{
2154  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2155						  (__v16qi) __Y, 4,
2156						  (__mmask16) - 1);
2157}
2158
2159extern __inline __mmask16
2160  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2161_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
2162{
2163  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2164						  (__v16qi) __Y, 1,
2165						  (__mmask16) - 1);
2166}
2167
2168extern __inline __mmask16
2169  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2170_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
2171{
2172  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2173						  (__v16qi) __Y, 5,
2174						  (__mmask16) - 1);
2175}
2176
2177extern __inline __mmask16
2178  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
2180{
2181  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2182						  (__v16qi) __Y, 2,
2183						  (__mmask16) - 1);
2184}
2185
2186extern __inline __mmask8
2187  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2188_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
2189{
2190  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2191						 (__v8hi) __Y, 4,
2192						 (__mmask8) - 1);
2193}
2194
2195extern __inline __mmask8
2196  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2197_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
2198{
2199  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2200						 (__v8hi) __Y, 1,
2201						 (__mmask8) - 1);
2202}
2203
2204extern __inline __mmask8
2205  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
2207{
2208  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2209						 (__v8hi) __Y, 5,
2210						 (__mmask8) - 1);
2211}
2212
2213extern __inline __mmask8
2214  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2215_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
2216{
2217  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2218						 (__v8hi) __Y, 2,
2219						 (__mmask8) - 1);
2220}
2221
2222extern __inline __m256i
2223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
2225			  __m256i __Y)
2226{
2227  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2228						    (__v16hi) __Y,
2229						    (__v16hi) __W,
2230						    (__mmask16) __U);
2231}
2232
2233extern __inline __m256i
2234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
2236{
2237  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2238						    (__v16hi) __Y,
2239						    (__v16hi)
2240						    _mm256_setzero_si256 (),
2241						    (__mmask16) __U);
2242}
2243
2244extern __inline __m256i
2245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2246_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2247			 __m256i __B)
2248{
2249  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2250						   (__v16hi) __B,
2251						   (__v16hi) __W,
2252						   (__mmask16) __U);
2253}
2254
2255extern __inline __m256i
2256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2257_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2258{
2259  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2260						   (__v16hi) __B,
2261						   (__v16hi)
2262						   _mm256_setzero_si256 (),
2263						   (__mmask16) __U);
2264}
2265
2266extern __inline __m256i
2267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2268_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2269			 __m256i __B)
2270{
2271  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2272						  (__v16hi) __B,
2273						  (__v16hi) __W,
2274						  (__mmask16) __U);
2275}
2276
2277extern __inline __m256i
2278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2280{
2281  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2282						  (__v16hi) __B,
2283						  (__v16hi)
2284						  _mm256_setzero_si256 (),
2285						  (__mmask16) __U);
2286}
2287
2288extern __inline __m128i
2289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2291		      __m128i __B)
2292{
2293  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2294						  (__v8hi) __B,
2295						  (__v8hi) __W,
2296						  (__mmask8) __U);
2297}
2298
2299extern __inline __m128i
2300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2302{
2303  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2304						  (__v8hi) __B,
2305						  (__v8hi)
2306						  _mm_setzero_hi (),
2307						  (__mmask8) __U);
2308}
2309
2310extern __inline __m128i
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2313		      __m128i __B)
2314{
2315  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2316						   (__v8hi) __B,
2317						   (__v8hi) __W,
2318						   (__mmask8) __U);
2319}
2320
2321extern __inline __m128i
2322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2323_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2324{
2325  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2326						   (__v8hi) __B,
2327						   (__v8hi)
2328						   _mm_setzero_hi (),
2329						   (__mmask8) __U);
2330}
2331
2332extern __inline __m128i
2333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
2335		       __m128i __Y)
2336{
2337  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2338						    (__v8hi) __Y,
2339						    (__v8hi) __W,
2340						    (__mmask8) __U);
2341}
2342
2343extern __inline __m128i
2344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
2346{
2347  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2348						    (__v8hi) __Y,
2349						    (__v8hi)
2350						    _mm_setzero_hi (),
2351						    (__mmask8) __U);
2352}
2353
2354extern __inline __m256i
2355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2357			 __m256i __B)
2358{
2359  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2360						  (__v16hi) __B,
2361						  (__v16hi) __W,
2362						  (__mmask16) __U);
2363}
2364
2365extern __inline __m256i
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2368{
2369  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2370						  (__v16hi) __B,
2371						  (__v16hi)
2372						  _mm256_setzero_si256 (),
2373						  (__mmask16) __U);
2374}
2375
2376extern __inline __m128i
2377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2378_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2379		      __m128i __B)
2380{
2381  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2382						  (__v8hi) __B,
2383						  (__v8hi) __W,
2384						  (__mmask8) __U);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2390{
2391  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2392						  (__v8hi) __B,
2393						  (__v8hi)
2394						  _mm_setzero_hi (),
2395						  (__mmask8) __U);
2396}
2397
2398extern __inline __m256i
2399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2400_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
2401{
2402  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2403						    (__v16hi) __W,
2404						    (__mmask16) __U);
2405}
2406
2407extern __inline __m256i
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
2410{
2411  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2412						    (__v16hi)
2413						    _mm256_setzero_si256 (),
2414						    (__mmask16) __U);
2415}
2416
2417extern __inline __m128i
2418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2419_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
2420{
2421  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2422						    (__v8hi) __W,
2423						    (__mmask8) __U);
2424}
2425
2426extern __inline __m128i
2427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2428_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
2429{
2430  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2431						    (__v8hi)
2432						    _mm_setzero_si128 (),
2433						    (__mmask8) __U);
2434}
2435
2436extern __inline __m256i
2437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2438_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
2439{
2440  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2441						    (__v16hi) __W,
2442						    (__mmask16) __U);
2443}
2444
2445extern __inline __m256i
2446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2447_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
2448{
2449  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2450						    (__v16hi)
2451						    _mm256_setzero_si256 (),
2452						    (__mmask16) __U);
2453}
2454
2455extern __inline __m128i
2456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2457_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
2458{
2459  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2460						    (__v8hi) __W,
2461						    (__mmask8) __U);
2462}
2463
2464extern __inline __m128i
2465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2466_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
2467{
2468  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2469						    (__v8hi)
2470						    _mm_setzero_si128 (),
2471						    (__mmask8) __U);
2472}
2473
2474extern __inline __m256i
2475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2476_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2477		      __m256i __B)
2478{
2479  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2480						 (__v32qi) __B,
2481						 (__v32qi) __W,
2482						 (__mmask32) __U);
2483}
2484
2485extern __inline __m256i
2486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2488{
2489  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2490						 (__v32qi) __B,
2491						 (__v32qi)
2492						 _mm256_setzero_si256 (),
2493						 (__mmask32) __U);
2494}
2495
2496extern __inline __m128i
2497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2498_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
2499		   __m128i __B)
2500{
2501  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2502						 (__v16qi) __B,
2503						 (__v16qi) __W,
2504						 (__mmask16) __U);
2505}
2506
2507extern __inline __m128i
2508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2509_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
2510{
2511  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2512						 (__v16qi) __B,
2513						 (__v16qi)
2514						 _mm_setzero_si128 (),
2515						 (__mmask16) __U);
2516}
2517
2518extern __inline __m256i
2519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2521		       __m256i __B)
2522{
2523  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2524						 (__v16hi) __B,
2525						 (__v16hi) __W,
2526						 (__mmask16) __U);
2527}
2528
2529extern __inline __m256i
2530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2532{
2533  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2534						 (__v16hi) __B,
2535						 (__v16hi)
2536						 _mm256_setzero_si256 (),
2537						 (__mmask16) __U);
2538}
2539
2540extern __inline __m128i
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2543		    __m128i __B)
2544{
2545  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2546						 (__v8hi) __B,
2547						 (__v8hi) __W,
2548						 (__mmask8) __U);
2549}
2550
2551extern __inline __m128i
2552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2554{
2555  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2556						 (__v8hi) __B,
2557						 (__v8hi)
2558						 _mm_setzero_si128 (),
2559						 (__mmask8) __U);
2560}
2561
2562extern __inline __m256i
2563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2565		      __m256i __B)
2566{
2567  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2568						 (__v32qi) __B,
2569						 (__v32qi) __W,
2570						 (__mmask32) __U);
2571}
2572
2573extern __inline __m256i
2574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2576{
2577  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2578						 (__v32qi) __B,
2579						 (__v32qi)
2580						 _mm256_setzero_si256 (),
2581						 (__mmask32) __U);
2582}
2583
2584extern __inline __m256i
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2587		       __m256i __B)
2588{
2589  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2590						 (__v16hi) __B,
2591						 (__v16hi) __W,
2592						 (__mmask16) __U);
2593}
2594
2595extern __inline __m256i
2596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2598{
2599  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2600						 (__v16hi) __B,
2601						 (__v16hi)
2602						 _mm256_setzero_si256 (),
2603						 (__mmask16) __U);
2604}
2605
2606extern __inline __m256i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2609		       __m256i __B)
2610{
2611  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2612						  (__v32qi) __B,
2613						  (__v32qi) __W,
2614						  (__mmask32) __U);
2615}
2616
2617extern __inline __m256i
2618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2620{
2621  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2622						  (__v32qi) __B,
2623						  (__v32qi)
2624						  _mm256_setzero_si256 (),
2625						  (__mmask32) __U);
2626}
2627
2628extern __inline __m256i
2629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2631			__m256i __B)
2632{
2633  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2634						  (__v16hi) __B,
2635						  (__v16hi) __W,
2636						  (__mmask16) __U);
2637}
2638
2639extern __inline __m256i
2640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2641_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2642{
2643  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2644						  (__v16hi) __B,
2645						  (__v16hi)
2646						  _mm256_setzero_si256 (),
2647						  (__mmask16) __U);
2648}
2649
2650extern __inline __m256i
2651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2652_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2653		       __m256i __B)
2654{
2655  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2656						   (__v32qi) __B,
2657						   (__v32qi) __W,
2658						   (__mmask32) __U);
2659}
2660
2661extern __inline __m256i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2664{
2665  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2666						   (__v32qi) __B,
2667						   (__v32qi)
2668						   _mm256_setzero_si256 (),
2669						   (__mmask32) __U);
2670}
2671
2672extern __inline __m256i
2673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2674_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2675			__m256i __B)
2676{
2677  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2678						   (__v16hi) __B,
2679						   (__v16hi) __W,
2680						   (__mmask16) __U);
2681}
2682
2683extern __inline __m256i
2684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2685_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2686{
2687  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2688						   (__v16hi) __B,
2689						   (__v16hi)
2690						   _mm256_setzero_si256 (),
2691						   (__mmask16) __U);
2692}
2693
2694extern __inline __m256i
2695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2697		      __m256i __B)
2698{
2699  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
2700						 (__v32qi) __B,
2701						 (__v32qi) __W,
2702						 (__mmask32) __U);
2703}
2704
2705extern __inline __m256i
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2708{
2709  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
2710						 (__v32qi) __B,
2711						 (__v32qi)
2712						 _mm256_setzero_si256 (),
2713						 (__mmask32) __U);
2714}
2715
2716extern __inline __m256i
2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2719		       __m256i __B)
2720{
2721  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
2722						 (__v16hi) __B,
2723						 (__v16hi) __W,
2724						 (__mmask16) __U);
2725}
2726
2727extern __inline __m256i
2728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2729_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2730{
2731  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
2732						 (__v16hi) __B,
2733						 (__v16hi)
2734						 _mm256_setzero_si256 (),
2735						 (__mmask16) __U);
2736}
2737
2738extern __inline __m256i
2739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2740_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2741		       __m256i __B)
2742{
2743  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
2744						  (__v32qi) __B,
2745						  (__v32qi) __W,
2746						  (__mmask32) __U);
2747}
2748
2749extern __inline __m256i
2750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2752{
2753  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
2754						  (__v32qi) __B,
2755						  (__v32qi)
2756						  _mm256_setzero_si256 (),
2757						  (__mmask32) __U);
2758}
2759
2760extern __inline __m256i
2761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2762_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2763			__m256i __B)
2764{
2765  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
2766						  (__v16hi) __B,
2767						  (__v16hi) __W,
2768						  (__mmask16) __U);
2769}
2770
2771extern __inline __m256i
2772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2773_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2774{
2775  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
2776						  (__v16hi) __B,
2777						  (__v16hi)
2778						  _mm256_setzero_si256 (),
2779						  (__mmask16) __U);
2780}
2781
2782extern __inline __m256i
2783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2785		       __m256i __B)
2786{
2787  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
2788						   (__v32qi) __B,
2789						   (__v32qi) __W,
2790						   (__mmask32) __U);
2791}
2792
2793extern __inline __m256i
2794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2795_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2796{
2797  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
2798						   (__v32qi) __B,
2799						   (__v32qi)
2800						   _mm256_setzero_si256 (),
2801						   (__mmask32) __U);
2802}
2803
2804extern __inline __m256i
2805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2806_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2807			__m256i __B)
2808{
2809  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
2810						   (__v16hi) __B,
2811						   (__v16hi) __W,
2812						   (__mmask16) __U);
2813}
2814
2815extern __inline __m256i
2816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2817_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2818{
2819  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
2820						   (__v16hi) __B,
2821						   (__v16hi)
2822						   _mm256_setzero_si256 (),
2823						   (__mmask16) __U);
2824}
2825
2826extern __inline __m128i
2827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2828_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
2829		   __m128i __B)
2830{
2831  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
2832						 (__v16qi) __B,
2833						 (__v16qi) __W,
2834						 (__mmask16) __U);
2835}
2836
2837extern __inline __m128i
2838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2839_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
2840{
2841  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
2842						 (__v16qi) __B,
2843						 (__v16qi)
2844						 _mm_setzero_si128 (),
2845						 (__mmask16) __U);
2846}
2847
2848extern __inline __m128i
2849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2850_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2851		    __m128i __B)
2852{
2853  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
2854						 (__v8hi) __B,
2855						 (__v8hi) __W,
2856						 (__mmask8) __U);
2857}
2858
2859extern __inline __m128i
2860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2862{
2863  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
2864						 (__v8hi) __B,
2865						 (__v8hi)
2866						 _mm_setzero_si128 (),
2867						 (__mmask8) __U);
2868}
2869
2870extern __inline __m256i
2871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2873			   __m256i __B)
2874{
2875  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
2876						     (__v32qi) __B,
2877						     (__v32qi) __W,
2878						     (__mmask32) __U);
2879}
2880
2881extern __inline __m256i
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2884{
2885  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
2886						     (__v32qi) __B,
2887						     (__v32qi)
2888						     _mm256_setzero_si256 (),
2889						     (__mmask32) __U);
2890}
2891
2892extern __inline __m128i
2893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2894_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
2895			__m128i __B)
2896{
2897  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
2898						     (__v16qi) __B,
2899						     (__v16qi) __W,
2900						     (__mmask16) __U);
2901}
2902
2903extern __inline __m128i
2904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2905_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
2906{
2907  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
2908						     (__v16qi) __B,
2909						     (__v16qi)
2910						     _mm_setzero_si128 (),
2911						     (__mmask16) __U);
2912}
2913
2914extern __inline __m256i
2915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2916_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2917			    __m256i __B)
2918{
2919  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
2920						     (__v16hi) __B,
2921						     (__v16hi) __W,
2922						     (__mmask16) __U);
2923}
2924
2925extern __inline __m256i
2926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2928{
2929  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
2930						     (__v16hi) __B,
2931						     (__v16hi)
2932						     _mm256_setzero_si256 (),
2933						     (__mmask16) __U);
2934}
2935
2936extern __inline __m128i
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2939			 __m128i __B)
2940{
2941  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
2942						     (__v8hi) __B,
2943						     (__v8hi) __W,
2944						     (__mmask8) __U);
2945}
2946
2947extern __inline __m128i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2950{
2951  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
2952						     (__v8hi) __B,
2953						     (__v8hi)
2954						     _mm_setzero_si128 (),
2955						     (__mmask8) __U);
2956}
2957
2958extern __inline __m256i
2959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2961			   __m256i __B)
2962{
2963  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
2964						     (__v32qi) __B,
2965						     (__v32qi) __W,
2966						     (__mmask32) __U);
2967}
2968
2969extern __inline __m256i
2970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2971_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2972{
2973  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
2974						     (__v32qi) __B,
2975						     (__v32qi)
2976						     _mm256_setzero_si256 (),
2977						     (__mmask32) __U);
2978}
2979
2980extern __inline __m128i
2981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2982_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
2983			__m128i __B)
2984{
2985  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
2986						     (__v16qi) __B,
2987						     (__v16qi) __W,
2988						     (__mmask16) __U);
2989}
2990
2991extern __inline __m128i
2992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2993_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
2994{
2995  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
2996						     (__v16qi) __B,
2997						     (__v16qi)
2998						     _mm_setzero_si128 (),
2999						     (__mmask16) __U);
3000}
3001
3002extern __inline __m256i
3003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3004_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3005			    __m256i __B)
3006{
3007  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3008						     (__v16hi) __B,
3009						     (__v16hi) __W,
3010						     (__mmask16) __U);
3011}
3012
3013extern __inline __m256i
3014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3016{
3017  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3018						     (__v16hi) __B,
3019						     (__v16hi)
3020						     _mm256_setzero_si256 (),
3021						     (__mmask16) __U);
3022}
3023
3024extern __inline __m128i
3025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3026_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3027			 __m128i __B)
3028{
3029  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3030						     (__v8hi) __B,
3031						     (__v8hi) __W,
3032						     (__mmask8) __U);
3033}
3034
3035extern __inline __m128i
3036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3038{
3039  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3040						     (__v8hi) __B,
3041						     (__v8hi)
3042						     _mm_setzero_si128 (),
3043						     (__mmask8) __U);
3044}
3045
3046extern __inline __mmask16
3047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
3049{
3050  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3051						     (__v16qi) __B,
3052						     (__mmask16) -1);
3053}
3054
3055extern __inline __mmask16
3056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3057_mm_cmpeq_epu8_mask (__m128i __A, __m128i __B)
3058{
3059  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3060						    (__v16qi) __B, 0,
3061						    (__mmask16) -1);
3062}
3063
3064extern __inline __mmask16
3065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3066_mm_mask_cmpeq_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3067{
3068  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3069						    (__v16qi) __B, 0,
3070						    __U);
3071}
3072
3073extern __inline __mmask16
3074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3075_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3076{
3077  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3078						     (__v16qi) __B,
3079						     __U);
3080}
3081
3082extern __inline __mmask32
3083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3084_mm256_cmpeq_epu8_mask (__m256i __A, __m256i __B)
3085{
3086  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3087						    (__v32qi) __B, 0,
3088						    (__mmask32) -1);
3089}
3090
3091extern __inline __mmask32
3092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3093_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
3094{
3095  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3096						     (__v32qi) __B,
3097						     (__mmask32) -1);
3098}
3099
3100extern __inline __mmask32
3101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102_mm256_mask_cmpeq_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3103{
3104  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3105						    (__v32qi) __B, 0,
3106						    __U);
3107}
3108
3109extern __inline __mmask32
3110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3112{
3113  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3114						     (__v32qi) __B,
3115						     __U);
3116}
3117
3118extern __inline __mmask8
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm_cmpeq_epu16_mask (__m128i __A, __m128i __B)
3121{
3122  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3123						   (__v8hi) __B, 0,
3124						   (__mmask8) -1);
3125}
3126
3127extern __inline __mmask8
3128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
3130{
3131  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3132						    (__v8hi) __B,
3133						    (__mmask8) -1);
3134}
3135
3136extern __inline __mmask8
3137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138_mm_mask_cmpeq_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3139{
3140  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3141						   (__v8hi) __B, 0, __U);
3142}
3143
3144extern __inline __mmask8
3145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3147{
3148  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3149						    (__v8hi) __B, __U);
3150}
3151
3152extern __inline __mmask16
3153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3154_mm256_cmpeq_epu16_mask (__m256i __A, __m256i __B)
3155{
3156  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3157						    (__v16hi) __B, 0,
3158						    (__mmask16) -1);
3159}
3160
3161extern __inline __mmask16
3162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3163_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
3164{
3165  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3166						     (__v16hi) __B,
3167						     (__mmask16) -1);
3168}
3169
3170extern __inline __mmask16
3171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3172_mm256_mask_cmpeq_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3173{
3174  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3175						    (__v16hi) __B, 0,
3176						    __U);
3177}
3178
3179extern __inline __mmask16
3180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3181_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3182{
3183  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3184						     (__v16hi) __B,
3185						     __U);
3186}
3187
3188extern __inline __mmask16
3189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3190_mm_cmpgt_epu8_mask (__m128i __A, __m128i __B)
3191{
3192  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3193						    (__v16qi) __B, 6,
3194						    (__mmask16) -1);
3195}
3196
3197extern __inline __mmask16
3198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3199_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
3200{
3201  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3202						     (__v16qi) __B,
3203						     (__mmask16) -1);
3204}
3205
3206extern __inline __mmask16
3207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3208_mm_mask_cmpgt_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3209{
3210  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3211						    (__v16qi) __B, 6,
3212						    __U);
3213}
3214
3215extern __inline __mmask16
3216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3218{
3219  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3220						     (__v16qi) __B,
3221						     __U);
3222}
3223
3224extern __inline __mmask32
3225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226_mm256_cmpgt_epu8_mask (__m256i __A, __m256i __B)
3227{
3228  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3229						    (__v32qi) __B, 6,
3230						    (__mmask32) -1);
3231}
3232
3233extern __inline __mmask32
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
3236{
3237  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3238						     (__v32qi) __B,
3239						     (__mmask32) -1);
3240}
3241
3242extern __inline __mmask32
3243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244_mm256_mask_cmpgt_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3245{
3246  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3247						    (__v32qi) __B, 6,
3248						    __U);
3249}
3250
3251extern __inline __mmask32
3252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3253_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3254{
3255  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3256						     (__v32qi) __B,
3257						     __U);
3258}
3259
3260extern __inline __mmask8
3261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3262_mm_cmpgt_epu16_mask (__m128i __A, __m128i __B)
3263{
3264  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3265						   (__v8hi) __B, 6,
3266						   (__mmask8) -1);
3267}
3268
3269extern __inline __mmask8
3270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3271_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
3272{
3273  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3274						    (__v8hi) __B,
3275						    (__mmask8) -1);
3276}
3277
3278extern __inline __mmask8
3279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280_mm_mask_cmpgt_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3281{
3282  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3283						   (__v8hi) __B, 6, __U);
3284}
3285
3286extern __inline __mmask8
3287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3288_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3289{
3290  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3291						    (__v8hi) __B, __U);
3292}
3293
3294extern __inline __mmask16
3295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3296_mm256_cmpgt_epu16_mask (__m256i __A, __m256i __B)
3297{
3298  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3299						    (__v16hi) __B, 6,
3300						    (__mmask16) -1);
3301}
3302
3303extern __inline __mmask16
3304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3305_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
3306{
3307  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3308						     (__v16hi) __B,
3309						     (__mmask16) -1);
3310}
3311
3312extern __inline __mmask16
3313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3314_mm256_mask_cmpgt_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3315{
3316  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3317						    (__v16hi) __B, 6,
3318						    __U);
3319}
3320
3321extern __inline __mmask16
3322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3324{
3325  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3326						     (__v16hi) __B,
3327						     __U);
3328}
3329
3330extern __inline __mmask16
3331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3332_mm_testn_epi8_mask (__m128i __A, __m128i __B)
3333{
3334  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3335						 (__v16qi) __B,
3336						 (__mmask16) -1);
3337}
3338
3339extern __inline __mmask16
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3342{
3343  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3344						 (__v16qi) __B, __U);
3345}
3346
3347extern __inline __mmask32
3348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
3350{
3351  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3352						 (__v32qi) __B,
3353						 (__mmask32) -1);
3354}
3355
3356extern __inline __mmask32
3357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3358_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3359{
3360  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3361						 (__v32qi) __B, __U);
3362}
3363
3364extern __inline __mmask8
3365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366_mm_testn_epi16_mask (__m128i __A, __m128i __B)
3367{
3368  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3369						(__v8hi) __B,
3370						(__mmask8) -1);
3371}
3372
3373extern __inline __mmask8
3374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3375_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3376{
3377  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3378						(__v8hi) __B, __U);
3379}
3380
3381extern __inline __mmask16
3382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3383_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
3384{
3385  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3386						 (__v16hi) __B,
3387						 (__mmask16) -1);
3388}
3389
3390extern __inline __mmask16
3391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3392_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3393{
3394  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3395						 (__v16hi) __B, __U);
3396}
3397
3398extern __inline __m256i
3399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3401			  __m256i __B)
3402{
3403  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3404						  (__v32qi) __B,
3405						  (__v32qi) __W,
3406						  (__mmask32) __U);
3407}
3408
3409extern __inline __m256i
3410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3411_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3412{
3413  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3414						  (__v32qi) __B,
3415						  (__v32qi)
3416						  _mm256_setzero_si256 (),
3417						  (__mmask32) __U);
3418}
3419
3420extern __inline __m128i
3421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3422_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3423		       __m128i __B)
3424{
3425  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3426						  (__v16qi) __B,
3427						  (__v16qi) __W,
3428						  (__mmask16) __U);
3429}
3430
3431extern __inline __m128i
3432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3433_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3434{
3435  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3436						  (__v16qi) __B,
3437						  (__v16qi)
3438						  _mm_setzero_si128 (),
3439						  (__mmask16) __U);
3440}
3441
3442extern __inline __m256i
3443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3445{
3446  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3447						    (__v16hi) __B,
3448						    (__v32qi)
3449						    _mm256_setzero_si256 (),
3450						    __M);
3451}
3452
3453extern __inline __m256i
3454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3455_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3456			 __m256i __B)
3457{
3458  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3459						    (__v16hi) __B,
3460						    (__v32qi) __W,
3461						    __M);
3462}
3463
3464extern __inline __m128i
3465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3467{
3468  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3469						    (__v8hi) __B,
3470						    (__v16qi)
3471						    _mm_setzero_si128 (),
3472						    __M);
3473}
3474
3475extern __inline __m128i
3476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3477_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3478		      __m128i __B)
3479{
3480  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3481						    (__v8hi) __B,
3482						    (__v16qi) __W,
3483						    __M);
3484}
3485
3486extern __inline __m256i
3487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3488_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3489{
3490  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3491						    (__v16hi) __B,
3492						    (__v32qi)
3493						    _mm256_setzero_si256 (),
3494						    __M);
3495}
3496
3497extern __inline __m256i
3498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3500			  __m256i __B)
3501{
3502  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3503						    (__v16hi) __B,
3504						    (__v32qi) __W,
3505						    __M);
3506}
3507
3508extern __inline __m128i
3509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3511{
3512  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3513						    (__v8hi) __B,
3514						    (__v16qi)
3515						    _mm_setzero_si128 (),
3516						    __M);
3517}
3518
3519extern __inline __m128i
3520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3522		       __m128i __B)
3523{
3524  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3525						    (__v8hi) __B,
3526						    (__v16qi) __W,
3527						    __M);
3528}
3529
3530extern __inline __m256i
3531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
3533{
3534  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3535						 (__v32qi) __W,
3536						 (__mmask32) __U);
3537}
3538
3539extern __inline __m256i
3540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3541_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
3542{
3543  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3544						 (__v32qi)
3545						 _mm256_setzero_si256 (),
3546						 (__mmask32) __U);
3547}
3548
3549extern __inline __m128i
3550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3551_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
3552{
3553  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3554						 (__v16qi) __W,
3555						 (__mmask16) __U);
3556}
3557
3558extern __inline __m128i
3559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
3561{
3562  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3563						 (__v16qi)
3564						 _mm_setzero_si128 (),
3565						 (__mmask16) __U);
3566}
3567
3568extern __inline __m256i
3569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3570_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
3571{
3572  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3573						 (__v16hi) __W,
3574						 (__mmask16) __U);
3575}
3576
3577extern __inline __m256i
3578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3579_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
3580{
3581  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3582						 (__v16hi)
3583						 _mm256_setzero_si256 (),
3584						 (__mmask16) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
3590{
3591  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3592						 (__v8hi) __W,
3593						 (__mmask8) __U);
3594}
3595
3596extern __inline __m128i
3597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
3599{
3600  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3601						 (__v8hi)
3602						 _mm_setzero_si128 (),
3603						 (__mmask8) __U);
3604}
3605
3606extern __inline __mmask32
3607  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3608_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
3609{
3610  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3611						   (__v32qi) __Y, 4,
3612						   (__mmask32) - 1);
3613}
3614
3615extern __inline __mmask32
3616  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3617_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
3618{
3619  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3620						   (__v32qi) __Y, 1,
3621						   (__mmask32) - 1);
3622}
3623
3624extern __inline __mmask32
3625  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3626_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
3627{
3628  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3629						   (__v32qi) __Y, 5,
3630						   (__mmask32) - 1);
3631}
3632
3633extern __inline __mmask32
3634  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3635_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
3636{
3637  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3638						   (__v32qi) __Y, 2,
3639						   (__mmask32) - 1);
3640}
3641
3642extern __inline __mmask16
3643  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3644_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
3645{
3646  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3647						   (__v16hi) __Y, 4,
3648						   (__mmask16) - 1);
3649}
3650
3651extern __inline __mmask16
3652  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3653_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
3654{
3655  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3656						   (__v16hi) __Y, 1,
3657						   (__mmask16) - 1);
3658}
3659
3660extern __inline __mmask16
3661  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
3663{
3664  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3665						   (__v16hi) __Y, 5,
3666						   (__mmask16) - 1);
3667}
3668
3669extern __inline __mmask16
3670  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
3672{
3673  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3674						   (__v16hi) __Y, 2,
3675						   (__mmask16) - 1);
3676}
3677
3678extern __inline void
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
3681{
3682  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
3683				     (__v16hi) __A,
3684				     (__mmask16) __U);
3685}
3686
3687extern __inline void
3688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
3690{
3691  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
3692				     (__v8hi) __A,
3693				     (__mmask8) __U);
3694}
3695
3696extern __inline __m128i
3697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3698_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3699		     __m128i __B)
3700{
3701  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
3702						  (__v8hi) __B,
3703						  (__v8hi) __W,
3704						  (__mmask8) __U);
3705}
3706
3707extern __inline __m128i
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3710		    __m128i __B)
3711{
3712  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
3713						  (__v16qi) __B,
3714						  (__v16qi) __W,
3715						  (__mmask16) __U);
3716}
3717
3718extern __inline __m128i
3719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3721{
3722  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
3723						  (__v16qi) __B,
3724						  (__v16qi)
3725						  _mm_setzero_si128 (),
3726						  (__mmask16) __U);
3727}
3728
3729extern __inline __m128i
3730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3731_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3732		     __m128i __B)
3733{
3734  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
3735						  (__v8hi) __B,
3736						  (__v8hi) __W,
3737						  (__mmask8) __U);
3738}
3739
3740extern __inline __m128i
3741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3742_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3743{
3744  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
3745						  (__v8hi) __B,
3746						  (__v8hi)
3747						  _mm_setzero_si128 (),
3748						  (__mmask8) __U);
3749}
3750
3751extern __inline __m128i
3752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
3754		    __m128i __B)
3755{
3756  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
3757						   (__v16qi) __B,
3758						   (__v16qi) __W,
3759						   (__mmask16) __U);
3760}
3761
3762extern __inline __m128i
3763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
3765{
3766  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
3767						   (__v16qi) __B,
3768						   (__v16qi)
3769						   _mm_setzero_si128 (),
3770						   (__mmask16) __U);
3771}
3772
3773extern __inline __m128i
3774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
3776		     __m128i __B)
3777{
3778  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
3779						   (__v8hi) __B,
3780						   (__v8hi) __W,
3781						   (__mmask8) __U);
3782}
3783
3784extern __inline __m128i
3785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3786_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
3787{
3788  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
3789						   (__v8hi) __B,
3790						   (__v8hi)
3791						   _mm_setzero_si128 (),
3792						   (__mmask8) __U);
3793}
3794
3795extern __inline __m256i
3796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3797_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3798		       __m128i __B)
3799{
3800  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
3801						 (__v8hi) __B,
3802						 (__v16hi) __W,
3803						 (__mmask16) __U);
3804}
3805
3806extern __inline __m256i
3807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3808_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
3809{
3810  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
3811						 (__v8hi) __B,
3812						 (__v16hi)
3813						 _mm256_setzero_si256 (),
3814						 (__mmask16) __U);
3815}
3816
3817extern __inline __m128i
3818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3819_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3820		    __m128i __B)
3821{
3822  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
3823						 (__v8hi) __B,
3824						 (__v8hi) __W,
3825						 (__mmask8) __U);
3826}
3827
3828extern __inline __m128i
3829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3830_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3831{
3832  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
3833						 (__v8hi) __B,
3834						 (__v8hi)
3835						 _mm_setzero_si128 (),
3836						 (__mmask8) __U);
3837}
3838
3839extern __inline __m256i
3840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3842		       __m128i __B)
3843{
3844  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
3845						 (__v8hi) __B,
3846						 (__v16hi) __W,
3847						 (__mmask16) __U);
3848}
3849
3850extern __inline __m256i
3851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3852_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
3853{
3854  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
3855						 (__v8hi) __B,
3856						 (__v16hi)
3857						 _mm256_setzero_si256 (),
3858						 (__mmask16) __U);
3859}
3860
3861extern __inline __m128i
3862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3863_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3864		    __m128i __B)
3865{
3866  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
3867						 (__v8hi) __B,
3868						 (__v8hi) __W,
3869						 (__mmask8) __U);
3870}
3871
3872extern __inline __m128i
3873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3875{
3876  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
3877						 (__v8hi) __B,
3878						 (__v8hi)
3879						 _mm_setzero_si128 (),
3880						 (__mmask8) __U);
3881}
3882
3883extern __inline __m128i
3884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3886{
3887  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
3888						  (__v8hi) __B,
3889						  (__v8hi)
3890						  _mm_setzero_si128 (),
3891						  (__mmask8) __U);
3892}
3893
3894extern __inline __m128i
3895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
3897		    __m128i __B)
3898{
3899  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
3900						   (__v16qi) __B,
3901						   (__v16qi) __W,
3902						   (__mmask16) __U);
3903}
3904
3905extern __inline __m128i
3906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
3908{
3909  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
3910						   (__v16qi) __B,
3911						   (__v16qi)
3912						   _mm_setzero_si128 (),
3913						   (__mmask16) __U);
3914}
3915
3916extern __inline __m128i
3917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
3919		     __m128i __B)
3920{
3921  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
3922						   (__v8hi) __B,
3923						   (__v8hi) __W,
3924						   (__mmask8) __U);
3925}
3926
3927extern __inline __m128i
3928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3929_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
3930{
3931  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
3932						   (__v8hi) __B,
3933						   (__v8hi)
3934						   _mm_setzero_si128 (),
3935						   (__mmask8) __U);
3936}
3937
3938extern __inline __m128i
3939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3940_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3941		   __m128i __B)
3942{
3943  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
3944						 (__v16qi) __B,
3945						 (__v16qi) __W,
3946						 (__mmask16) __U);
3947}
3948
3949extern __inline __m128i
3950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3951_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3952{
3953  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
3954						 (__v16qi) __B,
3955						 (__v16qi)
3956						 _mm_setzero_si128 (),
3957						 (__mmask16) __U);
3958}
3959
3960extern __inline __m128i
3961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3963		    __m128i __B)
3964{
3965  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
3966						 (__v8hi) __B,
3967						 (__v8hi) __W,
3968						 (__mmask8) __U);
3969}
3970
3971extern __inline __m128i
3972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3973_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3974{
3975  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
3976						 (__v8hi) __B,
3977						 (__v8hi)
3978						 _mm_setzero_si128 (),
3979						 (__mmask8) __U);
3980}
3981
3982extern __inline __m128i
3983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3985		    __m128i __B)
3986{
3987  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
3988						  (__v16qi) __B,
3989						  (__v16qi) __W,
3990						  (__mmask16) __U);
3991}
3992
3993extern __inline __m128i
3994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3995_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3996{
3997  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
3998						  (__v16qi) __B,
3999						  (__v16qi)
4000						  _mm_setzero_si128 (),
4001						  (__mmask16) __U);
4002}
4003
4004extern __inline __m128i
4005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4006_mm_cvtepi16_epi8 (__m128i __A)
4007{
4008
4009  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4010						  (__v16qi)_mm_undefined_si128(),
4011						  (__mmask8) -1);
4012}
4013
4014extern __inline __m128i
4015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4016_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
4017{
4018  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4019						  (__v16qi) __O, __M);
4020}
4021
4022extern __inline __m128i
4023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4024_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
4025{
4026  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4027						  (__v16qi)
4028						  _mm_setzero_si128 (),
4029						  __M);
4030}
4031
4032extern __inline __m256i
4033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4034_mm256_srav_epi16 (__m256i __A, __m256i __B)
4035{
4036  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4037						  (__v16hi) __B,
4038						  (__v16hi)
4039						  _mm256_setzero_si256 (),
4040						  (__mmask16) -1);
4041}
4042
4043extern __inline __m256i
4044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4045_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4046			__m256i __B)
4047{
4048  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4049						  (__v16hi) __B,
4050						  (__v16hi) __W,
4051						  (__mmask16) __U);
4052}
4053
4054extern __inline __m256i
4055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4056_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4057{
4058  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4059						  (__v16hi) __B,
4060						  (__v16hi)
4061						  _mm256_setzero_si256 (),
4062						  (__mmask16) __U);
4063}
4064
4065extern __inline __m128i
4066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4067_mm_srav_epi16 (__m128i __A, __m128i __B)
4068{
4069  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4070						 (__v8hi) __B,
4071						 (__v8hi)
4072						 _mm_setzero_hi (),
4073						 (__mmask8) -1);
4074}
4075
4076extern __inline __m128i
4077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4078_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4079		     __m128i __B)
4080{
4081  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4082						 (__v8hi) __B,
4083						 (__v8hi) __W,
4084						 (__mmask8) __U);
4085}
4086
4087extern __inline __m128i
4088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4089_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4090{
4091  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4092						 (__v8hi) __B,
4093						 (__v8hi)
4094						 _mm_setzero_si128 (),
4095						 (__mmask8) __U);
4096}
4097
4098extern __inline __m256i
4099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100_mm256_srlv_epi16 (__m256i __A, __m256i __B)
4101{
4102  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4103						  (__v16hi) __B,
4104						  (__v16hi)
4105						  _mm256_setzero_si256 (),
4106						  (__mmask16) -1);
4107}
4108
4109extern __inline __m256i
4110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4112			__m256i __B)
4113{
4114  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4115						  (__v16hi) __B,
4116						  (__v16hi) __W,
4117						  (__mmask16) __U);
4118}
4119
4120extern __inline __m256i
4121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4122_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4123{
4124  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4125						  (__v16hi) __B,
4126						  (__v16hi)
4127						  _mm256_setzero_si256 (),
4128						  (__mmask16) __U);
4129}
4130
4131extern __inline __m128i
4132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4133_mm_srlv_epi16 (__m128i __A, __m128i __B)
4134{
4135  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4136						 (__v8hi) __B,
4137						 (__v8hi)
4138						 _mm_setzero_hi (),
4139						 (__mmask8) -1);
4140}
4141
4142extern __inline __m128i
4143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4144_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4145		     __m128i __B)
4146{
4147  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4148						 (__v8hi) __B,
4149						 (__v8hi) __W,
4150						 (__mmask8) __U);
4151}
4152
4153extern __inline __m128i
4154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4155_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4156{
4157  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4158						 (__v8hi) __B,
4159						 (__v8hi)
4160						 _mm_setzero_si128 (),
4161						 (__mmask8) __U);
4162}
4163
4164extern __inline __m256i
4165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4166_mm256_sllv_epi16 (__m256i __A, __m256i __B)
4167{
4168  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4169						  (__v16hi) __B,
4170						  (__v16hi)
4171						  _mm256_setzero_si256 (),
4172						  (__mmask16) -1);
4173}
4174
4175extern __inline __m256i
4176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4177_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4178			__m256i __B)
4179{
4180  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4181						  (__v16hi) __B,
4182						  (__v16hi) __W,
4183						  (__mmask16) __U);
4184}
4185
4186extern __inline __m256i
4187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4188_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4189{
4190  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4191						  (__v16hi) __B,
4192						  (__v16hi)
4193						  _mm256_setzero_si256 (),
4194						  (__mmask16) __U);
4195}
4196
4197extern __inline __m128i
4198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4199_mm_sllv_epi16 (__m128i __A, __m128i __B)
4200{
4201  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4202						 (__v8hi) __B,
4203						 (__v8hi)
4204						 _mm_setzero_hi (),
4205						 (__mmask8) -1);
4206}
4207
4208extern __inline __m128i
4209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4211		     __m128i __B)
4212{
4213  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4214						 (__v8hi) __B,
4215						 (__v8hi) __W,
4216						 (__mmask8) __U);
4217}
4218
4219extern __inline __m128i
4220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4221_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4222{
4223  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4224						 (__v8hi) __B,
4225						 (__v8hi)
4226						 _mm_setzero_si128 (),
4227						 (__mmask8) __U);
4228}
4229
4230extern __inline __m128i
4231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4232_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4233		    __m128i __B)
4234{
4235  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4236						 (__v8hi) __B,
4237						 (__v8hi) __W,
4238						 (__mmask8) __U);
4239}
4240
4241extern __inline __m128i
4242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4243_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4244{
4245  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4246						 (__v8hi) __B,
4247						 (__v8hi)
4248						 _mm_setzero_si128 (),
4249						 (__mmask8) __U);
4250}
4251
4252extern __inline __m256i
4253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4255		       __m128i __B)
4256{
4257  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4258						 (__v8hi) __B,
4259						 (__v16hi) __W,
4260						 (__mmask16) __U);
4261}
4262
4263extern __inline __m256i
4264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4265_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4266{
4267  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4268						 (__v8hi) __B,
4269						 (__v16hi)
4270						 _mm256_setzero_si256 (),
4271						 (__mmask16) __U);
4272}
4273
4274extern __inline __m256i
4275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4276_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4277{
4278  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4279						    (__v8si) __B,
4280						    (__v16hi)
4281						    _mm256_setzero_si256 (),
4282						    __M);
4283}
4284
4285extern __inline __m256i
4286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4287_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4288			  __m256i __B)
4289{
4290  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4291						    (__v8si) __B,
4292						    (__v16hi) __W,
4293						    __M);
4294}
4295
4296extern __inline __m128i
4297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4298_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4299{
4300  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4301						    (__v4si) __B,
4302						    (__v8hi)
4303						    _mm_setzero_si128 (),
4304						    __M);
4305}
4306
4307extern __inline __m128i
4308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309_mm_mask_packus_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
4310		       __m128i __B)
4311{
4312  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4313						    (__v4si) __B,
4314						    (__v8hi) __W, __M);
4315}
4316
4317extern __inline __m256i
4318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4320{
4321  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4322						    (__v8si) __B,
4323						    (__v16hi)
4324						    _mm256_setzero_si256 (),
4325						    __M);
4326}
4327
4328extern __inline __m256i
4329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4330_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4331			 __m256i __B)
4332{
4333  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4334						    (__v8si) __B,
4335						    (__v16hi) __W,
4336						    __M);
4337}
4338
4339extern __inline __m128i
4340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4341_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4342{
4343  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4344						    (__v4si) __B,
4345						    (__v8hi)
4346						    _mm_setzero_si128 (),
4347						    __M);
4348}
4349
4350extern __inline __m128i
4351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4352_mm_mask_packs_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
4353		      __m128i __B)
4354{
4355  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4356						    (__v4si) __B,
4357						    (__v8hi) __W, __M);
4358}
4359
4360extern __inline __mmask16
4361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4362_mm_mask_cmpneq_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4363{
4364  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4365						   (__v16qi) __Y, 4,
4366						   (__mmask16) __M);
4367}
4368
4369extern __inline __mmask16
4370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4371_mm_mask_cmplt_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4372{
4373  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4374						   (__v16qi) __Y, 1,
4375						   (__mmask16) __M);
4376}
4377
4378extern __inline __mmask16
4379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4380_mm_mask_cmpge_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4381{
4382  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4383						   (__v16qi) __Y, 5,
4384						   (__mmask16) __M);
4385}
4386
4387extern __inline __mmask16
4388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389_mm_mask_cmple_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4390{
4391  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4392						   (__v16qi) __Y, 2,
4393						   (__mmask16) __M);
4394}
4395
4396extern __inline __mmask8
4397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4398_mm_mask_cmpneq_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4399{
4400  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4401						  (__v8hi) __Y, 4,
4402						  (__mmask8) __M);
4403}
4404
4405extern __inline __mmask8
4406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4407_mm_mask_cmplt_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4408{
4409  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4410						  (__v8hi) __Y, 1,
4411						  (__mmask8) __M);
4412}
4413
4414extern __inline __mmask8
4415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416_mm_mask_cmpge_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4417{
4418  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4419						  (__v8hi) __Y, 5,
4420						  (__mmask8) __M);
4421}
4422
4423extern __inline __mmask8
4424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425_mm_mask_cmple_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4426{
4427  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4428						  (__v8hi) __Y, 2,
4429						  (__mmask8) __M);
4430}
4431
4432extern __inline __mmask16
4433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4434_mm_mask_cmpneq_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4435{
4436  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4437						  (__v16qi) __Y, 4,
4438						  (__mmask16) __M);
4439}
4440
4441extern __inline __mmask16
4442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4443_mm_mask_cmplt_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4444{
4445  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4446						  (__v16qi) __Y, 1,
4447						  (__mmask16) __M);
4448}
4449
4450extern __inline __mmask16
4451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4452_mm_mask_cmpge_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4453{
4454  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4455						  (__v16qi) __Y, 5,
4456						  (__mmask16) __M);
4457}
4458
4459extern __inline __mmask16
4460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4461_mm_mask_cmple_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4462{
4463  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4464						  (__v16qi) __Y, 2,
4465						  (__mmask16) __M);
4466}
4467
4468extern __inline __mmask8
4469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4470_mm_mask_cmpneq_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4471{
4472  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4473						 (__v8hi) __Y, 4,
4474						 (__mmask8) __M);
4475}
4476
4477extern __inline __mmask8
4478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479_mm_mask_cmplt_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4480{
4481  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4482						 (__v8hi) __Y, 1,
4483						 (__mmask8) __M);
4484}
4485
4486extern __inline __mmask8
4487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488_mm_mask_cmpge_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4489{
4490  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4491						 (__v8hi) __Y, 5,
4492						 (__mmask8) __M);
4493}
4494
4495extern __inline __mmask8
4496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4497_mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4498{
4499  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4500						 (__v8hi) __Y, 2,
4501						 (__mmask8) __M);
4502}
4503
4504extern __inline __mmask8
4505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4506_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4507{
4508  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4509						  (__v32qi) __Y, 4,
4510						  (__mmask8) __M);
4511}
4512
4513extern __inline __mmask8
4514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4515_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4516{
4517  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4518						  (__v32qi) __Y, 1,
4519						  (__mmask8) __M);
4520}
4521
4522extern __inline __mmask8
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4525{
4526  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4527						  (__v32qi) __Y, 5,
4528						  (__mmask8) __M);
4529}
4530
4531extern __inline __mmask8
4532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4533_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4534{
4535  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4536						  (__v32qi) __Y, 2,
4537						  (__mmask8) __M);
4538}
4539
4540extern __inline __mmask8
4541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4542_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4543{
4544  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4545						  (__v16hi) __Y, 4,
4546						  (__mmask8) __M);
4547}
4548
4549extern __inline __mmask8
4550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4551_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4552{
4553  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4554						  (__v16hi) __Y, 1,
4555						  (__mmask8) __M);
4556}
4557
4558extern __inline __mmask8
4559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4560_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4561{
4562  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4563						  (__v16hi) __Y, 5,
4564						  (__mmask8) __M);
4565}
4566
4567extern __inline __mmask8
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4570{
4571  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4572						  (__v16hi) __Y, 2,
4573						  (__mmask8) __M);
4574}
4575
4576extern __inline __mmask8
4577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4579{
4580  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4581						 (__v32qi) __Y, 4,
4582						 (__mmask8) __M);
4583}
4584
4585extern __inline __mmask8
4586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4587_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4588{
4589  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4590						 (__v32qi) __Y, 1,
4591						 (__mmask8) __M);
4592}
4593
4594extern __inline __mmask8
4595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4596_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4597{
4598  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4599						 (__v32qi) __Y, 5,
4600						 (__mmask8) __M);
4601}
4602
4603extern __inline __mmask8
4604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4606{
4607  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4608						 (__v32qi) __Y, 2,
4609						 (__mmask8) __M);
4610}
4611
4612extern __inline __mmask8
4613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4614_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4615{
4616  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4617						 (__v16hi) __Y, 4,
4618						 (__mmask8) __M);
4619}
4620
4621extern __inline __mmask8
4622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4624{
4625  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4626						 (__v16hi) __Y, 1,
4627						 (__mmask8) __M);
4628}
4629
4630extern __inline __mmask8
4631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4633{
4634  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4635						 (__v16hi) __Y, 5,
4636						 (__mmask8) __M);
4637}
4638
4639extern __inline __mmask8
4640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4641_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
4642{
4643  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4644						 (__v16hi) __Y, 2,
4645						 (__mmask8) __M);
4646}
4647
4648#ifdef __DISABLE_AVX512VLBW__
4649#undef __DISABLE_AVX512VLBW__
4650#pragma GCC pop_options
4651#endif /* __DISABLE_AVX512VLBW__ */
4652
4653#endif /* _AVX512VLBWINTRIN_H_INCLUDED */
4654