1/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLBWINTRIN_H_INCLUDED
29#define _AVX512VLBWINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
32#pragma GCC push_options
33#pragma GCC target("avx512vl,avx512bw")
34#define __DISABLE_AVX512VLBW__
35#endif /* __AVX512VLBW__ */
36
37
38extern __inline __m256i
39__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
40_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
41{
42  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
43						    (__v32qi) __W,
44						    (__mmask32) __U);
45}
46
47extern __inline __m256i
48__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
50{
51  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
52						    (__v32qi)
53						    _mm256_setzero_si256 (),
54						    (__mmask32) __U);
55}
56
57extern __inline __m128i
58__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
60{
61  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
62						    (__v16qi) __W,
63						    (__mmask16) __U);
64}
65
66extern __inline __m128i
67__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
69{
70  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
71						    (__v16qi)
72						    _mm_setzero_si128 (),
73						    (__mmask16) __U);
74}
75
76extern __inline void
77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
79{
80  __builtin_ia32_storedquqi256_mask ((char *) __P,
81				     (__v32qi) __A,
82				     (__mmask32) __U);
83}
84
85extern __inline void
86__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
88{
89  __builtin_ia32_storedquqi128_mask ((char *) __P,
90				     (__v16qi) __A,
91				     (__mmask16) __U);
92}
93
94extern __inline __m256i
95__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
96_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
97{
98  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
99						     (__v16hi) __W,
100						     (__mmask16) __U);
101}
102
103extern __inline __m256i
104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
106{
107  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
108						     (__v16hi)
109						     _mm256_setzero_si256 (),
110						     (__mmask16) __U);
111}
112
113extern __inline __m128i
114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
115_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
116{
117  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
118						     (__v8hi) __W,
119						     (__mmask8) __U);
120}
121
122extern __inline __m128i
123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
124_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
125{
126  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
127						     (__v8hi)
128						     _mm_setzero_si128 (),
129						     (__mmask8) __U);
130}
131
132
133extern __inline __m256i
134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
136{
137  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
138						    (__v16hi) __W,
139						    (__mmask16) __U);
140}
141
142extern __inline __m256i
143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
145{
146  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
147						    (__v16hi)
148						    _mm256_setzero_si256 (),
149						    (__mmask16) __U);
150}
151
152extern __inline __m128i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
155{
156  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
157						    (__v8hi) __W,
158						    (__mmask8) __U);
159}
160
161extern __inline __m128i
162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
164{
165  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
166						    (__v8hi)
167						    _mm_setzero_si128 (),
168						    (__mmask8) __U);
169}
170
171extern __inline __m256i
172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
174{
175  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
176						     (__v32qi) __W,
177						     (__mmask32) __U);
178}
179
180extern __inline __m256i
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
183{
184  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
185						     (__v32qi)
186						     _mm256_setzero_si256 (),
187						     (__mmask32) __U);
188}
189
190extern __inline __m128i
191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
193{
194  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
195						     (__v16qi) __W,
196						     (__mmask16) __U);
197}
198
199extern __inline __m128i
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
202{
203  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
204						     (__v16qi)
205						     _mm_setzero_si128 (),
206						     (__mmask16) __U);
207}
208
209extern __inline __m128i
210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
211_mm256_cvtepi16_epi8 (__m256i __A)
212{
213
214  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
215						  (__v16qi)_mm_undefined_si128(),
216						  (__mmask16) -1);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
222{
223  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
224}
225
226extern __inline __m128i
227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
228_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
229{
230  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
231						  (__v16qi) __O, __M);
232}
233
234extern __inline __m128i
235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
236_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
237{
238  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
239						  (__v16qi)
240						  _mm_setzero_si128 (),
241						  __M);
242}
243
244extern __inline __m128i
245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
246_mm_cvtsepi16_epi8 (__m128i __A)
247{
248
249  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
250						   (__v16qi)_mm_undefined_si128(),
251						   (__mmask8) -1);
252}
253
254extern __inline void
255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
256_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
257{
258  __builtin_ia32_pmovswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
259}
260
261extern __inline __m128i
262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
263_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
264{
265  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
266						   (__v16qi) __O, __M);
267}
268
269extern __inline __m128i
270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
272{
273  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
274						   (__v16qi)
275						   _mm_setzero_si128 (),
276						   __M);
277}
278
279extern __inline __m128i
280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281_mm256_cvtsepi16_epi8 (__m256i __A)
282{
283
284  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
285						   (__v16qi)_mm_undefined_si128(),
286						   (__mmask16) -1);
287}
288
289extern __inline void
290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
292{
293  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
294}
295
296extern __inline __m128i
297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
299{
300  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
301						   (__v16qi) __O, __M);
302}
303
304extern __inline __m128i
305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
306_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
307{
308  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
309						   (__v16qi)
310						   _mm_setzero_si128 (),
311						   __M);
312}
313
314extern __inline __m128i
315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
316_mm_cvtusepi16_epi8 (__m128i __A)
317{
318
319  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
320						    (__v16qi)_mm_undefined_si128(),
321						    (__mmask8) -1);
322}
323
324extern __inline void
325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
326_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
327{
328  __builtin_ia32_pmovuswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
329}
330
331extern __inline __m128i
332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
333_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
334{
335  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
336						    (__v16qi) __O,
337						    __M);
338}
339
340extern __inline __m128i
341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
342_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
343{
344  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
345						    (__v16qi)
346						    _mm_setzero_si128 (),
347						    __M);
348}
349
350extern __inline __m128i
351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352_mm256_cvtusepi16_epi8 (__m256i __A)
353{
354
355  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
356						    (__v16qi)_mm_undefined_si128(),
357						    (__mmask16) -1);
358}
359
360extern __inline void
361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
363{
364  __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
365}
366
367extern __inline __m128i
368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
370{
371  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
372						    (__v16qi) __O,
373						    __M);
374}
375
376extern __inline __m128i
377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
379{
380  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
381						    (__v16qi)
382						    _mm_setzero_si128 (),
383						    __M);
384}
385
386extern __inline __m256i
387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
389{
390  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
391						       (__v32qi) __O,
392						       __M);
393}
394
395extern __inline __m256i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
398{
399  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
400						       (__v32qi)
401						       _mm256_setzero_si256 (),
402						       __M);
403}
404
405extern __inline __m256i
406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
407_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
408{
409  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
410							   (__v32qi) __O,
411							   __M);
412}
413
414extern __inline __m256i
415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
417{
418  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
419							   (__v32qi)
420							   _mm256_setzero_si256 (),
421							   __M);
422}
423
424extern __inline __m128i
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
427{
428  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
429						       (__v16qi) __O,
430						       __M);
431}
432
433extern __inline __m128i
434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
436{
437  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
438						       (__v16qi)
439						       _mm_setzero_si128 (),
440						       __M);
441}
442
443extern __inline __m128i
444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
445_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
446{
447  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
448							   (__v16qi) __O,
449							   __M);
450}
451
452extern __inline __m128i
453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
455{
456  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
457							   (__v16qi)
458							   _mm_setzero_si128 (),
459							   __M);
460}
461
462extern __inline __m256i
463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
465{
466  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
467						       (__v16hi) __O,
468						       __M);
469}
470
471extern __inline __m256i
472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
473_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
474{
475  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
476						       (__v16hi)
477						       _mm256_setzero_si256 (),
478						       __M);
479}
480
481extern __inline __m256i
482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
483_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
484{
485  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
486							   (__v16hi) __O,
487							   __M);
488}
489
490extern __inline __m256i
491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
492_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
493{
494  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
495							   (__v16hi)
496							   _mm256_setzero_si256 (),
497							   __M);
498}
499
500extern __inline __m128i
501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
503{
504  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
505						       (__v8hi) __O,
506						       __M);
507}
508
509extern __inline __m128i
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
512{
513  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
514						       (__v8hi)
515						       _mm_setzero_si128 (),
516						       __M);
517}
518
519extern __inline __m128i
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
522{
523  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
524							   (__v8hi) __O,
525							   __M);
526}
527
528extern __inline __m128i
529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
531{
532  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
533							   (__v8hi)
534							   _mm_setzero_si128 (),
535							   __M);
536}
537
538extern __inline __m256i
539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
541{
542  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
543						     (__v16hi) __A,
544						     (__v16hi)
545						     _mm256_setzero_si256 (),
546						     (__mmask16) -1);
547}
548
549extern __inline __m256i
550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
551_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
552				__m256i __B)
553{
554  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
555						     (__v16hi) __A,
556						     (__v16hi)
557						     _mm256_setzero_si256 (),
558						     (__mmask16) __M);
559}
560
561extern __inline __m256i
562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
563_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
564			       __m256i __B)
565{
566  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
567						     (__v16hi) __A,
568						     (__v16hi) __W,
569						     (__mmask16) __M);
570}
571
572extern __inline __m128i
573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
574_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
575{
576  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
577						     (__v8hi) __A,
578						     (__v8hi)
579						     _mm_setzero_si128 (),
580						     (__mmask8) -1);
581}
582
583extern __inline __m128i
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
586{
587  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
588						     (__v8hi) __A,
589						     (__v8hi)
590						     _mm_setzero_si128 (),
591						     (__mmask8) __M);
592}
593
594extern __inline __m128i
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
597			    __m128i __B)
598{
599  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
600						     (__v8hi) __A,
601						     (__v8hi) __W,
602						     (__mmask8) __M);
603}
604
605extern __inline __m256i
606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
608{
609  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
610							/* idx */ ,
611							(__v16hi) __A,
612							(__v16hi) __B,
613							(__mmask16) -1);
614}
615
616extern __inline __m256i
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
619				__m256i __I, __m256i __B)
620{
621  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
622							/* idx */ ,
623							(__v16hi) __A,
624							(__v16hi) __B,
625							(__mmask16)
626							__U);
627}
628
629extern __inline __m256i
630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
631_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
632				 __mmask16 __U, __m256i __B)
633{
634  return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
635							(__v16hi) __I
636							/* idx */ ,
637							(__v16hi) __B,
638							(__mmask16)
639							__U);
640}
641
642extern __inline __m256i
643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
644_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
645				 __m256i __I, __m256i __B)
646{
647  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
648							 /* idx */ ,
649							 (__v16hi) __A,
650							 (__v16hi) __B,
651							 (__mmask16)
652							 __U);
653}
654
655extern __inline __m128i
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
658{
659  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
660							/* idx */ ,
661							(__v8hi) __A,
662							(__v8hi) __B,
663							(__mmask8) -1);
664}
665
666extern __inline __m128i
667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
669			     __m128i __B)
670{
671  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
672							/* idx */ ,
673							(__v8hi) __A,
674							(__v8hi) __B,
675							(__mmask8)
676							__U);
677}
678
679extern __inline __m128i
680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
681_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
682			      __m128i __B)
683{
684  return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
685							(__v8hi) __I
686							/* idx */ ,
687							(__v8hi) __B,
688							(__mmask8)
689							__U);
690}
691
692extern __inline __m128i
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
695			      __m128i __B)
696{
697  return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
698							 /* idx */ ,
699							 (__v8hi) __A,
700							 (__v8hi) __B,
701							 (__mmask8)
702							 __U);
703}
704
705extern __inline __m256i
706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
707_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
708			   __m256i __Y)
709{
710  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
711						     (__v32qi) __Y,
712						     (__v16hi) __W,
713						     (__mmask16) __U);
714}
715
716extern __inline __m256i
717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
719{
720  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
721						     (__v32qi) __Y,
722						     (__v16hi)
723						     _mm256_setzero_si256 (),
724						     (__mmask16) __U);
725}
726
727extern __inline __m128i
728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
729_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
730			__m128i __Y)
731{
732  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
733						     (__v16qi) __Y,
734						     (__v8hi) __W,
735						     (__mmask8) __U);
736}
737
738extern __inline __m128i
739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
740_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
741{
742  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
743						     (__v16qi) __Y,
744						     (__v8hi)
745						     _mm_setzero_si128 (),
746						     (__mmask8) __U);
747}
748
749extern __inline __m256i
750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
751_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
752			__m256i __B)
753{
754  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
755						   (__v16hi) __B,
756						   (__v8si) __W,
757						   (__mmask8) __U);
758}
759
760extern __inline __m256i
761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
762_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
763{
764  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
765						   (__v16hi) __B,
766						   (__v8si)
767						   _mm256_setzero_si256 (),
768						   (__mmask8) __U);
769}
770
771extern __inline __m128i
772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
773_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
774		     __m128i __B)
775{
776  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
777						   (__v8hi) __B,
778						   (__v4si) __W,
779						   (__mmask8) __U);
780}
781
782extern __inline __m128i
783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
785{
786  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
787						   (__v8hi) __B,
788						   (__v4si)
789						   _mm_setzero_si128 (),
790						   (__mmask8) __U);
791}
792
793extern __inline __mmask16
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm_movepi8_mask (__m128i __A)
796{
797  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
798}
799
800extern __inline __mmask32
801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802_mm256_movepi8_mask (__m256i __A)
803{
804  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
805}
806
807extern __inline __mmask8
808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809_mm_movepi16_mask (__m128i __A)
810{
811  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
812}
813
814extern __inline __mmask16
815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816_mm256_movepi16_mask (__m256i __A)
817{
818  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
819}
820
821extern __inline __m128i
822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823_mm_movm_epi8 (__mmask16 __A)
824{
825  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
826}
827
828extern __inline __m256i
829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830_mm256_movm_epi8 (__mmask32 __A)
831{
832  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
833}
834
835extern __inline __m128i
836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
837_mm_movm_epi16 (__mmask8 __A)
838{
839  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
840}
841
842extern __inline __m256i
843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844_mm256_movm_epi16 (__mmask16 __A)
845{
846  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
847}
848
849extern __inline __mmask16
850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851_mm_test_epi8_mask (__m128i __A, __m128i __B)
852{
853  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
854						(__v16qi) __B,
855						(__mmask16) -1);
856}
857
858extern __inline __mmask16
859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
861{
862  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
863						(__v16qi) __B, __U);
864}
865
866extern __inline __mmask32
867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868_mm256_test_epi8_mask (__m256i __A, __m256i __B)
869{
870  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
871						(__v32qi) __B,
872						(__mmask32) -1);
873}
874
875extern __inline __mmask32
876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
878{
879  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
880						(__v32qi) __B, __U);
881}
882
883extern __inline __mmask8
884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
885_mm_test_epi16_mask (__m128i __A, __m128i __B)
886{
887  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
888					       (__v8hi) __B,
889					       (__mmask8) -1);
890}
891
892extern __inline __mmask8
893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
895{
896  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
897					       (__v8hi) __B, __U);
898}
899
900extern __inline __mmask16
901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902_mm256_test_epi16_mask (__m256i __A, __m256i __B)
903{
904  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
905						(__v16hi) __B,
906						(__mmask16) -1);
907}
908
909extern __inline __mmask16
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
912{
913  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
914						(__v16hi) __B, __U);
915}
916
917extern __inline __m256i
918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
919_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
920{
921  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
922						  (__v16hi) __B,
923						  (__v16hi)
924						  _mm256_setzero_si256 (),
925						  (__mmask16) __M);
926}
927
928extern __inline __m256i
929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
931		       __m256i __B)
932{
933  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
934						  (__v16hi) __B,
935						  (__v16hi) __W,
936						  (__mmask16) __M);
937}
938
939extern __inline __m128i
940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
942{
943  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
944						  (__v8hi) __B,
945						  (__v8hi)
946						  _mm_setzero_si128 (),
947						  (__mmask8) __M);
948}
949
950extern __inline __m128i
951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
952_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
953		    __m128i __B)
954{
955  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
956						  (__v8hi) __B,
957						  (__v8hi) __W,
958						  (__mmask8) __M);
959}
960
961extern __inline __m256i
962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
963_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
964{
965  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
966						  (__v16hi) __B,
967						  (__v16hi)
968						  _mm256_setzero_si256 (),
969						  (__mmask16) __M);
970}
971
972extern __inline __m256i
973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
974_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
975		       __m256i __B)
976{
977  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
978						  (__v16hi) __B,
979						  (__v16hi) __W,
980						  (__mmask16) __M);
981}
982
983extern __inline __m256i
984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
985_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
986{
987  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
988						  (__v32qi) __B,
989						  (__v32qi)
990						  _mm256_setzero_si256 (),
991						  (__mmask32) __M);
992}
993
994extern __inline __m256i
995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
997		      __m256i __B)
998{
999  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
1000						  (__v32qi) __B,
1001						  (__v32qi) __W,
1002						  (__mmask32) __M);
1003}
1004
1005extern __inline __m128i
1006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1008{
1009  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1010						  (__v16qi) __B,
1011						  (__v16qi)
1012						  _mm_setzero_si128 (),
1013						  (__mmask16) __M);
1014}
1015
1016extern __inline __m128i
1017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1018_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1019		   __m128i __B)
1020{
1021  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1022						  (__v16qi) __B,
1023						  (__v16qi) __W,
1024						  (__mmask16) __M);
1025}
1026
1027extern __inline __m256i
1028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1029_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1030{
1031  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1032						  (__v32qi) __B,
1033						  (__v32qi)
1034						  _mm256_setzero_si256 (),
1035						  (__mmask32) __M);
1036}
1037
1038extern __inline __m256i
1039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1041		      __m256i __B)
1042{
1043  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1044						  (__v32qi) __B,
1045						  (__v32qi) __W,
1046						  (__mmask32) __M);
1047}
1048
1049extern __inline __m128i
1050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1052{
1053  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1054						  (__v16qi) __B,
1055						  (__v16qi)
1056						  _mm_setzero_si128 (),
1057						  (__mmask16) __M);
1058}
1059
1060extern __inline __m128i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1063		   __m128i __B)
1064{
1065  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1066						  (__v16qi) __B,
1067						  (__v16qi) __W,
1068						  (__mmask16) __M);
1069}
1070
1071extern __inline __m256i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1074{
1075  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1076						  (__v32qi) __B,
1077						  (__v32qi)
1078						  _mm256_setzero_si256 (),
1079						  (__mmask32) __M);
1080}
1081
1082extern __inline __m256i
1083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1085		      __m256i __B)
1086{
1087  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1088						  (__v32qi) __B,
1089						  (__v32qi) __W,
1090						  (__mmask32) __M);
1091}
1092
1093extern __inline __m128i
1094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1096{
1097  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1098						  (__v16qi) __B,
1099						  (__v16qi)
1100						  _mm_setzero_si128 (),
1101						  (__mmask16) __M);
1102}
1103
1104extern __inline __m128i
1105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1106_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1107		   __m128i __B)
1108{
1109  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1110						  (__v16qi) __B,
1111						  (__v16qi) __W,
1112						  (__mmask16) __M);
1113}
1114
1115extern __inline __m256i
1116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1117_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1118{
1119  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1120						  (__v32qi) __B,
1121						  (__v32qi)
1122						  _mm256_setzero_si256 (),
1123						  (__mmask32) __M);
1124}
1125
1126extern __inline __m256i
1127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1129		      __m256i __B)
1130{
1131  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1132						  (__v32qi) __B,
1133						  (__v32qi) __W,
1134						  (__mmask32) __M);
1135}
1136
1137extern __inline __m128i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1140{
1141  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1142						  (__v16qi) __B,
1143						  (__v16qi)
1144						  _mm_setzero_si128 (),
1145						  (__mmask16) __M);
1146}
1147
1148extern __inline __m128i
1149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1151		   __m128i __B)
1152{
1153  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1154						  (__v16qi) __B,
1155						  (__v16qi) __W,
1156						  (__mmask16) __M);
1157}
1158
1159extern __inline __m256i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1162{
1163  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1164						  (__v16hi) __B,
1165						  (__v16hi)
1166						  _mm256_setzero_si256 (),
1167						  (__mmask16) __M);
1168}
1169
1170extern __inline __m256i
1171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1172_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1173		       __m256i __B)
1174{
1175  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1176						  (__v16hi) __B,
1177						  (__v16hi) __W,
1178						  (__mmask16) __M);
1179}
1180
1181extern __inline __m128i
1182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1183_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1184{
1185  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1186						  (__v8hi) __B,
1187						  (__v8hi)
1188						  _mm_setzero_si128 (),
1189						  (__mmask8) __M);
1190}
1191
1192extern __inline __m128i
1193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1195		    __m128i __B)
1196{
1197  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1198						  (__v8hi) __B,
1199						  (__v8hi) __W,
1200						  (__mmask8) __M);
1201}
1202
1203extern __inline __m256i
1204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1206{
1207  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1208						  (__v16hi) __B,
1209						  (__v16hi)
1210						  _mm256_setzero_si256 (),
1211						  (__mmask16) __M);
1212}
1213
1214extern __inline __m256i
1215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1217		       __m256i __B)
1218{
1219  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1220						  (__v16hi) __B,
1221						  (__v16hi) __W,
1222						  (__mmask16) __M);
1223}
1224
1225extern __inline __m128i
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1228{
1229  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1230						  (__v8hi) __B,
1231						  (__v8hi)
1232						  _mm_setzero_si128 (),
1233						  (__mmask8) __M);
1234}
1235
1236extern __inline __m128i
1237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1239		    __m128i __B)
1240{
1241  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1242						  (__v8hi) __B,
1243						  (__v8hi) __W,
1244						  (__mmask8) __M);
1245}
1246
1247extern __inline __m128i
1248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1250{
1251  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1252						  (__v8hi) __B,
1253						  (__v8hi)
1254						  _mm_setzero_si128 (),
1255						  (__mmask8) __M);
1256}
1257
1258extern __inline __m128i
1259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1260_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1261		    __m128i __B)
1262{
1263  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1264						  (__v8hi) __B,
1265						  (__v8hi) __W,
1266						  (__mmask8) __M);
1267}
1268
1269#ifdef __OPTIMIZE__
1270extern __inline __m256i
1271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1272_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
1273			 __m256i __B, const int __N)
1274{
1275  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1276						   (__v4di) __B,
1277						   __N * 8,
1278						   (__v4di) __W,
1279						   (__mmask32) __U);
1280}
1281
1282extern __inline __m256i
1283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
1285			  const int __N)
1286{
1287  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1288						   (__v4di) __B,
1289						   __N * 8,
1290						   (__v4di)
1291						   _mm256_setzero_si256 (),
1292						   (__mmask32) __U);
1293}
1294
1295extern __inline __m128i
1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
1298		      __m128i __B, const int __N)
1299{
1300  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1301						   (__v2di) __B,
1302						   __N * 8,
1303						   (__v2di) __W,
1304						   (__mmask16) __U);
1305}
1306
1307extern __inline __m128i
1308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1309_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
1310		       const int __N)
1311{
1312  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1313						   (__v2di) __B,
1314						   __N * 8,
1315						   (__v2di)
1316						   _mm_setzero_si128 (),
1317						   (__mmask16) __U);
1318}
1319
1320extern __inline __m256i
1321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1322_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
1323{
1324  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1325						    (__v32qi) __B,
1326						    __imm,
1327						    (__v16hi)
1328						    _mm256_setzero_si256 (),
1329						    (__mmask16) -1);
1330}
1331
1332extern __inline __m256i
1333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1334_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
1335			__m256i __B, const int __imm)
1336{
1337  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1338						    (__v32qi) __B,
1339						    __imm,
1340						    (__v16hi) __W,
1341						    (__mmask16) __U);
1342}
1343
1344extern __inline __m256i
1345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1346_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
1347			 const int __imm)
1348{
1349  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1350						    (__v32qi) __B,
1351						    __imm,
1352						    (__v16hi)
1353						    _mm256_setzero_si256 (),
1354						    (__mmask16) __U);
1355}
1356
1357extern __inline __m128i
1358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
1360{
1361  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1362						    (__v16qi) __B,
1363						    __imm,
1364						    (__v8hi)
1365						    _mm_setzero_si128 (),
1366						    (__mmask8) -1);
1367}
1368
1369extern __inline __m128i
1370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
1372		     __m128i __B, const int __imm)
1373{
1374  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1375						    (__v16qi) __B,
1376						    __imm,
1377						    (__v8hi) __W,
1378						    (__mmask8) __U);
1379}
1380
1381extern __inline __m128i
1382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1383_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
1384		      const int __imm)
1385{
1386  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1387						    (__v16qi) __B,
1388						    __imm,
1389						    (__v8hi)
1390						    _mm_setzero_si128 (),
1391						    (__mmask8) __U);
1392}
1393
1394extern __inline __m128i
1395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1396_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
1397{
1398  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
1399						    (__v8hi) __W,
1400						    (__mmask8) __U);
1401}
1402
1403extern __inline __m128i
1404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1405_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
1406{
1407  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
1408						    (__v16qi) __W,
1409						    (__mmask16) __U);
1410}
1411
1412extern __inline __m256i
1413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1414_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
1415{
1416  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
1417						    (__v16hi) __W,
1418						    (__mmask16) __U);
1419}
1420
1421extern __inline __m256i
1422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1423_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
1424{
1425  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
1426						    (__v32qi) __W,
1427						    (__mmask32) __U);
1428}
1429
1430extern __inline __mmask8
1431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1433			 const int __P)
1434{
1435  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1436						 (__v8hi) __Y, __P,
1437						 (__mmask8) __U);
1438}
1439
1440extern __inline __mmask8
1441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1442_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
1443{
1444  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1445						 (__v8hi) __Y, __P,
1446						 (__mmask8) -1);
1447}
1448
1449extern __inline __mmask16
1450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1452			    const int __P)
1453{
1454  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1455						  (__v16hi) __Y, __P,
1456						  (__mmask16) __U);
1457}
1458
1459extern __inline __mmask16
1460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
1462{
1463  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1464						  (__v16hi) __Y, __P,
1465						  (__mmask16) -1);
1466}
1467
1468extern __inline __mmask16
1469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
1471			const int __P)
1472{
1473  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1474						  (__v16qi) __Y, __P,
1475						  (__mmask16) __U);
1476}
1477
1478extern __inline __mmask16
1479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
1481{
1482  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1483						  (__v16qi) __Y, __P,
1484						  (__mmask16) -1);
1485}
1486
1487extern __inline __mmask32
1488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
1490			   const int __P)
1491{
1492  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1493						  (__v32qi) __Y, __P,
1494						  (__mmask32) __U);
1495}
1496
1497extern __inline __mmask32
1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
1500{
1501  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1502						  (__v32qi) __Y, __P,
1503						  (__mmask32) -1);
1504}
1505
1506extern __inline __mmask8
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1509			 const int __P)
1510{
1511  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1512						  (__v8hi) __Y, __P,
1513						  (__mmask8) __U);
1514}
1515
1516extern __inline __mmask8
1517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
1519{
1520  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1521						  (__v8hi) __Y, __P,
1522						  (__mmask8) -1);
1523}
1524
1525extern __inline __mmask16
1526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1528			    const int __P)
1529{
1530  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1531						   (__v16hi) __Y, __P,
1532						   (__mmask16) __U);
1533}
1534
1535extern __inline __mmask16
1536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1537_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
1538{
1539  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1540						   (__v16hi) __Y, __P,
1541						   (__mmask16) -1);
1542}
1543
1544extern __inline __mmask16
1545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1546_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
1547			const int __P)
1548{
1549  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1550						   (__v16qi) __Y, __P,
1551						   (__mmask16) __U);
1552}
1553
1554extern __inline __mmask16
1555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
1557{
1558  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1559						   (__v16qi) __Y, __P,
1560						   (__mmask16) -1);
1561}
1562
1563extern __inline __mmask32
1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
1566			   const int __P)
1567{
1568  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1569						   (__v32qi) __Y, __P,
1570						   (__mmask32) __U);
1571}
1572
1573extern __inline __mmask32
1574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1575_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
1576{
1577  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1578						   (__v32qi) __Y, __P,
1579						   (__mmask32) -1);
1580}
1581
1582extern __inline __m256i
1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1585			const int __imm)
1586{
1587  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1588						  (__v16hi) __W,
1589						  (__mmask16) __U);
1590}
1591
1592extern __inline __m256i
1593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1594_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1595{
1596  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1597						  (__v16hi)
1598						  _mm256_setzero_si256 (),
1599						  (__mmask16) __U);
1600}
1601
1602extern __inline __m128i
1603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1605		     const int __imm)
1606{
1607  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1608						  (__v8hi) __W,
1609						  (__mmask8) __U);
1610}
1611
1612extern __inline __m128i
1613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1615{
1616  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1617						  (__v8hi)
1618						  _mm_setzero_si128 (),
1619						  (__mmask8) __U);
1620}
1621
1622extern __inline __m256i
1623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1625			     const int __imm)
1626{
1627  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1628						   __imm,
1629						   (__v16hi) __W,
1630						   (__mmask16) __U);
1631}
1632
1633extern __inline __m256i
1634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1635_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
1636			      const int __imm)
1637{
1638  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1639						   __imm,
1640						   (__v16hi)
1641						   _mm256_setzero_si256 (),
1642						   (__mmask16) __U);
1643}
1644
1645extern __inline __m128i
1646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1647_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1648			  const int __imm)
1649{
1650  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1651						   (__v8hi) __W,
1652						   (__mmask8) __U);
1653}
1654
1655extern __inline __m128i
1656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1658{
1659  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1660						   (__v8hi)
1661						   _mm_setzero_si128 (),
1662						   (__mmask8) __U);
1663}
1664
1665extern __inline __m256i
1666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1668			     const int __imm)
1669{
1670  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1671						   __imm,
1672						   (__v16hi) __W,
1673						   (__mmask16) __U);
1674}
1675
1676extern __inline __m256i
1677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
1679			      const int __imm)
1680{
1681  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1682						   __imm,
1683						   (__v16hi)
1684						   _mm256_setzero_si256 (),
1685						   (__mmask16) __U);
1686}
1687
1688extern __inline __m128i
1689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1690_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1691			  const int __imm)
1692{
1693  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1694						   (__v8hi) __W,
1695						   (__mmask8) __U);
1696}
1697
1698extern __inline __m128i
1699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1700_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1701{
1702  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1703						   (__v8hi)
1704						   _mm_setzero_si128 (),
1705						   (__mmask8) __U);
1706}
1707
1708extern __inline __m256i
1709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1710_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1711			const int __imm)
1712{
1713  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
1714						  (__v16hi) __W,
1715						  (__mmask16) __U);
1716}
1717
1718extern __inline __m256i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1721{
1722  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
1723						  (__v16hi)
1724						  _mm256_setzero_si256 (),
1725						  (__mmask16) __U);
1726}
1727
1728extern __inline __m128i
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1731		     const int __imm)
1732{
1733  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
1734						  (__v8hi) __W,
1735						  (__mmask8) __U);
1736}
1737
1738extern __inline __m128i
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1741{
1742  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
1743						  (__v8hi)
1744						  _mm_setzero_si128 (),
1745						  (__mmask8) __U);
1746}
1747
1748extern __inline __m256i
1749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1750_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1751			int __B)
1752{
1753  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
1754						  (__v16hi) __W,
1755						  (__mmask16) __U);
1756}
1757
1758extern __inline __m256i
1759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1760_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
1761{
1762  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
1763						  (__v16hi)
1764						  _mm256_setzero_si256 (),
1765						  (__mmask16) __U);
1766}
1767
1768extern __inline __m128i
1769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1770_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
1771{
1772  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
1773						  (__v8hi) __W,
1774						  (__mmask8) __U);
1775}
1776
1777extern __inline __m128i
1778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
1780{
1781  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
1782						  (__v8hi)
1783						  _mm_setzero_si128 (),
1784						  (__mmask8) __U);
1785}
1786
1787#else
/* Macro form of _mm256_mask_alignr_epi8, used when __OPTIMIZE__ is not
   defined.  X and Y are the two inputs, N is scaled by 8 before it
   reaches the builtin, and W is passed as the fourth operand, exactly as
   the inline definition passes __W.  (The macro previously passed X
   there and never used W.)  */
#define _mm256_mask_alignr_epi8(W, U, X, Y, N)                          \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),      \
                                             (__v4di)(__m256i)(Y),      \
                                             (int)((N) * 8),            \
                                             (__v4di)(__m256i)(W),      \
                                             (__mmask32)(U)))
1792
/* Macro form of _mm256_mask_srli_epi16 (no __OPTIMIZE__): forwards A, the
   count B as int, W and the mask U to the psrlwi256 builtin.  */
#define _mm256_mask_srli_epi16(W, U, A, B)                              \
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
                                            (int)(B),                   \
                                            (__v16hi)(__m256i)(W),      \
                                            (__mmask16)(U)))
1796
/* Macro form of _mm256_maskz_srli_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the psrlwi256 builtin.  */
#define _mm256_maskz_srli_epi16(U, A, B)                                \
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
                                            (int)(B),                   \
                                            (__v16hi)_mm256_setzero_si256 (), \
                                            (__mmask16)(U)))
1800
/* Macro form of _mm_mask_srli_epi16 (no __OPTIMIZE__): forwards A, the
   count B as int, W and the mask U to the psrlwi128 builtin.  */
#define _mm_mask_srli_epi16(W, U, A, B)                                 \
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
                                            (int)(B),                   \
                                            (__v8hi)(__m128i)(W),       \
                                            (__mmask8)(U)))
1804
/* Macro form of _mm_maskz_srli_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the psrlwi128 builtin.  */
#define _mm_maskz_srli_epi16(U, A, B)                                   \
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
                                            (int)(B),                   \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)))
1808
/* Macro form of _mm256_mask_srai_epi16 (no __OPTIMIZE__): forwards A, the
   count B as int, W and the mask U to the psrawi256 builtin.  */
#define _mm256_mask_srai_epi16(W, U, A, B)                              \
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
                                            (int)(B),                   \
                                            (__v16hi)(__m256i)(W),      \
                                            (__mmask16)(U)))
1812
/* Macro form of _mm256_maskz_srai_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the psrawi256 builtin.  */
#define _mm256_maskz_srai_epi16(U, A, B)                                \
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
                                            (int)(B),                   \
                                            (__v16hi)_mm256_setzero_si256 (), \
                                            (__mmask16)(U)))
1816
/* Macro form of _mm_mask_srai_epi16 (no __OPTIMIZE__): forwards A, the
   count B as int, W and the mask U to the psrawi128 builtin.  */
#define _mm_mask_srai_epi16(W, U, A, B)                                 \
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
                                            (int)(B),                   \
                                            (__v8hi)(__m128i)(W),       \
                                            (__mmask8)(U)))
1820
/* Macro form of _mm_maskz_srai_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the psrawi128 builtin.  */
#define _mm_maskz_srai_epi16(U, A, B)                                   \
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
                                            (int)(B),                   \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)))
1824
/* Macro form of _mm256_mask_shufflehi_epi16 (no __OPTIMIZE__): forwards A,
   the control B as int, W and the mask U to the pshufhw256 builtin.  */
#define _mm256_mask_shufflehi_epi16(W, U, A, B)                         \
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A),     \
                                             (int)(B),                  \
                                             (__v16hi)(__m256i)(W),     \
                                             (__mmask16)(U)))
1829
/* Macro form of _mm256_maskz_shufflehi_epi16 (no __OPTIMIZE__): an
   all-zero vector is the third operand of the pshufhw256 builtin.  */
#define _mm256_maskz_shufflehi_epi16(U, A, B)                           \
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A),     \
                                             (int)(B),                  \
                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
                                             (__mmask16)(U)))
1834
/* Macro form of _mm_mask_shufflehi_epi16 (no __OPTIMIZE__): forwards A,
   the control B as int, W and the mask U to the pshufhw128 builtin.  */
#define _mm_mask_shufflehi_epi16(W, U, A, B)                            \
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A),      \
                                             (int)(B),                  \
                                             (__v8hi)(__m128i)(W),      \
                                             (__mmask8)(U)))
1839
/* Macro form of _mm_maskz_shufflehi_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the pshufhw128 builtin.  */
#define _mm_maskz_shufflehi_epi16(U, A, B)                              \
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A),      \
                                             (int)(B),                  \
                                             (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                             (__mmask8)(U)))
1844
/* Macro form of _mm256_mask_shufflelo_epi16 (no __OPTIMIZE__): forwards A,
   the control B as int, W and the mask U to the pshuflw256 builtin.  */
#define _mm256_mask_shufflelo_epi16(W, U, A, B)                         \
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A),     \
                                             (int)(B),                  \
                                             (__v16hi)(__m256i)(W),     \
                                             (__mmask16)(U)))
1849
/* Macro form of _mm256_maskz_shufflelo_epi16 (no __OPTIMIZE__): an
   all-zero vector is the third operand of the pshuflw256 builtin.  */
#define _mm256_maskz_shufflelo_epi16(U, A, B)                           \
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A),     \
                                             (int)(B),                  \
                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
                                             (__mmask16)(U)))
1854
/* Macro form of _mm_mask_shufflelo_epi16 (no __OPTIMIZE__): forwards A,
   the control B as int, W and the mask U to the pshuflw128 builtin.  */
#define _mm_mask_shufflelo_epi16(W, U, A, B)                            \
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A),      \
                                             (int)(B),                  \
                                             (__v8hi)(__m128i)(W),      \
                                             (__mmask8)(U)))
1859
/* Macro form of _mm_maskz_shufflelo_epi16 (no __OPTIMIZE__): an all-zero
   vector is the third operand of the pshuflw128 builtin.  */
#define _mm_maskz_shufflelo_epi16(U, A, B)                              \
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A),      \
                                             (int)(B),                  \
                                             (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                             (__mmask8)(U)))
1864
/* Macro form of _mm256_maskz_alignr_epi8 (no __OPTIMIZE__): N is scaled
   by 8 and an all-zero vector is the fourth operand of the palignr256
   builtin.  */
#define _mm256_maskz_alignr_epi8(U, X, Y, N)                            \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),      \
                                             (__v4di)(__m256i)(Y),      \
                                             (int)((N) * 8),            \
                                             (__v4di)(__m256i)_mm256_setzero_si256 (), \
                                             (__mmask32)(U)))
1870
/* _mm_mask_alignr_epi8 (W, U, X, Y, N): expands to a call of
   __builtin_ia32_palignr128_mask with X and Y (as __v2di), the count
   (N) * 8, W as the pass-through vector operand and U as the __mmask16
   mask.
   W (not X) must be the pass-through operand: per the Intel definition
   of this intrinsic, result bytes whose bit in U is clear are copied
   from W.  */
#define _mm_mask_alignr_epi8(W, U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)((N) * 8), \
     (__v2di)(__m128i)(W), \
     (__mmask16)(U)))
1875
/* _mm_maskz_alignr_epi8 (U, X, Y, N): expands to a call of
   __builtin_ia32_palignr128_mask with X and Y (as __v2di), the count
   (N) * 8, the result of _mm_setzero_si128 () as the third vector
   operand and U as the __mmask16 mask.  */
#define _mm_maskz_alignr_epi8(U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)((N) * 8), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask16)(U)))
1881
/* _mm_mask_slli_epi16 (W, U, X, C): expands to a call of
   __builtin_ia32_psllwi128_mask with X (as __v8hi), the count C as int,
   W as the third vector operand and U as the __mmask8 mask.  */
#define _mm_mask_slli_epi16(W, U, X, C) \
  ((__m128i)__builtin_ia32_psllwi128_mask ( \
     (__v8hi)(__m128i)(X), \
     (int)(C), \
     (__v8hi)(__m128i)(W), \
     (__mmask8)(U)))
1886
/* _mm_maskz_slli_epi16 (U, X, C): expands to a call of
   __builtin_ia32_psllwi128_mask with X (as __v8hi), the count C as int,
   the result of _mm_setzero_si128 () as the third vector operand and
   U as the __mmask8 mask.  */
#define _mm_maskz_slli_epi16(U, X, C) \
  ((__m128i)__builtin_ia32_psllwi128_mask ( \
     (__v8hi)(__m128i)(X), \
     (int)(C), \
     (__v8hi)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
1891
/* _mm256_dbsad_epu8 (X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw256_mask with X and Y (as __v32qi), the
   immediate C, the result of _mm256_setzero_si256 () as the third
   vector operand and an all-ones __mmask16.  */
#define _mm256_dbsad_epu8(X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)_mm256_setzero_si256(), \
     (__mmask16)-1))
1897
/* _mm256_mask_slli_epi16 (W, U, X, C): expands to a call of
   __builtin_ia32_psllwi256_mask with X (as __v16hi), the count C as
   int, W as the third vector operand and U as the __mmask16 mask.  */
#define _mm256_mask_slli_epi16(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), \
     (int)(C), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
1902
/* _mm256_maskz_slli_epi16 (U, X, C): expands to a call of
   __builtin_ia32_psllwi256_mask with X (as __v16hi), the count C as
   int, the result of _mm256_setzero_si256 () as the third vector
   operand and U as the __mmask16 mask.  */
#define _mm256_maskz_slli_epi16(U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), \
     (int)(C), \
     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
     (__mmask16)(U)))
1907
/* _mm256_mask_dbsad_epu8 (W, U, X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw256_mask with X and Y (as __v32qi), the
   immediate C, W (as __v16hi) as the third vector operand and U as the
   __mmask16 mask.  */
#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
1913
/* _mm256_maskz_dbsad_epu8 (U, X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw256_mask with X and Y (as __v32qi), the
   immediate C, the result of _mm256_setzero_si256 () as the third
   vector operand and U as the __mmask16 mask.  */
#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)_mm256_setzero_si256(), \
     (__mmask16)(U)))
1919
/* _mm_dbsad_epu8 (X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw128_mask with X and Y (as __v16qi), the
   immediate C, the result of _mm_setzero_si128 () as the third vector
   operand and an all-ones __mmask8.  */
#define _mm_dbsad_epu8(X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_setzero_si128(), \
     (__mmask8)-1))
1925
/* _mm_mask_dbsad_epu8 (W, U, X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw128_mask with X and Y (as __v16qi), the
   immediate C, W (as __v8hi) as the third vector operand and U as the
   __mmask8 mask.  */
#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)(W), \
     (__mmask8)(U)))
1931
/* _mm_maskz_dbsad_epu8 (U, X, Y, C): expands to a call of
   __builtin_ia32_dbpsadbw128_mask with X and Y (as __v16qi), the
   immediate C, the result of _mm_setzero_si128 () as the third vector
   operand and U as the __mmask8 mask.  */
#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_setzero_si128(), \
     (__mmask8)(U)))
1937
/* _mm_mask_blend_epi16 (__U, __A, __W): expands to a call of
   __builtin_ia32_blendmw_128_mask with __A and __W (as __v8hi) and
   __U as the __mmask8 selector.  */
#define _mm_mask_blend_epi16(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmw_128_mask ( \
     (__v8hi) (__A), \
     (__v8hi) (__W), \
     (__mmask8) (__U)))
1942
/* _mm_mask_blend_epi8 (__U, __A, __W): expands to a call of
   __builtin_ia32_blendmb_128_mask with __A and __W (as __v16qi) and
   __U as the __mmask16 selector.  */
#define _mm_mask_blend_epi8(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmb_128_mask ( \
     (__v16qi) (__A), \
     (__v16qi) (__W), \
     (__mmask16) (__U)))
1947
/* _mm256_mask_blend_epi16 (__U, __A, __W): expands to a call of
   __builtin_ia32_blendmw_256_mask with __A and __W (as __v16hi) and
   __U as the __mmask16 selector.  */
#define _mm256_mask_blend_epi16(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmw_256_mask ( \
     (__v16hi) (__A), \
     (__v16hi) (__W), \
     (__mmask16) (__U)))
1952
/* _mm256_mask_blend_epi8 (__U, __A, __W): expands to a call of
   __builtin_ia32_blendmb_256_mask with __A and __W (as __v32qi) and
   __U as the __mmask32 selector.  */
#define _mm256_mask_blend_epi8(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmb_256_mask ( \
     (__v32qi) (__A), \
     (__v32qi) (__W), \
     (__mmask32) (__U)))
1957
/* _mm_cmp_epi16_mask (X, Y, P): expands to a call of
   __builtin_ia32_cmpw128_mask with X and Y (as __v8hi), the predicate
   immediate P and an all-ones __mmask8; yields an __mmask8.  */
#define _mm_cmp_epi16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
1962
/* _mm_cmp_epi8_mask (X, Y, P): expands to a call of
   __builtin_ia32_cmpb128_mask with X and Y (as __v16qi), the predicate
   immediate P and an all-ones __mmask16; yields an __mmask16.  */
#define _mm_cmp_epi8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1967
/* _mm256_cmp_epi16_mask (X, Y, P): expands to a call of
   __builtin_ia32_cmpw256_mask with X and Y (as __v16hi), the predicate
   immediate P and an all-ones __mmask16; yields an __mmask16.  */
#define _mm256_cmp_epi16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1972
/* _mm256_cmp_epi8_mask (X, Y, P): expands to a call of
   __builtin_ia32_cmpb256_mask with X and Y (as __v32qi), the predicate
   immediate P and an all-ones __mmask32; yields an __mmask32.  */
#define _mm256_cmp_epi8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(-1)))
1977
/* _mm_cmp_epu16_mask (X, Y, P): expands to a call of
   __builtin_ia32_ucmpw128_mask with X and Y (as __v8hi), the predicate
   immediate P and an all-ones __mmask8; yields an __mmask8.  */
#define _mm_cmp_epu16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
1982
/* _mm_cmp_epu8_mask (X, Y, P): expands to a call of
   __builtin_ia32_ucmpb128_mask with X and Y (as __v16qi), the predicate
   immediate P and an all-ones __mmask16; yields an __mmask16.  */
#define _mm_cmp_epu8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1987
/* _mm256_cmp_epu16_mask (X, Y, P): expands to a call of
   __builtin_ia32_ucmpw256_mask with X and Y (as __v16hi), the predicate
   immediate P and an all-ones __mmask16; yields an __mmask16.  */
#define _mm256_cmp_epu16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
1992
/* _mm256_cmp_epu8_mask (X, Y, P): expands to a call of
   __builtin_ia32_ucmpb256_mask with X and Y (as __v32qi), the predicate
   immediate P and an all-ones __mmask32; yields an __mmask32.  */
#define _mm256_cmp_epu8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)-1))
1997
/* _mm_mask_cmp_epi16_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_cmpw128_mask with X and Y (as __v8hi), the predicate
   immediate P and M as the __mmask8 mask operand; yields an __mmask8.  */
#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
2002
/* _mm_mask_cmp_epi8_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_cmpb128_mask with X and Y (as __v16qi), the predicate
   immediate P and M as the __mmask16 mask operand; yields an
   __mmask16.  */
#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2007
/* _mm256_mask_cmp_epi16_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_cmpw256_mask with X and Y (as __v16hi), the predicate
   immediate P and M as the __mmask16 mask operand; yields an
   __mmask16.  */
#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2012
/* _mm256_mask_cmp_epi8_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_cmpb256_mask with X and Y (as __v32qi), the predicate
   immediate P and M as the __mmask32 mask operand; yields an
   __mmask32.  */
#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
2017
/* _mm_mask_cmp_epu16_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_ucmpw128_mask with X and Y (as __v8hi), the predicate
   immediate P and M as the __mmask8 mask operand; yields an __mmask8.  */
#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
2022
/* _mm_mask_cmp_epu8_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_ucmpb128_mask with X and Y (as __v16qi), the predicate
   immediate P and M as the __mmask16 mask operand; yields an
   __mmask16.  */
#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2027
/* _mm256_mask_cmp_epu16_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_ucmpw256_mask with X and Y (as __v16hi), the predicate
   immediate P and M as the __mmask16 mask operand; yields an
   __mmask16.  */
#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2032
/* _mm256_mask_cmp_epu8_mask (M, X, Y, P): expands to a call of
   __builtin_ia32_ucmpb256_mask with X and Y (as __v32qi), the predicate
   immediate P and M as the __mmask32 mask operand; yields an
   __mmask32.  */
#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
2037#endif
2038
2039extern __inline __mmask32
2040  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2041_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
2042{
2043  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2044						  (__v32qi) __Y, 4,
2045						  (__mmask32) -1);
2046}
2047
2048extern __inline __mmask32
2049  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2050_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
2051{
2052  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2053						  (__v32qi) __Y, 1,
2054						  (__mmask32) -1);
2055}
2056
2057extern __inline __mmask32
2058  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2059_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
2060{
2061  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2062						  (__v32qi) __Y, 5,
2063						  (__mmask32) -1);
2064}
2065
2066extern __inline __mmask32
2067  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
2069{
2070  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2071						  (__v32qi) __Y, 2,
2072						  (__mmask32) -1);
2073}
2074
2075extern __inline __mmask16
2076  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
2078{
2079  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2080						  (__v16hi) __Y, 4,
2081						  (__mmask16) -1);
2082}
2083
2084extern __inline __mmask16
2085  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
2087{
2088  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2089						  (__v16hi) __Y, 1,
2090						  (__mmask16) -1);
2091}
2092
2093extern __inline __mmask16
2094  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2095_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
2096{
2097  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2098						  (__v16hi) __Y, 5,
2099						  (__mmask16) -1);
2100}
2101
2102extern __inline __mmask16
2103  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2104_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
2105{
2106  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2107						  (__v16hi) __Y, 2,
2108						  (__mmask16) -1);
2109}
2110
2111extern __inline __mmask16
2112  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
2114{
2115  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2116						   (__v16qi) __Y, 4,
2117						   (__mmask16) -1);
2118}
2119
2120extern __inline __mmask16
2121  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
2123{
2124  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2125						   (__v16qi) __Y, 1,
2126						   (__mmask16) -1);
2127}
2128
2129extern __inline __mmask16
2130  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2131_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
2132{
2133  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2134						   (__v16qi) __Y, 5,
2135						   (__mmask16) -1);
2136}
2137
2138extern __inline __mmask16
2139  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2140_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
2141{
2142  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2143						   (__v16qi) __Y, 2,
2144						   (__mmask16) -1);
2145}
2146
2147extern __inline __mmask8
2148  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2149_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
2150{
2151  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2152						  (__v8hi) __Y, 4,
2153						  (__mmask8) -1);
2154}
2155
2156extern __inline __mmask8
2157  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2158_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
2159{
2160  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2161						  (__v8hi) __Y, 1,
2162						  (__mmask8) -1);
2163}
2164
2165extern __inline __mmask8
2166  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
2168{
2169  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2170						  (__v8hi) __Y, 5,
2171						  (__mmask8) -1);
2172}
2173
2174extern __inline __mmask8
2175  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2176_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
2177{
2178  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2179						  (__v8hi) __Y, 2,
2180						  (__mmask8) -1);
2181}
2182
2183extern __inline __mmask16
2184  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
2186{
2187  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2188						  (__v16qi) __Y, 4,
2189						  (__mmask16) -1);
2190}
2191
2192extern __inline __mmask16
2193  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
2195{
2196  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2197						  (__v16qi) __Y, 1,
2198						  (__mmask16) -1);
2199}
2200
2201extern __inline __mmask16
2202  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2203_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
2204{
2205  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2206						  (__v16qi) __Y, 5,
2207						  (__mmask16) -1);
2208}
2209
2210extern __inline __mmask16
2211  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
2213{
2214  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2215						  (__v16qi) __Y, 2,
2216						  (__mmask16) -1);
2217}
2218
2219extern __inline __mmask8
2220  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
2222{
2223  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2224						 (__v8hi) __Y, 4,
2225						 (__mmask8) -1);
2226}
2227
2228extern __inline __mmask8
2229  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2230_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
2231{
2232  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2233						 (__v8hi) __Y, 1,
2234						 (__mmask8) -1);
2235}
2236
2237extern __inline __mmask8
2238  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2239_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
2240{
2241  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2242						 (__v8hi) __Y, 5,
2243						 (__mmask8) -1);
2244}
2245
2246extern __inline __mmask8
2247  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2248_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
2249{
2250  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2251						 (__v8hi) __Y, 2,
2252						 (__mmask8) -1);
2253}
2254
2255extern __inline __m256i
2256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2257_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
2258			  __m256i __Y)
2259{
2260  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2261						    (__v16hi) __Y,
2262						    (__v16hi) __W,
2263						    (__mmask16) __U);
2264}
2265
2266extern __inline __m256i
2267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2268_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
2269{
2270  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2271						    (__v16hi) __Y,
2272						    (__v16hi)
2273						    _mm256_setzero_si256 (),
2274						    (__mmask16) __U);
2275}
2276
2277extern __inline __m256i
2278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2280			 __m256i __B)
2281{
2282  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2283						   (__v16hi) __B,
2284						   (__v16hi) __W,
2285						   (__mmask16) __U);
2286}
2287
2288extern __inline __m256i
2289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2291{
2292  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2293						   (__v16hi) __B,
2294						   (__v16hi)
2295						   _mm256_setzero_si256 (),
2296						   (__mmask16) __U);
2297}
2298
2299extern __inline __m256i
2300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2302			 __m256i __B)
2303{
2304  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2305						  (__v16hi) __B,
2306						  (__v16hi) __W,
2307						  (__mmask16) __U);
2308}
2309
2310extern __inline __m256i
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2313{
2314  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2315						  (__v16hi) __B,
2316						  (__v16hi)
2317						  _mm256_setzero_si256 (),
2318						  (__mmask16) __U);
2319}
2320
2321extern __inline __m128i
2322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2323_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2324		      __m128i __B)
2325{
2326  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2327						  (__v8hi) __B,
2328						  (__v8hi) __W,
2329						  (__mmask8) __U);
2330}
2331
2332extern __inline __m128i
2333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2335{
2336  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2337						  (__v8hi) __B,
2338						  (__v8hi)
2339						  _mm_setzero_si128 (),
2340						  (__mmask8) __U);
2341}
2342
2343extern __inline __m128i
2344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2346		      __m128i __B)
2347{
2348  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2349						   (__v8hi) __B,
2350						   (__v8hi) __W,
2351						   (__mmask8) __U);
2352}
2353
2354extern __inline __m128i
2355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2357{
2358  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2359						   (__v8hi) __B,
2360						   (__v8hi)
2361						   _mm_setzero_si128 (),
2362						   (__mmask8) __U);
2363}
2364
2365extern __inline __m128i
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
2368		       __m128i __Y)
2369{
2370  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2371						    (__v8hi) __Y,
2372						    (__v8hi) __W,
2373						    (__mmask8) __U);
2374}
2375
2376extern __inline __m128i
2377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2378_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
2379{
2380  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2381						    (__v8hi) __Y,
2382						    (__v8hi)
2383						    _mm_setzero_si128 (),
2384						    (__mmask8) __U);
2385}
2386
2387extern __inline __m256i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2390			 __m256i __B)
2391{
2392  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2393						  (__v16hi) __B,
2394						  (__v16hi) __W,
2395						  (__mmask16) __U);
2396}
2397
2398extern __inline __m256i
2399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2400_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2401{
2402  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2403						  (__v16hi) __B,
2404						  (__v16hi)
2405						  _mm256_setzero_si256 (),
2406						  (__mmask16) __U);
2407}
2408
2409extern __inline __m128i
2410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2411_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2412		      __m128i __B)
2413{
2414  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2415						  (__v8hi) __B,
2416						  (__v8hi) __W,
2417						  (__mmask8) __U);
2418}
2419
2420extern __inline __m128i
2421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2422_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2423{
2424  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2425						  (__v8hi) __B,
2426						  (__v8hi)
2427						  _mm_setzero_si128 (),
2428						  (__mmask8) __U);
2429}
2430
2431extern __inline __m256i
2432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
2434{
2435  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2436						    (__v16hi) __W,
2437						    (__mmask16) __U);
2438}
2439
2440extern __inline __m256i
2441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2442_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
2443{
2444  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2445						    (__v16hi)
2446						    _mm256_setzero_si256 (),
2447						    (__mmask16) __U);
2448}
2449
2450extern __inline __m128i
2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2453{
2454  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2455						    (__v8hi) __W,
2456						    (__mmask8) __U);
2457}
2458
2459extern __inline __m128i
2460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
2462{
2463  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2464						    (__v8hi)
2465						    _mm_setzero_si128 (),
2466						    (__mmask8) __U);
2467}
2468
2469extern __inline __m256i
2470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2471_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
2472{
2473  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2474						    (__v16hi) __W,
2475						    (__mmask16) __U);
2476}
2477
2478extern __inline __m256i
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
2481{
2482  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2483						    (__v16hi)
2484						    _mm256_setzero_si256 (),
2485						    (__mmask16) __U);
2486}
2487
2488extern __inline __m128i
2489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2490_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2491{
2492  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2493						    (__v8hi) __W,
2494						    (__mmask8) __U);
2495}
2496
2497extern __inline __m128i
2498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2499_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
2500{
2501  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2502						    (__v8hi)
2503						    _mm_setzero_si128 (),
2504						    (__mmask8) __U);
2505}
2506
2507extern __inline __m256i
2508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2509_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2510		      __m256i __B)
2511{
2512  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2513						 (__v32qi) __B,
2514						 (__v32qi) __W,
2515						 (__mmask32) __U);
2516}
2517
2518extern __inline __m256i
2519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2521{
2522  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2523						 (__v32qi) __B,
2524						 (__v32qi)
2525						 _mm256_setzero_si256 (),
2526						 (__mmask32) __U);
2527}
2528
2529extern __inline __m128i
2530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
2532		   __m128i __B)
2533{
2534  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2535						 (__v16qi) __B,
2536						 (__v16qi) __W,
2537						 (__mmask16) __U);
2538}
2539
2540extern __inline __m128i
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
2543{
2544  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2545						 (__v16qi) __B,
2546						 (__v16qi)
2547						 _mm_setzero_si128 (),
2548						 (__mmask16) __U);
2549}
2550
2551extern __inline __m256i
2552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2554		       __m256i __B)
2555{
2556  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2557						 (__v16hi) __B,
2558						 (__v16hi) __W,
2559						 (__mmask16) __U);
2560}
2561
2562extern __inline __m256i
2563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2565{
2566  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2567						 (__v16hi) __B,
2568						 (__v16hi)
2569						 _mm256_setzero_si256 (),
2570						 (__mmask16) __U);
2571}
2572
2573extern __inline __m128i
2574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2576		    __m128i __B)
2577{
2578  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2579						 (__v8hi) __B,
2580						 (__v8hi) __W,
2581						 (__mmask8) __U);
2582}
2583
2584extern __inline __m128i
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2587{
2588  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2589						 (__v8hi) __B,
2590						 (__v8hi)
2591						 _mm_setzero_si128 (),
2592						 (__mmask8) __U);
2593}
2594
2595extern __inline __m256i
2596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2598		      __m256i __B)
2599{
2600  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2601						 (__v32qi) __B,
2602						 (__v32qi) __W,
2603						 (__mmask32) __U);
2604}
2605
2606extern __inline __m256i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2609{
2610  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2611						 (__v32qi) __B,
2612						 (__v32qi)
2613						 _mm256_setzero_si256 (),
2614						 (__mmask32) __U);
2615}
2616
2617extern __inline __m256i
2618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2620		       __m256i __B)
2621{
2622  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2623						 (__v16hi) __B,
2624						 (__v16hi) __W,
2625						 (__mmask16) __U);
2626}
2627
2628extern __inline __m256i
2629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2631{
2632  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2633						 (__v16hi) __B,
2634						 (__v16hi)
2635						 _mm256_setzero_si256 (),
2636						 (__mmask16) __U);
2637}
2638
2639extern __inline __m256i
2640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2641_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2642		       __m256i __B)
2643{
2644  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2645						  (__v32qi) __B,
2646						  (__v32qi) __W,
2647						  (__mmask32) __U);
2648}
2649
2650extern __inline __m256i
2651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2652_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2653{
2654  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2655						  (__v32qi) __B,
2656						  (__v32qi)
2657						  _mm256_setzero_si256 (),
2658						  (__mmask32) __U);
2659}
2660
2661extern __inline __m256i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2664			__m256i __B)
2665{
2666  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2667						  (__v16hi) __B,
2668						  (__v16hi) __W,
2669						  (__mmask16) __U);
2670}
2671
2672extern __inline __m256i
2673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2674_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2675{
2676  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2677						  (__v16hi) __B,
2678						  (__v16hi)
2679						  _mm256_setzero_si256 (),
2680						  (__mmask16) __U);
2681}
2682
2683extern __inline __m256i
2684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2685_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2686		       __m256i __B)
2687{
2688  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2689						   (__v32qi) __B,
2690						   (__v32qi) __W,
2691						   (__mmask32) __U);
2692}
2693
2694extern __inline __m256i
2695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2697{
2698  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2699						   (__v32qi) __B,
2700						   (__v32qi)
2701						   _mm256_setzero_si256 (),
2702						   (__mmask32) __U);
2703}
2704
2705extern __inline __m256i
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2708			__m256i __B)
2709{
2710  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2711						   (__v16hi) __B,
2712						   (__v16hi) __W,
2713						   (__mmask16) __U);
2714}
2715
2716extern __inline __m256i
2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2719{
2720  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2721						   (__v16hi) __B,
2722						   (__v16hi)
2723						   _mm256_setzero_si256 (),
2724						   (__mmask16) __U);
2725}
2726
2727extern __inline __m256i
2728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2729_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2730		      __m256i __B)
2731{
2732  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
2733						 (__v32qi) __B,
2734						 (__v32qi) __W,
2735						 (__mmask32) __U);
2736}
2737
2738extern __inline __m256i
2739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2740_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2741{
2742  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
2743						 (__v32qi) __B,
2744						 (__v32qi)
2745						 _mm256_setzero_si256 (),
2746						 (__mmask32) __U);
2747}
2748
2749extern __inline __m256i
2750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2752		       __m256i __B)
2753{
2754  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
2755						 (__v16hi) __B,
2756						 (__v16hi) __W,
2757						 (__mmask16) __U);
2758}
2759
2760extern __inline __m256i
2761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2762_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2763{
2764  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
2765						 (__v16hi) __B,
2766						 (__v16hi)
2767						 _mm256_setzero_si256 (),
2768						 (__mmask16) __U);
2769}
2770
2771extern __inline __m256i
2772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2773_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2774		       __m256i __B)
2775{
2776  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
2777						  (__v32qi) __B,
2778						  (__v32qi) __W,
2779						  (__mmask32) __U);
2780}
2781
2782extern __inline __m256i
2783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2785{
2786  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
2787						  (__v32qi) __B,
2788						  (__v32qi)
2789						  _mm256_setzero_si256 (),
2790						  (__mmask32) __U);
2791}
2792
2793extern __inline __m256i
2794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2795_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2796			__m256i __B)
2797{
2798  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
2799						  (__v16hi) __B,
2800						  (__v16hi) __W,
2801						  (__mmask16) __U);
2802}
2803
2804extern __inline __m256i
2805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2806_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2807{
2808  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
2809						  (__v16hi) __B,
2810						  (__v16hi)
2811						  _mm256_setzero_si256 (),
2812						  (__mmask16) __U);
2813}
2814
2815extern __inline __m256i
2816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2817_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2818		       __m256i __B)
2819{
2820  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
2821						   (__v32qi) __B,
2822						   (__v32qi) __W,
2823						   (__mmask32) __U);
2824}
2825
2826extern __inline __m256i
2827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2828_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2829{
2830  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
2831						   (__v32qi) __B,
2832						   (__v32qi)
2833						   _mm256_setzero_si256 (),
2834						   (__mmask32) __U);
2835}
2836
2837extern __inline __m256i
2838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2839_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2840			__m256i __B)
2841{
2842  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
2843						   (__v16hi) __B,
2844						   (__v16hi) __W,
2845						   (__mmask16) __U);
2846}
2847
2848extern __inline __m256i
2849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2850_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2851{
2852  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
2853						   (__v16hi) __B,
2854						   (__v16hi)
2855						   _mm256_setzero_si256 (),
2856						   (__mmask16) __U);
2857}
2858
2859extern __inline __m128i
2860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
2862		   __m128i __B)
2863{
2864  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
2865						 (__v16qi) __B,
2866						 (__v16qi) __W,
2867						 (__mmask16) __U);
2868}
2869
2870extern __inline __m128i
2871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
2873{
2874  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
2875						 (__v16qi) __B,
2876						 (__v16qi)
2877						 _mm_setzero_si128 (),
2878						 (__mmask16) __U);
2879}
2880
2881extern __inline __m128i
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2884		    __m128i __B)
2885{
2886  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
2887						 (__v8hi) __B,
2888						 (__v8hi) __W,
2889						 (__mmask8) __U);
2890}
2891
2892extern __inline __m128i
2893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2894_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2895{
2896  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
2897						 (__v8hi) __B,
2898						 (__v8hi)
2899						 _mm_setzero_si128 (),
2900						 (__mmask8) __U);
2901}
2902
2903extern __inline __m256i
2904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2905_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2906			   __m256i __B)
2907{
2908  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
2909						     (__v32qi) __B,
2910						     (__v32qi) __W,
2911						     (__mmask32) __U);
2912}
2913
2914extern __inline __m256i
2915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2916_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2917{
2918  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
2919						     (__v32qi) __B,
2920						     (__v32qi)
2921						     _mm256_setzero_si256 (),
2922						     (__mmask32) __U);
2923}
2924
2925extern __inline __m128i
2926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
2928			__m128i __B)
2929{
2930  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
2931						     (__v16qi) __B,
2932						     (__v16qi) __W,
2933						     (__mmask16) __U);
2934}
2935
2936extern __inline __m128i
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
2939{
2940  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
2941						     (__v16qi) __B,
2942						     (__v16qi)
2943						     _mm_setzero_si128 (),
2944						     (__mmask16) __U);
2945}
2946
2947extern __inline __m256i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2950			    __m256i __B)
2951{
2952  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
2953						     (__v16hi) __B,
2954						     (__v16hi) __W,
2955						     (__mmask16) __U);
2956}
2957
2958extern __inline __m256i
2959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2961{
2962  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
2963						     (__v16hi) __B,
2964						     (__v16hi)
2965						     _mm256_setzero_si256 (),
2966						     (__mmask16) __U);
2967}
2968
2969extern __inline __m128i
2970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2971_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2972			 __m128i __B)
2973{
2974  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
2975						     (__v8hi) __B,
2976						     (__v8hi) __W,
2977						     (__mmask8) __U);
2978}
2979
2980extern __inline __m128i
2981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2982_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2983{
2984  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
2985						     (__v8hi) __B,
2986						     (__v8hi)
2987						     _mm_setzero_si128 (),
2988						     (__mmask8) __U);
2989}
2990
2991extern __inline __m256i
2992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2993_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2994			   __m256i __B)
2995{
2996  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
2997						     (__v32qi) __B,
2998						     (__v32qi) __W,
2999						     (__mmask32) __U);
3000}
3001
3002extern __inline __m256i
3003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3004_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3005{
3006  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
3007						     (__v32qi) __B,
3008						     (__v32qi)
3009						     _mm256_setzero_si256 (),
3010						     (__mmask32) __U);
3011}
3012
3013extern __inline __m128i
3014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3016			__m128i __B)
3017{
3018  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3019						     (__v16qi) __B,
3020						     (__v16qi) __W,
3021						     (__mmask16) __U);
3022}
3023
3024extern __inline __m128i
3025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3026_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3027{
3028  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3029						     (__v16qi) __B,
3030						     (__v16qi)
3031						     _mm_setzero_si128 (),
3032						     (__mmask16) __U);
3033}
3034
3035extern __inline __m256i
3036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3038			    __m256i __B)
3039{
3040  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3041						     (__v16hi) __B,
3042						     (__v16hi) __W,
3043						     (__mmask16) __U);
3044}
3045
3046extern __inline __m256i
3047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3049{
3050  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3051						     (__v16hi) __B,
3052						     (__v16hi)
3053						     _mm256_setzero_si256 (),
3054						     (__mmask16) __U);
3055}
3056
3057extern __inline __m128i
3058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3059_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3060			 __m128i __B)
3061{
3062  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3063						     (__v8hi) __B,
3064						     (__v8hi) __W,
3065						     (__mmask8) __U);
3066}
3067
3068extern __inline __m128i
3069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3071{
3072  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3073						     (__v8hi) __B,
3074						     (__v8hi)
3075						     _mm_setzero_si128 (),
3076						     (__mmask8) __U);
3077}
3078
3079extern __inline __mmask16
3080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
3082{
3083  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3084						     (__v16qi) __B,
3085						     (__mmask16) -1);
3086}
3087
3088extern __inline __mmask16
3089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090_mm_cmpeq_epu8_mask (__m128i __A, __m128i __B)
3091{
3092  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3093						    (__v16qi) __B, 0,
3094						    (__mmask16) -1);
3095}
3096
3097extern __inline __mmask16
3098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099_mm_mask_cmpeq_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3100{
3101  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3102						    (__v16qi) __B, 0,
3103						    __U);
3104}
3105
3106extern __inline __mmask16
3107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3108_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3109{
3110  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3111						     (__v16qi) __B,
3112						     __U);
3113}
3114
3115extern __inline __mmask32
3116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3117_mm256_cmpeq_epu8_mask (__m256i __A, __m256i __B)
3118{
3119  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3120						    (__v32qi) __B, 0,
3121						    (__mmask32) -1);
3122}
3123
3124extern __inline __mmask32
3125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
3127{
3128  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3129						     (__v32qi) __B,
3130						     (__mmask32) -1);
3131}
3132
3133extern __inline __mmask32
3134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135_mm256_mask_cmpeq_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3136{
3137  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3138						    (__v32qi) __B, 0,
3139						    __U);
3140}
3141
3142extern __inline __mmask32
3143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3144_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3145{
3146  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3147						     (__v32qi) __B,
3148						     __U);
3149}
3150
3151extern __inline __mmask8
3152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153_mm_cmpeq_epu16_mask (__m128i __A, __m128i __B)
3154{
3155  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3156						   (__v8hi) __B, 0,
3157						   (__mmask8) -1);
3158}
3159
3160extern __inline __mmask8
3161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3162_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
3163{
3164  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3165						    (__v8hi) __B,
3166						    (__mmask8) -1);
3167}
3168
3169extern __inline __mmask8
3170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3171_mm_mask_cmpeq_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3172{
3173  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3174						   (__v8hi) __B, 0, __U);
3175}
3176
3177extern __inline __mmask8
3178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3179_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3180{
3181  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3182						    (__v8hi) __B, __U);
3183}
3184
3185extern __inline __mmask16
3186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187_mm256_cmpeq_epu16_mask (__m256i __A, __m256i __B)
3188{
3189  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3190						    (__v16hi) __B, 0,
3191						    (__mmask16) -1);
3192}
3193
3194extern __inline __mmask16
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
3197{
3198  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3199						     (__v16hi) __B,
3200						     (__mmask16) -1);
3201}
3202
3203extern __inline __mmask16
3204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3205_mm256_mask_cmpeq_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3206{
3207  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3208						    (__v16hi) __B, 0,
3209						    __U);
3210}
3211
3212extern __inline __mmask16
3213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3214_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3215{
3216  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3217						     (__v16hi) __B,
3218						     __U);
3219}
3220
3221extern __inline __mmask16
3222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223_mm_cmpgt_epu8_mask (__m128i __A, __m128i __B)
3224{
3225  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3226						    (__v16qi) __B, 6,
3227						    (__mmask16) -1);
3228}
3229
3230extern __inline __mmask16
3231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3232_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
3233{
3234  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3235						     (__v16qi) __B,
3236						     (__mmask16) -1);
3237}
3238
3239extern __inline __mmask16
3240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3241_mm_mask_cmpgt_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3242{
3243  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3244						    (__v16qi) __B, 6,
3245						    __U);
3246}
3247
3248extern __inline __mmask16
3249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3251{
3252  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3253						     (__v16qi) __B,
3254						     __U);
3255}
3256
3257extern __inline __mmask32
3258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3259_mm256_cmpgt_epu8_mask (__m256i __A, __m256i __B)
3260{
3261  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3262						    (__v32qi) __B, 6,
3263						    (__mmask32) -1);
3264}
3265
3266extern __inline __mmask32
3267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3268_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
3269{
3270  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3271						     (__v32qi) __B,
3272						     (__mmask32) -1);
3273}
3274
3275extern __inline __mmask32
3276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277_mm256_mask_cmpgt_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3278{
3279  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3280						    (__v32qi) __B, 6,
3281						    __U);
3282}
3283
3284extern __inline __mmask32
3285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3286_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3287{
3288  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3289						     (__v32qi) __B,
3290						     __U);
3291}
3292
3293extern __inline __mmask8
3294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3295_mm_cmpgt_epu16_mask (__m128i __A, __m128i __B)
3296{
3297  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3298						   (__v8hi) __B, 6,
3299						   (__mmask8) -1);
3300}
3301
3302extern __inline __mmask8
3303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
3305{
3306  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3307						    (__v8hi) __B,
3308						    (__mmask8) -1);
3309}
3310
3311extern __inline __mmask8
3312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313_mm_mask_cmpgt_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3314{
3315  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3316						   (__v8hi) __B, 6, __U);
3317}
3318
3319extern __inline __mmask8
3320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3321_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3322{
3323  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3324						    (__v8hi) __B, __U);
3325}
3326
3327extern __inline __mmask16
3328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3329_mm256_cmpgt_epu16_mask (__m256i __A, __m256i __B)
3330{
3331  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3332						    (__v16hi) __B, 6,
3333						    (__mmask16) -1);
3334}
3335
3336extern __inline __mmask16
3337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3338_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
3339{
3340  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3341						     (__v16hi) __B,
3342						     (__mmask16) -1);
3343}
3344
3345extern __inline __mmask16
3346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3347_mm256_mask_cmpgt_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3348{
3349  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3350						    (__v16hi) __B, 6,
3351						    __U);
3352}
3353
3354extern __inline __mmask16
3355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3357{
3358  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3359						     (__v16hi) __B,
3360						     __U);
3361}
3362
3363extern __inline __mmask16
3364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3365_mm_testn_epi8_mask (__m128i __A, __m128i __B)
3366{
3367  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3368						 (__v16qi) __B,
3369						 (__mmask16) -1);
3370}
3371
3372extern __inline __mmask16
3373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3375{
3376  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3377						 (__v16qi) __B, __U);
3378}
3379
3380extern __inline __mmask32
3381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3382_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
3383{
3384  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3385						 (__v32qi) __B,
3386						 (__mmask32) -1);
3387}
3388
3389extern __inline __mmask32
3390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3391_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3392{
3393  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3394						 (__v32qi) __B, __U);
3395}
3396
3397extern __inline __mmask8
3398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399_mm_testn_epi16_mask (__m128i __A, __m128i __B)
3400{
3401  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3402						(__v8hi) __B,
3403						(__mmask8) -1);
3404}
3405
3406extern __inline __mmask8
3407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3409{
3410  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3411						(__v8hi) __B, __U);
3412}
3413
3414extern __inline __mmask16
3415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
3417{
3418  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3419						 (__v16hi) __B,
3420						 (__mmask16) -1);
3421}
3422
3423extern __inline __mmask16
3424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3426{
3427  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3428						 (__v16hi) __B, __U);
3429}
3430
3431extern __inline __m256i
3432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3433_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3434			  __m256i __B)
3435{
3436  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3437						  (__v32qi) __B,
3438						  (__v32qi) __W,
3439						  (__mmask32) __U);
3440}
3441
3442extern __inline __m256i
3443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3445{
3446  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3447						  (__v32qi) __B,
3448						  (__v32qi)
3449						  _mm256_setzero_si256 (),
3450						  (__mmask32) __U);
3451}
3452
3453extern __inline __m128i
3454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3455_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3456		       __m128i __B)
3457{
3458  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3459						  (__v16qi) __B,
3460						  (__v16qi) __W,
3461						  (__mmask16) __U);
3462}
3463
3464extern __inline __m128i
3465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3467{
3468  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3469						  (__v16qi) __B,
3470						  (__v16qi)
3471						  _mm_setzero_si128 (),
3472						  (__mmask16) __U);
3473}
3474
3475extern __inline __m256i
3476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3477_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3478{
3479  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3480						    (__v16hi) __B,
3481						    (__v32qi)
3482						    _mm256_setzero_si256 (),
3483						    __M);
3484}
3485
3486extern __inline __m256i
3487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3488_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3489			 __m256i __B)
3490{
3491  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3492						    (__v16hi) __B,
3493						    (__v32qi) __W,
3494						    __M);
3495}
3496
3497extern __inline __m128i
3498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3500{
3501  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3502						    (__v8hi) __B,
3503						    (__v16qi)
3504						    _mm_setzero_si128 (),
3505						    __M);
3506}
3507
3508extern __inline __m128i
3509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3511		      __m128i __B)
3512{
3513  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3514						    (__v8hi) __B,
3515						    (__v16qi) __W,
3516						    __M);
3517}
3518
3519extern __inline __m256i
3520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3522{
3523  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3524						    (__v16hi) __B,
3525						    (__v32qi)
3526						    _mm256_setzero_si256 (),
3527						    __M);
3528}
3529
3530extern __inline __m256i
3531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3533			  __m256i __B)
3534{
3535  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3536						    (__v16hi) __B,
3537						    (__v32qi) __W,
3538						    __M);
3539}
3540
3541extern __inline __m128i
3542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3543_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3544{
3545  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3546						    (__v8hi) __B,
3547						    (__v16qi)
3548						    _mm_setzero_si128 (),
3549						    __M);
3550}
3551
3552extern __inline __m128i
3553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3554_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3555		       __m128i __B)
3556{
3557  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3558						    (__v8hi) __B,
3559						    (__v16qi) __W,
3560						    __M);
3561}
3562
3563extern __inline __m256i
3564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3565_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
3566{
3567  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3568						 (__v32qi) __W,
3569						 (__mmask32) __U);
3570}
3571
3572extern __inline __m256i
3573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3574_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
3575{
3576  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3577						 (__v32qi)
3578						 _mm256_setzero_si256 (),
3579						 (__mmask32) __U);
3580}
3581
3582extern __inline __m128i
3583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3584_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
3585{
3586  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3587						 (__v16qi) __W,
3588						 (__mmask16) __U);
3589}
3590
3591extern __inline __m128i
3592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3593_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
3594{
3595  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3596						 (__v16qi)
3597						 _mm_setzero_si128 (),
3598						 (__mmask16) __U);
3599}
3600
3601extern __inline __m256i
3602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
3604{
3605  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3606						 (__v16hi) __W,
3607						 (__mmask16) __U);
3608}
3609
3610extern __inline __m256i
3611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3612_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
3613{
3614  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3615						 (__v16hi)
3616						 _mm256_setzero_si256 (),
3617						 (__mmask16) __U);
3618}
3619
3620extern __inline __m128i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
3623{
3624  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3625						 (__v8hi) __W,
3626						 (__mmask8) __U);
3627}
3628
3629extern __inline __m128i
3630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3631_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
3632{
3633  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3634						 (__v8hi)
3635						 _mm_setzero_si128 (),
3636						 (__mmask8) __U);
3637}
3638
3639extern __inline __mmask32
3640  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
3642{
3643  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3644						   (__v32qi) __Y, 4,
3645						   (__mmask32) -1);
3646}
3647
3648extern __inline __mmask32
3649  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
3651{
3652  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3653						   (__v32qi) __Y, 1,
3654						   (__mmask32) -1);
3655}
3656
3657extern __inline __mmask32
3658  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3659_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
3660{
3661  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3662						   (__v32qi) __Y, 5,
3663						   (__mmask32) -1);
3664}
3665
3666extern __inline __mmask32
3667  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3668_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
3669{
3670  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3671						   (__v32qi) __Y, 2,
3672						   (__mmask32) -1);
3673}
3674
3675extern __inline __mmask16
3676  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3677_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
3678{
3679  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3680						   (__v16hi) __Y, 4,
3681						   (__mmask16) -1);
3682}
3683
3684extern __inline __mmask16
3685  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3686_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
3687{
3688  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3689						   (__v16hi) __Y, 1,
3690						   (__mmask16) -1);
3691}
3692
3693extern __inline __mmask16
3694  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3695_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
3696{
3697  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3698						   (__v16hi) __Y, 5,
3699						   (__mmask16) -1);
3700}
3701
3702extern __inline __mmask16
3703  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3704_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
3705{
3706  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3707						   (__v16hi) __Y, 2,
3708						   (__mmask16) -1);
3709}
3710
3711extern __inline void
3712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3713_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
3714{
3715  __builtin_ia32_storedquhi256_mask ((short *) __P,
3716				     (__v16hi) __A,
3717				     (__mmask16) __U);
3718}
3719
3720extern __inline void
3721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3722_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
3723{
3724  __builtin_ia32_storedquhi128_mask ((short *) __P,
3725				     (__v8hi) __A,
3726				     (__mmask8) __U);
3727}
3728
3729extern __inline __m128i
3730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3731_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3732		     __m128i __B)
3733{
3734  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
3735						  (__v8hi) __B,
3736						  (__v8hi) __W,
3737						  (__mmask8) __U);
3738}
3739
3740extern __inline __m128i
3741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3742_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3743		    __m128i __B)
3744{
3745  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
3746						  (__v16qi) __B,
3747						  (__v16qi) __W,
3748						  (__mmask16) __U);
3749}
3750
3751extern __inline __m128i
3752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3754{
3755  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
3756						  (__v16qi) __B,
3757						  (__v16qi)
3758						  _mm_setzero_si128 (),
3759						  (__mmask16) __U);
3760}
3761
3762extern __inline __m128i
3763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3765		     __m128i __B)
3766{
3767  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
3768						  (__v8hi) __B,
3769						  (__v8hi) __W,
3770						  (__mmask8) __U);
3771}
3772
3773extern __inline __m128i
3774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3776{
3777  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
3778						  (__v8hi) __B,
3779						  (__v8hi)
3780						  _mm_setzero_si128 (),
3781						  (__mmask8) __U);
3782}
3783
3784extern __inline __m128i
3785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3786_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
3787		    __m128i __B)
3788{
3789  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
3790						   (__v16qi) __B,
3791						   (__v16qi) __W,
3792						   (__mmask16) __U);
3793}
3794
3795extern __inline __m128i
3796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3797_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
3798{
3799  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
3800						   (__v16qi) __B,
3801						   (__v16qi)
3802						   _mm_setzero_si128 (),
3803						   (__mmask16) __U);
3804}
3805
3806extern __inline __m128i
3807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3808_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
3809		     __m128i __B)
3810{
3811  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
3812						   (__v8hi) __B,
3813						   (__v8hi) __W,
3814						   (__mmask8) __U);
3815}
3816
3817extern __inline __m128i
3818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3819_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
3820{
3821  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
3822						   (__v8hi) __B,
3823						   (__v8hi)
3824						   _mm_setzero_si128 (),
3825						   (__mmask8) __U);
3826}
3827
3828extern __inline __m256i
3829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3830_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3831		       __m128i __B)
3832{
3833  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
3834						 (__v8hi) __B,
3835						 (__v16hi) __W,
3836						 (__mmask16) __U);
3837}
3838
3839extern __inline __m256i
3840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
3842{
3843  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
3844						 (__v8hi) __B,
3845						 (__v16hi)
3846						 _mm256_setzero_si256 (),
3847						 (__mmask16) __U);
3848}
3849
3850extern __inline __m128i
3851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3852_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3853		    __m128i __B)
3854{
3855  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
3856						 (__v8hi) __B,
3857						 (__v8hi) __W,
3858						 (__mmask8) __U);
3859}
3860
3861extern __inline __m128i
3862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3863_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3864{
3865  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
3866						 (__v8hi) __B,
3867						 (__v8hi)
3868						 _mm_setzero_si128 (),
3869						 (__mmask8) __U);
3870}
3871
3872extern __inline __m256i
3873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3875		       __m128i __B)
3876{
3877  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
3878						 (__v8hi) __B,
3879						 (__v16hi) __W,
3880						 (__mmask16) __U);
3881}
3882
3883extern __inline __m256i
3884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
3886{
3887  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
3888						 (__v8hi) __B,
3889						 (__v16hi)
3890						 _mm256_setzero_si256 (),
3891						 (__mmask16) __U);
3892}
3893
3894extern __inline __m128i
3895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3897		    __m128i __B)
3898{
3899  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
3900						 (__v8hi) __B,
3901						 (__v8hi) __W,
3902						 (__mmask8) __U);
3903}
3904
3905extern __inline __m128i
3906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3908{
3909  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
3910						 (__v8hi) __B,
3911						 (__v8hi)
3912						 _mm_setzero_si128 (),
3913						 (__mmask8) __U);
3914}
3915
3916extern __inline __m128i
3917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3919{
3920  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
3921						  (__v8hi) __B,
3922						  (__v8hi)
3923						  _mm_setzero_si128 (),
3924						  (__mmask8) __U);
3925}
3926
3927extern __inline __m128i
3928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3929_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
3930		    __m128i __B)
3931{
3932  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
3933						   (__v16qi) __B,
3934						   (__v16qi) __W,
3935						   (__mmask16) __U);
3936}
3937
3938extern __inline __m128i
3939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3940_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
3941{
3942  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
3943						   (__v16qi) __B,
3944						   (__v16qi)
3945						   _mm_setzero_si128 (),
3946						   (__mmask16) __U);
3947}
3948
3949extern __inline __m128i
3950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3951_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
3952		     __m128i __B)
3953{
3954  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
3955						   (__v8hi) __B,
3956						   (__v8hi) __W,
3957						   (__mmask8) __U);
3958}
3959
3960extern __inline __m128i
3961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
3963{
3964  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
3965						   (__v8hi) __B,
3966						   (__v8hi)
3967						   _mm_setzero_si128 (),
3968						   (__mmask8) __U);
3969}
3970
3971extern __inline __m128i
3972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3973_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3974		   __m128i __B)
3975{
3976  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
3977						 (__v16qi) __B,
3978						 (__v16qi) __W,
3979						 (__mmask16) __U);
3980}
3981
3982extern __inline __m128i
3983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3985{
3986  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
3987						 (__v16qi) __B,
3988						 (__v16qi)
3989						 _mm_setzero_si128 (),
3990						 (__mmask16) __U);
3991}
3992
3993extern __inline __m128i
3994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3995_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3996		    __m128i __B)
3997{
3998  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
3999						 (__v8hi) __B,
4000						 (__v8hi) __W,
4001						 (__mmask8) __U);
4002}
4003
4004extern __inline __m128i
4005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4006_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4007{
4008  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
4009						 (__v8hi) __B,
4010						 (__v8hi)
4011						 _mm_setzero_si128 (),
4012						 (__mmask8) __U);
4013}
4014
4015extern __inline __m128i
4016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4017_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4018		    __m128i __B)
4019{
4020  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4021						  (__v16qi) __B,
4022						  (__v16qi) __W,
4023						  (__mmask16) __U);
4024}
4025
4026extern __inline __m128i
4027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4028_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4029{
4030  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4031						  (__v16qi) __B,
4032						  (__v16qi)
4033						  _mm_setzero_si128 (),
4034						  (__mmask16) __U);
4035}
4036
4037extern __inline __m128i
4038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4039_mm_cvtepi16_epi8 (__m128i __A)
4040{
4041
4042  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4043						  (__v16qi)_mm_undefined_si128(),
4044						  (__mmask8) -1);
4045}
4046
4047extern __inline void
4048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4049_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
4050{
4051  __builtin_ia32_pmovwb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
4052}
4053
4054extern __inline __m128i
4055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4056_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
4057{
4058  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4059						  (__v16qi) __O, __M);
4060}
4061
4062extern __inline __m128i
4063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4064_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
4065{
4066  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4067						  (__v16qi)
4068						  _mm_setzero_si128 (),
4069						  __M);
4070}
4071
4072extern __inline __m256i
4073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4074_mm256_srav_epi16 (__m256i __A, __m256i __B)
4075{
4076  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4077						  (__v16hi) __B,
4078						  (__v16hi)
4079						  _mm256_setzero_si256 (),
4080						  (__mmask16) -1);
4081}
4082
4083extern __inline __m256i
4084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4085_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4086			__m256i __B)
4087{
4088  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4089						  (__v16hi) __B,
4090						  (__v16hi) __W,
4091						  (__mmask16) __U);
4092}
4093
4094extern __inline __m256i
4095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4097{
4098  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4099						  (__v16hi) __B,
4100						  (__v16hi)
4101						  _mm256_setzero_si256 (),
4102						  (__mmask16) __U);
4103}
4104
4105extern __inline __m128i
4106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107_mm_srav_epi16 (__m128i __A, __m128i __B)
4108{
4109  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4110						 (__v8hi) __B,
4111						 (__v8hi)
4112						 _mm_setzero_si128 (),
4113						 (__mmask8) -1);
4114}
4115
4116extern __inline __m128i
4117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4118_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4119		     __m128i __B)
4120{
4121  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4122						 (__v8hi) __B,
4123						 (__v8hi) __W,
4124						 (__mmask8) __U);
4125}
4126
4127extern __inline __m128i
4128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4129_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4130{
4131  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4132						 (__v8hi) __B,
4133						 (__v8hi)
4134						 _mm_setzero_si128 (),
4135						 (__mmask8) __U);
4136}
4137
4138extern __inline __m256i
4139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140_mm256_srlv_epi16 (__m256i __A, __m256i __B)
4141{
4142  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4143						  (__v16hi) __B,
4144						  (__v16hi)
4145						  _mm256_setzero_si256 (),
4146						  (__mmask16) -1);
4147}
4148
4149extern __inline __m256i
4150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4152			__m256i __B)
4153{
4154  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4155						  (__v16hi) __B,
4156						  (__v16hi) __W,
4157						  (__mmask16) __U);
4158}
4159
4160extern __inline __m256i
4161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4163{
4164  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4165						  (__v16hi) __B,
4166						  (__v16hi)
4167						  _mm256_setzero_si256 (),
4168						  (__mmask16) __U);
4169}
4170
4171extern __inline __m128i
4172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173_mm_srlv_epi16 (__m128i __A, __m128i __B)
4174{
4175  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4176						 (__v8hi) __B,
4177						 (__v8hi)
4178						 _mm_setzero_si128 (),
4179						 (__mmask8) -1);
4180}
4181
4182extern __inline __m128i
4183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4185		     __m128i __B)
4186{
4187  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4188						 (__v8hi) __B,
4189						 (__v8hi) __W,
4190						 (__mmask8) __U);
4191}
4192
4193extern __inline __m128i
4194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4195_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4196{
4197  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4198						 (__v8hi) __B,
4199						 (__v8hi)
4200						 _mm_setzero_si128 (),
4201						 (__mmask8) __U);
4202}
4203
4204extern __inline __m256i
4205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206_mm256_sllv_epi16 (__m256i __A, __m256i __B)
4207{
4208  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4209						  (__v16hi) __B,
4210						  (__v16hi)
4211						  _mm256_setzero_si256 (),
4212						  (__mmask16) -1);
4213}
4214
4215extern __inline __m256i
4216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4218			__m256i __B)
4219{
4220  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4221						  (__v16hi) __B,
4222						  (__v16hi) __W,
4223						  (__mmask16) __U);
4224}
4225
4226extern __inline __m256i
4227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4228_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4229{
4230  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4231						  (__v16hi) __B,
4232						  (__v16hi)
4233						  _mm256_setzero_si256 (),
4234						  (__mmask16) __U);
4235}
4236
4237extern __inline __m128i
4238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239_mm_sllv_epi16 (__m128i __A, __m128i __B)
4240{
4241  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4242						 (__v8hi) __B,
4243						 (__v8hi)
4244						 _mm_setzero_si128 (),
4245						 (__mmask8) -1);
4246}
4247
4248extern __inline __m128i
4249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4250_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4251		     __m128i __B)
4252{
4253  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4254						 (__v8hi) __B,
4255						 (__v8hi) __W,
4256						 (__mmask8) __U);
4257}
4258
4259extern __inline __m128i
4260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4261_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4262{
4263  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4264						 (__v8hi) __B,
4265						 (__v8hi)
4266						 _mm_setzero_si128 (),
4267						 (__mmask8) __U);
4268}
4269
4270extern __inline __m128i
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4273		    __m128i __B)
4274{
4275  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4276						 (__v8hi) __B,
4277						 (__v8hi) __W,
4278						 (__mmask8) __U);
4279}
4280
4281extern __inline __m128i
4282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4283_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4284{
4285  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4286						 (__v8hi) __B,
4287						 (__v8hi)
4288						 _mm_setzero_si128 (),
4289						 (__mmask8) __U);
4290}
4291
4292extern __inline __m256i
4293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4295		       __m128i __B)
4296{
4297  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4298						 (__v8hi) __B,
4299						 (__v16hi) __W,
4300						 (__mmask16) __U);
4301}
4302
4303extern __inline __m256i
4304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4305_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4306{
4307  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4308						 (__v8hi) __B,
4309						 (__v16hi)
4310						 _mm256_setzero_si256 (),
4311						 (__mmask16) __U);
4312}
4313
4314extern __inline __m256i
4315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4317{
4318  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4319						    (__v8si) __B,
4320						    (__v16hi)
4321						    _mm256_setzero_si256 (),
4322						    __M);
4323}
4324
4325extern __inline __m256i
4326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4328			  __m256i __B)
4329{
4330  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4331						    (__v8si) __B,
4332						    (__v16hi) __W,
4333						    __M);
4334}
4335
4336extern __inline __m128i
4337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4339{
4340  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4341						    (__v4si) __B,
4342						    (__v8hi)
4343						    _mm_setzero_si128 (),
4344						    __M);
4345}
4346
4347extern __inline __m128i
4348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349_mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
4350		       __m128i __B)
4351{
4352  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4353						    (__v4si) __B,
4354						    (__v8hi) __W, __M);
4355}
4356
4357extern __inline __m256i
4358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4360{
4361  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4362						    (__v8si) __B,
4363						    (__v16hi)
4364						    _mm256_setzero_si256 (),
4365						    __M);
4366}
4367
4368extern __inline __m256i
4369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4370_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4371			 __m256i __B)
4372{
4373  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4374						    (__v8si) __B,
4375						    (__v16hi) __W,
4376						    __M);
4377}
4378
4379extern __inline __m128i
4380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4381_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4382{
4383  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4384						    (__v4si) __B,
4385						    (__v8hi)
4386						    _mm_setzero_si128 (),
4387						    __M);
4388}
4389
4390extern __inline __m128i
4391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4392_mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
4393		      __m128i __B)
4394{
4395  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4396						    (__v4si) __B,
4397						    (__v8hi) __W, __M);
4398}
4399
4400extern __inline __mmask16
4401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402_mm_mask_cmpneq_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4403{
4404  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4405						   (__v16qi) __Y, 4,
4406						   (__mmask16) __M);
4407}
4408
4409extern __inline __mmask16
4410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4411_mm_mask_cmplt_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4412{
4413  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4414						   (__v16qi) __Y, 1,
4415						   (__mmask16) __M);
4416}
4417
4418extern __inline __mmask16
4419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4420_mm_mask_cmpge_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4421{
4422  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4423						   (__v16qi) __Y, 5,
4424						   (__mmask16) __M);
4425}
4426
4427extern __inline __mmask16
4428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429_mm_mask_cmple_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4430{
4431  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4432						   (__v16qi) __Y, 2,
4433						   (__mmask16) __M);
4434}
4435
4436extern __inline __mmask8
4437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438_mm_mask_cmpneq_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4439{
4440  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4441						  (__v8hi) __Y, 4,
4442						  (__mmask8) __M);
4443}
4444
4445extern __inline __mmask8
4446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4447_mm_mask_cmplt_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4448{
4449  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4450						  (__v8hi) __Y, 1,
4451						  (__mmask8) __M);
4452}
4453
4454extern __inline __mmask8
4455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4456_mm_mask_cmpge_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4457{
4458  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4459						  (__v8hi) __Y, 5,
4460						  (__mmask8) __M);
4461}
4462
4463extern __inline __mmask8
4464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4465_mm_mask_cmple_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4466{
4467  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4468						  (__v8hi) __Y, 2,
4469						  (__mmask8) __M);
4470}
4471
4472extern __inline __mmask16
4473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4474_mm_mask_cmpneq_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4475{
4476  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4477						  (__v16qi) __Y, 4,
4478						  (__mmask16) __M);
4479}
4480
4481extern __inline __mmask16
4482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4483_mm_mask_cmplt_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4484{
4485  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4486						  (__v16qi) __Y, 1,
4487						  (__mmask16) __M);
4488}
4489
4490extern __inline __mmask16
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_mask_cmpge_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4493{
4494  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4495						  (__v16qi) __Y, 5,
4496						  (__mmask16) __M);
4497}
4498
4499extern __inline __mmask16
4500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4501_mm_mask_cmple_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4502{
4503  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4504						  (__v16qi) __Y, 2,
4505						  (__mmask16) __M);
4506}
4507
4508extern __inline __mmask8
4509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4510_mm_mask_cmpneq_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4511{
4512  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4513						 (__v8hi) __Y, 4,
4514						 (__mmask8) __M);
4515}
4516
4517extern __inline __mmask8
4518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519_mm_mask_cmplt_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4520{
4521  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4522						 (__v8hi) __Y, 1,
4523						 (__mmask8) __M);
4524}
4525
4526extern __inline __mmask8
4527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528_mm_mask_cmpge_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4529{
4530  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4531						 (__v8hi) __Y, 5,
4532						 (__mmask8) __M);
4533}
4534
4535extern __inline __mmask8
4536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4537_mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4538{
4539  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4540						 (__v8hi) __Y, 2,
4541						 (__mmask8) __M);
4542}
4543
4544extern __inline __mmask32
4545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4546_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4547{
4548  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4549						   (__v32qi) __Y, 4,
4550						   (__mmask32) __M);
4551}
4552
4553extern __inline __mmask32
4554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4555_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4556{
4557  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4558						   (__v32qi) __Y, 1,
4559						   (__mmask32) __M);
4560}
4561
4562extern __inline __mmask32
4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4565{
4566  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4567						   (__v32qi) __Y, 5,
4568						   (__mmask32) __M);
4569}
4570
4571extern __inline __mmask32
4572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4574{
4575  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4576						   (__v32qi) __Y, 2,
4577						   (__mmask32) __M);
4578}
4579
4580extern __inline __mmask16
4581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4582_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4583{
4584  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4585						   (__v16hi) __Y, 4,
4586						   (__mmask16) __M);
4587}
4588
4589extern __inline __mmask16
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4592{
4593  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4594						   (__v16hi) __Y, 1,
4595						   (__mmask16) __M);
4596}
4597
4598extern __inline __mmask16
4599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4601{
4602  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4603						   (__v16hi) __Y, 5,
4604						   (__mmask16) __M);
4605}
4606
4607extern __inline __mmask16
4608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4609_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4610{
4611  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4612						   (__v16hi) __Y, 2,
4613						   (__mmask16) __M);
4614}
4615
4616extern __inline __mmask32
4617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4619{
4620  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4621						  (__v32qi) __Y, 4,
4622						  (__mmask32) __M);
4623}
4624
4625extern __inline __mmask32
4626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4627_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4628{
4629  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4630						  (__v32qi) __Y, 1,
4631						  (__mmask32) __M);
4632}
4633
4634extern __inline __mmask32
4635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4636_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4637{
4638  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4639						  (__v32qi) __Y, 5,
4640						  (__mmask32) __M);
4641}
4642
4643extern __inline __mmask32
4644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4645_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4646{
4647  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4648						  (__v32qi) __Y, 2,
4649						  (__mmask32) __M);
4650}
4651
4652extern __inline __mmask16
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4655{
4656  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4657						  (__v16hi) __Y, 4,
4658						  (__mmask16) __M);
4659}
4660
4661extern __inline __mmask16
4662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4663_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4664{
4665  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4666						  (__v16hi) __Y, 1,
4667						  (__mmask16) __M);
4668}
4669
4670extern __inline __mmask16
4671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4672_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4673{
4674  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4675						  (__v16hi) __Y, 5,
4676						  (__mmask16) __M);
4677}
4678
4679extern __inline __mmask16
4680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4681_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4682{
4683  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4684						  (__v16hi) __Y, 2,
4685						  (__mmask16) __M);
4686}
4687
/* __DISABLE_AVX512VLBW__ is defined at the top of this header only when
   the header itself pushed the target options and enabled
   "avx512vl,avx512bw".  In that case drop the marker macro and pop the
   options again so the including translation unit gets its own target
   settings back.  */
#ifdef __DISABLE_AVX512VLBW__
#undef __DISABLE_AVX512VLBW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLBW__ */
4692
4693#endif /* _AVX512VLBWINTRIN_H_INCLUDED */
4694