1/* Copyright (C) 2019-2022 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
29#define __AVX512FP16VLINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
32#pragma GCC push_options
33#pragma GCC target("avx512fp16,avx512vl")
34#define __DISABLE_AVX512FP16VL__
#endif /* !__AVX512VL__ || !__AVX512FP16__ */
36
/* Bit-cast intrinsics: reinterpret an FP16 vector as a float, double or
   integer vector of the same width, and vice versa.  No bits are
   modified and no instructions are generated -- only the type changes.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_ps (__m128h __a)
{
  return (__m128) __a;
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_ps (__m256h __a)
{
  return (__m256) __a;
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_pd (__m128h __a)
{
  return (__m128d) __a;
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_pd (__m256h __a)
{
  return (__m256d) __a;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_si128 (__m128h __a)
{
  return (__m128i) __a;
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_si256 (__m256h __a)
{
  return (__m256i) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_ph (__m128 __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_ph (__m256 __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ph (__m128d __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ph (__m256d __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ph (__m128i __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ph (__m256i __a)
{
  return (__m256h) __a;
}
120
/* Return the lower 128 bits of the 256-bit FP16 vector __A.  The union
   performs the truncating reinterpretation without violating strict
   aliasing.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph256_ph128 (__m256h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}
132
/* Widen the 128-bit FP16 vector __A to 256 bits.  Only the lower half
   of the union is written, so the upper 128 bits of the result are
   indeterminate -- use _mm256_zextph128_ph256 for zeroed upper bits.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph128_ph256 (__m128h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}
145
/* Widen the 128-bit FP16 vector __A to 256 bits with the upper 128 bits
   zeroed, by inserting __A into the low lane of an all-zero vector.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextph128_ph256 (__m128h __A)
{
  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
					 (__m128) __A, 0);
}
153
154extern __inline __m256h
155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156_mm256_conj_pch (__m256h __A)
157{
158  return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31));
159}
160
/* Write-masked conjugate: elements with a clear bit in __U are taken
   from __W instead of the conjugated result.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						   _mm256_conj_pch (__A),
						  (__v8sf) __W,
						  (__mmask8) __U);
}

/* Zero-masked conjugate: elements with a clear bit in __U are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						   _mm256_conj_pch (__A),
						  (__v8sf)
						   _mm256_setzero_ps (),
						  (__mmask8) __U);
}
181
182extern __inline __m128h
183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184_mm_conj_pch (__m128h __A)
185{
186  return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31));
187}
188
/* Write-masked 128-bit conjugate: unselected elements come from __W.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) __W,
						  (__mmask8) __U);
}

/* Zero-masked 128-bit conjugate: unselected elements are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) _mm_setzero_ps (),
						  (__mmask8) __U);
}
206
/* Intrinsics v[add,sub,mul,div]ph.  */

/* Packed FP16 addition.  Unmasked forms use generic vector arithmetic;
   _mm*_mask_* blend with passthrough __A under write-mask __B, and
   _mm*_maskz_* zero elements whose mask bit is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_addph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
251
/* Packed FP16 subtraction; same masking conventions as the add group
   (mask_* blends with __A, maskz_* zeroes unselected elements).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_subph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
295
/* Packed FP16 multiplication; same masking conventions as the add
   group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_mulph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
339
/* Packed FP16 division; same masking conventions as the add group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_divph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
383
/* Intrinsics v[max,min]ph.  */

/* Packed FP16 maximum (vmaxph).  Unmasked forms pass an all-ones mask;
   mask_* blends with __A, maskz_* zeroes unselected elements.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_maxph128_mask (__A, __B,
				       _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_maxph256_mask (__A, __B,
				       _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_maxph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
432
/* Packed FP16 minimum (vminph); same masking conventions as the max
   group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_minph128_mask (__A, __B,
				       _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_minph256_mask (__A, __B,
				       _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_minph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
480
/* Absolute value of packed FP16 elements: AND with 0x7FFF7FFF clears
   bit 15 of every 16-bit lane, i.e. the FP16 sign bit.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_ph (__m128h __A)
{
  return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF),
				   (__m128i) __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_ph (__m256h __A)
{
  return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF),
				      (__m256i) __A);
}
496
/* Intrinsics vcmpph: compare packed FP16 elements; the immediate
   (__C/__D) selects the predicate.  The inline forms need __OPTIMIZE__
   so the immediate can constant-fold into the builtin; otherwise the
   macro forms are used.

   Fixes: the guard was spelled `#ifdef __OPTIMIZE' (missing trailing
   underscores), so the inline forms were never compiled; and the
   256-bit inline forms were misnamed _mm_cmp_ph_mask /
   _mm_mask_cmp_ph_mask, redefining the 128-bit names and disagreeing
   with the _mm256_* macro forms below.  */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
						  (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
		      const int __D)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
						   (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
			 const int __D)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
						   __A);
}

#else
#define _mm_cmp_ph_mask(A, B, C)			\
  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))

#define _mm_mask_cmp_ph_mask(A, B, C, D)		\
  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))

#define _mm256_cmp_ph_mask(A, B, C)			\
  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))

#define _mm256_mask_cmp_ph_mask(A, B, C, D)		\
  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))

#endif /* __OPTIMIZE__ */
546
/* Intrinsics vsqrtph.  */

/* Packed FP16 square root.  Unmasked forms pass an all-ones mask;
   mask_* blends with __A, maskz_* zeroes unselected elements.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ph (__m128h __A)
{
  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
					(__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ph (__m256h __A)
{
  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
					(__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
					__A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
					__A);
}
593
/* Intrinsics vrsqrtph.  */

/* Approximate reciprocal square root of packed FP16 elements; same
   masking conventions as the sqrt group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ph (__m128h __A)
{
  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
					 (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ph (__m256h __A)
{
  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
					 (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
					 __A);
}
639
/* Intrinsics vrcpph.  */

/* Approximate reciprocal of packed FP16 elements; same masking
   conventions as the sqrt group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ph (__m128h __A)
{
  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ph (__m256h __A)
{
  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
				       __A);
}
685
/* Intrinsics vscalefph.  */

/* Scale packed FP16 elements of the first operand by powers of two
   taken from the second (vscalefph); same masking conventions as the
   sqrt group.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_scalefph128_mask (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_scalefph256_mask (__A, __B,
					  _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
		       __m256h __D)
{
  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_scalefph128_mask (__B, __C,
					  _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_scalefph256_mask (__B, __C,
					  _mm256_setzero_ph (),
					  __A);
}
736
/* Intrinsics vreduceph.  */

/* Extract the reduced argument of packed FP16 elements (vreduceph);
   the int argument is the imm8 control.  Inline forms need __OPTIMIZE__
   so the immediate can constant-fold; macro forms are used otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
  return __builtin_ia32_reduceph128_mask (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_reduceph128_mask (__B, __C,
					  _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
  return __builtin_ia32_reduceph256_mask (__A, __B,
					  _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_reduceph256_mask (__B, __C,
					  _mm256_setzero_ph (),
					  __A);
}

#else
#define _mm_reduce_ph(A, B)				\
  (__builtin_ia32_reduceph128_mask ((A), (B),		\
				    _mm_setzero_ph (),	\
				    ((__mmask8)-1)))

#define _mm_mask_reduce_ph(A,  B,  C, D)			\
  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))

#define _mm_maskz_reduce_ph(A,  B, C)					\
  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_reduce_ph(A, B)					\
  (__builtin_ia32_reduceph256_mask ((A), (B),			\
				    _mm256_setzero_ph (),	\
				    ((__mmask16)-1)))

#define _mm256_mask_reduce_ph(A, B, C, D)			\
  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))

#define _mm256_maskz_reduce_ph(A, B, C)					\
  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */
812
/* Intrinsics vrndscaleph.  */

/* Round packed FP16 elements to a specified number of fraction bits
   (vrndscaleph); the int argument is the imm8 control.  Inline forms
   need __OPTIMIZE__ so the immediate can constant-fold; macro forms are
   used otherwise.  (The first function was previously indented two
   extra columns, inconsistent with the rest of the file; normalized.)  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ph (__m128h __A, int __B)
{
  return __builtin_ia32_rndscaleph128_mask (__A, __B,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscaleph128_mask (__B, __C,
					    _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
  return __builtin_ia32_rndscaleph256_mask (__A, __B,
					    _mm256_setzero_ph (),
					    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
			   int __D)
{
  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_rndscaleph256_mask (__B, __C,
					    _mm256_setzero_ph (),
					    __A);
}

#else
#define _mm_roundscale_ph(A, B)						\
  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),	\
				      ((__mmask8)-1)))

#define _mm_mask_roundscale_ph(A, B, C, D)			\
  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))

#define _mm_maskz_roundscale_ph(A, B, C)				\
  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_roundscale_ph(A, B)				\
  (__builtin_ia32_rndscaleph256_mask ((A), (B),			\
				      _mm256_setzero_ph(),	\
				      ((__mmask16)-1)))

#define _mm256_mask_roundscale_ph(A, B, C, D)			\
  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))

#define _mm256_maskz_roundscale_ph(A, B, C)				\
  (__builtin_ia32_rndscaleph256_mask ((B), (C),				\
				      _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */
889
/* Intrinsics vfpclassph.  */

/* Test packed FP16 elements for special-value categories selected by
   the immediate (vfpclassph); returns a mask of matching elements.
   Inline forms need __OPTIMIZE__ so the immediate can constant-fold;
   macro forms are used otherwise.  (The masked 128-bit definition line
   was previously indented two extra columns; normalized.)  */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ph_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm,
						      (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm, __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ph_mask (__m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm,
						       (__mmask16) -1);
}

#else
#define _mm_fpclass_ph_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C),(__mmask8)-1))

#define _mm_mask_fpclass_ph_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C),(__mmask8)(u)))

#define _mm256_fpclass_ph_mask(X, C)                                    \
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C),(__mmask16)-1))

#define _mm256_mask_fpclass_ph_mask(u, X, C)				\
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C),(__mmask16)(u)))
#endif /* __OPTIMIZE__ */
943
/* Intrinsics vgetexpph.  */

/* Extract the biased exponent of packed FP16 elements as an FP16 value
   (vgetexpph).  Unmasked forms pass an all-ones mask; mask_* blends
   with __W, maskz_* zeroes unselected elements.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_ph (__m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) __W,
						    (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ph (__m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) __W,
						    (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) __U);
}
1002
1003
/* Intrinsics vgetmantph, vgetmantsh.  */
/* The instruction's immediate packs the sign-control enum in bits [3:2]
   and the normalization-interval enum in bits [1:0]: (__C << 2) | __B.
   The builtin requires a compile-time constant immediate, hence the
   __OPTIMIZE__ split below: inline functions when optimizing (arguments
   fold to constants), macro forms otherwise.  */
#ifdef __OPTIMIZE__
/* Unmasked: all 16 lanes active.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) -1);
}

/* Merge-masking: inactive lanes are taken from __W.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) __W,
						     (__mmask16) __U);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) __U);
}

/* 128-bit unmasked form: all 8 lanes active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) -1);
}

/* 128-bit merge-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) __W,
						     (__mmask8) __U);
}

/* 128-bit zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) __U);
}

#else
/* Macro forms (non-optimizing builds): B and C must be literal
   constants so the immediate operand stays a compile-time constant.  */
#define _mm256_getmant_ph(X, B, C)					\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)-1))

#define _mm256_mask_getmant_ph(W, U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)(W),	\
					       (__mmask16)(U)))

#define _mm256_maskz_getmant_ph(U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)(U)))

#define _mm_getmant_ph(X, B, C)						\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)-1))

#define _mm_mask_getmant_ph(W, U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)(W),	\
					       (__mmask8)(U)))

#define _mm_maskz_getmant_ph(U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)(U)))

#endif /* __OPTIMIZE__ */
1118
/* Intrinsics vcvtph2dq.  */
/* Convert packed FP16 to packed 32-bit signed ints.  Only the low 4
   FP16 elements of __A feed a 128-bit result.  Unmasked: all lanes
   active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__A,
				      (__v4si)
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__B,
				      (__v4si) _mm_setzero_si128 (),
				      __A);
}

/* 8 FP16 elements of __A widen to a 256-bit result of 8 dwords.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__A,
				      (__v8si)
				      _mm256_setzero_si256 (),
				      (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__B,
				      (__v8si)
				      _mm256_setzero_si256 (),
				      __A);
}
1178
/* Intrinsics vcvtph2udq.  */
/* Convert packed FP16 to packed 32-bit unsigned ints.  Unmasked: all
   lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__A,
				       (__v4si)
				       _mm_setzero_si128 (),
				       (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__B,
				       (__v4si)
				       _mm_setzero_si128 (),
				       __A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__A,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__B,
				       (__v8si) _mm256_setzero_si256 (),
				       __A);
}
1238
/* Intrinsics vcvttph2dq.  */
/* Convert packed FP16 to 32-bit signed ints with truncation ("tt"
   variant).  Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__A,
				       (__v4si) _mm_setzero_si128 (),
				       (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C,
						     ( __v4si) __A,
						     __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__B,
				       (__v4si) _mm_setzero_si128 (),
				       __A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__A,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__C,
				       ( __v8si) __A,
				       __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__B,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       __A);
}
1300
/* Intrinsics vcvttph2udq.  */
/* Convert packed FP16 to 32-bit unsigned ints with truncation.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__A,
					(__v4si)
					_mm_setzero_si128 (),
					(__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__C,
					( __v4si) __A,
					__B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__B,
					(__v4si)
					_mm_setzero_si128 (),
					__A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__A,
					(__v8si)
					_mm256_setzero_si256 (), (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__C,
					( __v8si) __A,
					__B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__B,
					(__v8si)
					_mm256_setzero_si256 (),
					__A);
}
1364
/* Intrinsics vcvtdq2ph.  */
/* Convert packed 32-bit signed ints to FP16.  Note both the 128- and
   256-bit source forms return __m128h: 8 dwords narrow to 8 halves
   (one 128-bit vector).  Unmasked: all lanes active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B,
					   _mm_setzero_ph (),
					   __A);
}

/* 256-bit source, 128-bit FP16 result; all lanes active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* 256-bit source, merge-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B);
}

/* 256-bit source, zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B,
					   _mm_setzero_ph (),
					   __A);
}
1415
/* Intrinsics vcvtudq2ph.  */
/* Convert packed 32-bit unsigned ints to FP16.  Unmasked: all lanes
   active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C,
					    __A,
					    __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B,
					    _mm_setzero_ph (),
					    __A);
}

/* 256-bit source, 128-bit FP16 result; all lanes active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* 256-bit source, merge-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B);
}

/* 256-bit source, zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B,
					    _mm_setzero_ph (),
					    __A);
}
1468
/* Intrinsics vcvtph2qq.  */
/* Convert packed FP16 to 64-bit signed ints.  The 128-bit result holds
   2 qwords, so only the low 2 FP16 elements of __A are converted.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi64 (__m128h __A)
{
  return
    __builtin_ia32_vcvtph2qq128_mask (__A,
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq128_mask (__B,
					   _mm_setzero_si128 (),
					   __A);
}

/* 256-bit result: low 4 FP16 elements widen to 4 qwords.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq256_mask (__A,
					   _mm256_setzero_si256 (),
					   (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq256_mask (__B,
					   _mm256_setzero_si256 (),
					   __A);
}
1520
/* Intrinsics vcvtph2uqq.  */
/* Convert packed FP16 to 64-bit unsigned ints.  Unmasked: all lanes
   active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__A,
					    _mm_setzero_si128 (),
					    (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__B,
					    _mm_setzero_si128 (),
					    __A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__A,
					    _mm256_setzero_si256 (),
					    (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__B,
					    _mm256_setzero_si256 (),
					    __A);
}
1571
/* Intrinsics vcvttph2qq.  */
/* Convert packed FP16 to 64-bit signed ints with truncation.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq128_mask (__A,
					    _mm_setzero_si128 (),
					    (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq128_mask (__C,
					    __A,
					    __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq128_mask (__B,
					    _mm_setzero_si128 (),
					    __A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq256_mask (__A,
					    _mm256_setzero_si256 (),
					    (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq256_mask (__C,
					    __A,
					    __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq256_mask (__B,
					    _mm256_setzero_si256 (),
					    __A);
}
1626
/* Intrinsics vcvttph2uqq.  */
/* Convert packed FP16 to 64-bit unsigned ints with truncation.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__A,
					     _mm_setzero_si128 (),
					     (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__C,
					     __A,
					     __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__B,
					     _mm_setzero_si128 (),
					     __A);
}

/* 256-bit unmasked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__A,
					     _mm256_setzero_si256 (),
					     (__mmask8) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__C,
					     __A,
					     __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__B,
					     _mm256_setzero_si256 (),
					     __A);
}
1681
/* Intrinsics vcvtqq2ph.  */
/* Convert packed 64-bit signed ints to FP16.  Both source widths
   return __m128h (the qword count bounds the number of result lanes).
   Unmasked: all lanes active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ph (__m128i __A)
{
  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B,
					   _mm_setzero_ph (),
					   __A);
}

/* 256-bit source (4 qwords), 128-bit FP16 result.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ph (__m256i __A)
{
  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* 256-bit source, merge-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B);
}

/* 256-bit source, zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B,
					   _mm_setzero_ph (),
					   __A);
}
1732
/* Intrinsics vcvtuqq2ph.  */
/* Convert packed 64-bit unsigned ints to FP16.  Unmasked: all lanes
   active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ph (__m128i __A)
{
  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B,
					    _mm_setzero_ph (),
					    __A);
}

/* 256-bit source (4 qwords), 128-bit FP16 result.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ph (__m256i __A)
{
  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* 256-bit source, merge-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B);
}

/* 256-bit source, zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B,
					    _mm_setzero_ph (),
					    __A);
}
1783
/* Intrinsics vcvtph2w.  */
/* Convert packed FP16 to packed 16-bit signed ints (same element
   count, same vector width).  Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi16 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2w128_mask (__A,
				     (__v8hi)
				     _mm_setzero_si128 (),
				     (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2w128_mask (__B,
				     (__v8hi)
				     _mm_setzero_si128 (),
				     __A);
}

/* 256-bit unmasked form: 16 lanes, __mmask16.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi16 (__m256h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2w256_mask (__A,
				     (__v16hi)
				     _mm256_setzero_si256 (),
				     (__mmask16) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2w256_mask (__B,
				     (__v16hi)
				     _mm256_setzero_si256 (),
				     __A);
}
1844
/* Intrinsics vcvtph2uw.  */
/* Convert packed FP16 to packed 16-bit unsigned ints.  Unmasked: all
   lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu16 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2uw128_mask (__A,
				      (__v8hi)
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2uw128_mask (__B,
				      (__v8hi)
				      _mm_setzero_si128 (),
				      __A);
}

/* 256-bit unmasked form: 16 lanes, __mmask16.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu16 (__m256h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2uw256_mask (__A,
				      (__v16hi)
				      _mm256_setzero_si256 (),
				      (__mmask16) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2uw256_mask (__B,
				      (__v16hi)
				      _mm256_setzero_si256 (),
				      __A);
}
1905
/* Intrinsics vcvttph2w.  */
/* Convert packed FP16 to 16-bit signed ints with truncation.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi16 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2w128_mask (__A,
				      (__v8hi)
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2w128_mask (__C,
				      ( __v8hi) __A,
				      __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2w128_mask (__B,
				      (__v8hi)
				      _mm_setzero_si128 (),
				      __A);
}

/* 256-bit unmasked form: 16 lanes, __mmask16.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi16 (__m256h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2w256_mask (__A,
				      (__v16hi)
				      _mm256_setzero_si256 (),
				      (__mmask16) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2w256_mask (__C,
				      ( __v16hi) __A,
				      __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2w256_mask (__B,
				      (__v16hi)
				      _mm256_setzero_si256 (),
				      __A);
}
1970
/* Intrinsics vcvttph2uw.  */
/* Convert packed FP16 to 16-bit unsigned ints with truncation.
   Unmasked: all lanes active.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu16 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2uw128_mask (__A,
				       (__v8hi)
				       _mm_setzero_si128 (),
				       (__mmask8) -1);
}

/* Merge-masking: inactive lanes are taken from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2uw128_mask (__C,
				       ( __v8hi) __A,
				       __B);
}

/* Zero-masking: inactive lanes are zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2uw128_mask (__B,
				       (__v8hi)
				       _mm_setzero_si128 (),
				       __A);
}

/* 256-bit unmasked form: 16 lanes, __mmask16.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu16 (__m256h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2uw256_mask (__A,
				       (__v16hi)
				       _mm256_setzero_si256 (),
				       (__mmask16) -1);
}

/* 256-bit merge-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2uw256_mask (__C,
				       ( __v16hi) __A,
				       __B);
}

/* 256-bit zero-masking form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2uw256_mask (__B,
				       (__v16hi) _mm256_setzero_si256 (),
				       __A);
}
2034
2035/* Intrinsics vcvtw2ph.  */
/* Convert packed signed 16-bit integers in __A to FP16.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_ph (__m128i __A)
{
  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A,
					  _mm_setzero_ph (),
					  (__mmask8) -1);
}
2044
/* Convert packed signed 16-bit integers in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C,
					  __A,
					  __B);
}
2053
/* Convert packed signed 16-bit integers in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B,
					  _mm_setzero_ph (),
					  __A);
}
2062
/* Convert packed signed 16-bit integers in __A to FP16.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_ph (__m256i __A)
{
  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A,
					  _mm256_setzero_ph (),
					  (__mmask16) -1);
}
2071
/* Convert packed signed 16-bit integers in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
{
  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C,
					  __A,
					  __B);
}
2080
/* Convert packed signed 16-bit integers in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
{
  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B,
					  _mm256_setzero_ph (),
					  __A);
}
2089
2090/* Intrinsics vcvtuw2ph.  */
2091extern __inline __m128h
2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093_mm_cvtepu16_ph (__m128i __A)
2094{
2095  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A,
2096					   _mm_setzero_ph (),
2097					   (__mmask8) -1);
2098}
2099
/* Convert packed unsigned 16-bit integers in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B);
}
2106
/* Convert packed unsigned 16-bit integers in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B,
					   _mm_setzero_ph (),
					   __A);
}
2115
/* Convert packed unsigned 16-bit integers in __A to FP16.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_ph (__m256i __A)
{
  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A,
					   _mm256_setzero_ph (),
					   (__mmask16) -1);
}
2124
/* Convert packed unsigned 16-bit integers in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
{
  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B);
}
2131
/* Convert packed unsigned 16-bit integers in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
{
  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B,
					   _mm256_setzero_ph (),
					   __A);
}
2140
2141/* Intrinsics vcvtph2pd.  */
2142extern __inline __m128d
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm_cvtph_pd (__m128h __A)
2145{
2146  return __builtin_ia32_vcvtph2pd128_mask (__A,
2147					   _mm_setzero_pd (),
2148					   (__mmask8) -1);
2149}
2150
/* Convert packed FP16 elements in __C to double precision;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B);
}
2157
/* Convert packed FP16 elements in __B to double precision;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A);
}
2164
/* Convert packed FP16 elements in __A to double precision.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_pd (__m128h __A)
{
  return __builtin_ia32_vcvtph2pd256_mask (__A,
					   _mm256_setzero_pd (),
					   (__mmask8) -1);
}
2173
/* Convert packed FP16 elements in __C to double precision;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B);
}
2180
/* Convert packed FP16 elements in __B to double precision;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2pd256_mask (__B,
					   _mm256_setzero_pd (),
					   __A);
}
2189
2190/* Intrinsics vcvtph2ps.  */
2191extern __inline __m128
2192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2193_mm_cvtxph_ps (__m128h __A)
2194{
2195  return __builtin_ia32_vcvtph2psx128_mask (__A,
2196					   _mm_setzero_ps (),
2197					   (__mmask8) -1);
2198}
2199
/* Convert packed FP16 elements in __C to single precision;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B);
}
2206
/* Convert packed FP16 elements in __B to single precision;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A);
}
2213
/* Convert packed FP16 elements in __A to single precision.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtxph_ps (__m128h __A)
{
  return __builtin_ia32_vcvtph2psx256_mask (__A,
					    _mm256_setzero_ps (),
					    (__mmask8) -1);
}
2222
/* Convert packed FP16 elements in __C to single precision;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B);
}
2229
/* Convert packed FP16 elements in __B to single precision;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2psx256_mask (__B,
					    _mm256_setzero_ps (),
					    __A);
}
2238
2239/* Intrinsics vcvtxps2ph.  */
2240extern __inline __m128h
2241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242_mm_cvtxps_ph (__m128 __A)
2243{
2244  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A,
2245					    _mm_setzero_ph (),
2246					    (__mmask8) -1);
2247}
2248
/* Convert packed single-precision elements in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C)
{
  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B);
}
2255
/* Convert packed single-precision elements in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B)
{
  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B,
					    _mm_setzero_ph (),
					    __A);
}
2264
/* Convert packed single-precision elements in __A to FP16.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtxps_ph (__m256 __A)
{
  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}
2273
/* Convert packed single-precision elements in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C)
{
  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B);
}
2280
/* Convert packed single-precision elements in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B)
{
  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B,
					    _mm_setzero_ph (),
					    __A);
}
2289
2290/* Intrinsics vcvtpd2ph.  */
2291extern __inline __m128h
2292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2293_mm_cvtpd_ph (__m128d __A)
2294{
2295  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A,
2296					   _mm_setzero_ph (),
2297					   (__mmask8) -1);
2298}
2299
/* Convert packed double-precision elements in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C)
{
  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B);
}
2306
/* Convert packed double-precision elements in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B)
{
  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B,
					   _mm_setzero_ph (),
					   __A);
}
2315
/* Convert packed double-precision elements in __A to FP16.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ph (__m256d __A)
{
  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}
2324
/* Convert packed double-precision elements in __C to FP16;
   elements whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C)
{
  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B);
}
2331
/* Convert packed double-precision elements in __B to FP16;
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B)
{
  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B,
					   _mm_setzero_ph (),
					   __A);
}
2340
2341/* Intrinsics vfmaddsub[132,213,231]ph.  */
2342extern __inline __m256h
2343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2344_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C)
2345{
2346  return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A,
2347						      (__v16hf)__B,
2348						      (__v16hf)__C,
2349						      (__mmask16)-1);
2350}
2351
/* Masked packed FP16 fused multiply-alternating-add/subtract;
   elements whose bit in mask __U is clear keep their old value.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}
2362
/* mask3 form of packed FP16 fused multiply-alternating-add/subtract
   (merge source is the third operand, per the _mask3 builtin).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}
2374
/* Zero-masked packed FP16 fused multiply-alternating-add/subtract;
   elements whose bit in mask __U is clear are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}
2386
2387extern __inline __m128h
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C)
2390{
2391  return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A,
2392						      (__v8hf)__B,
2393						      (__v8hf)__C,
2394						      (__mmask8)-1);
2395}
2396
/* Masked packed FP16 fused multiply-alternating-add/subtract;
   elements whose bit in mask __U is clear keep their old value.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}
2407
/* mask3 form of packed FP16 fused multiply-alternating-add/subtract
   (merge source is the third operand, per the _mask3 builtin).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
2419
/* Zero-masked packed FP16 fused multiply-alternating-add/subtract;
   elements whose bit in mask __U is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
2431
2432/* Intrinsics vfmsubadd[132,213,231]ph.  */
/* Packed FP16 fused multiply with alternating subtract/add of __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) -1);
}
2442
/* Masked packed FP16 fused multiply-alternating-subtract/add;
   elements whose bit in mask __U is clear keep their old value.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}
2453
/* mask3 form of packed FP16 fused multiply-alternating-subtract/add
   (merge source is the third operand, per the _mask3 builtin).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}
2465
/* Zero-masked packed FP16 fused multiply-alternating-subtract/add;
   elements whose bit in mask __U is clear are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}
2477
/* Packed FP16 fused multiply with alternating subtract/add of __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}
2487
/* Masked packed FP16 fused multiply-alternating-subtract/add;
   elements whose bit in mask __U is clear keep their old value.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}
2498
/* mask3 form of packed FP16 fused multiply-alternating-subtract/add
   (merge source is the third operand, per the _mask3 builtin).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
2510
/* Zero-masked packed FP16 fused multiply-alternating-subtract/add;
   elements whose bit in mask __U is clear are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
2522
2523/* Intrinsics vfmadd[132,213,231]ph.  */
2524extern __inline __m256h
2525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C)
2527{
2528  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
2529						       (__v16hf) __B,
2530						       (__v16hf) __C,
2531						       (__mmask16) -1);
2532}
2533
2534extern __inline __m256h
2535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2536_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
2537			 __m256h __C)
2538{
2539  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
2540						       (__v16hf) __B,
2541						       (__v16hf) __C,
2542						       (__mmask16) __U);
2543}
2544
2545extern __inline __m256h
2546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2547_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C,
2548			  __mmask16 __U)
2549{
2550  return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A,
2551							(__v16hf) __B,
2552							(__v16hf) __C,
2553							(__mmask16)
2554							__U);
2555}
2556
2557extern __inline __m256h
2558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
2560			  __m256h __C)
2561{
2562  return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A,
2563							(__v16hf) __B,
2564							(__v16hf) __C,
2565							(__mmask16)
2566							__U);
2567}
2568
2569extern __inline __m128h
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C)
2572{
2573  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
2574						       (__v8hf) __B,
2575						       (__v8hf) __C,
2576						       (__mmask8) -1);
2577}
2578
2579extern __inline __m128h
2580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
2582		      __m128h __C)
2583{
2584  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
2585						       (__v8hf) __B,
2586						       (__v8hf) __C,
2587						       (__mmask8) __U);
2588}
2589
2590extern __inline __m128h
2591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2592_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C,
2593		       __mmask8 __U)
2594{
2595  return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A,
2596							(__v8hf) __B,
2597							(__v8hf) __C,
2598							(__mmask8)
2599							__U);
2600}
2601
2602extern __inline __m128h
2603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2604_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
2605		       __m128h __C)
2606{
2607  return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A,
2608							(__v8hf) __B,
2609							(__v8hf) __C,
2610							(__mmask8)
2611							__U);
2612}
2613
2614/* Intrinsics vfnmadd[132,213,231]ph.  */
2615extern __inline __m256h
2616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C)
2618{
2619  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
2620						       (__v16hf) __B,
2621						       (__v16hf) __C,
2622						       (__mmask16) -1);
2623}
2624
2625extern __inline __m256h
2626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2627_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
2628			 __m256h __C)
2629{
2630  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
2631						       (__v16hf) __B,
2632						       (__v16hf) __C,
2633						       (__mmask16) __U);
2634}
2635
2636extern __inline __m256h
2637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2638_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C,
2639			  __mmask16 __U)
2640{
2641  return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A,
2642							(__v16hf) __B,
2643							(__v16hf) __C,
2644							(__mmask16)
2645							__U);
2646}
2647
2648extern __inline __m256h
2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
2651			  __m256h __C)
2652{
2653  return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A,
2654							(__v16hf) __B,
2655							(__v16hf) __C,
2656							(__mmask16)
2657							__U);
2658}
2659
2660extern __inline __m128h
2661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2662_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C)
2663{
2664  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
2665						       (__v8hf) __B,
2666						       (__v8hf) __C,
2667						       (__mmask8) -1);
2668}
2669
2670extern __inline __m128h
2671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2672_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
2673		      __m128h __C)
2674{
2675  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
2676						       (__v8hf) __B,
2677						       (__v8hf) __C,
2678						       (__mmask8) __U);
2679}
2680
2681extern __inline __m128h
2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C,
2684		       __mmask8 __U)
2685{
2686  return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A,
2687							(__v8hf) __B,
2688							(__v8hf) __C,
2689							(__mmask8)
2690							__U);
2691}
2692
2693extern __inline __m128h
2694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2695_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
2696		       __m128h __C)
2697{
2698  return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A,
2699							(__v8hf) __B,
2700							(__v8hf) __C,
2701							(__mmask8)
2702							__U);
2703}
2704
2705/* Intrinsics vfmsub[132,213,231]ph.  */
2706extern __inline __m256h
2707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2708_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C)
2709{
2710  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
2711						       (__v16hf) __B,
2712						       (__v16hf) __C,
2713						       (__mmask16) -1);
2714}
2715
2716extern __inline __m256h
2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
2719			 __m256h __C)
2720{
2721  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
2722						       (__v16hf) __B,
2723						       (__v16hf) __C,
2724						       (__mmask16) __U);
2725}
2726
2727extern __inline __m256h
2728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2729_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C,
2730			  __mmask16 __U)
2731{
2732  return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A,
2733							(__v16hf) __B,
2734							(__v16hf) __C,
2735							(__mmask16)
2736							__U);
2737}
2738
2739extern __inline __m256h
2740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2741_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
2742			  __m256h __C)
2743{
2744  return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A,
2745							(__v16hf) __B,
2746							(__v16hf) __C,
2747							(__mmask16)
2748							__U);
2749}
2750
2751extern __inline __m128h
2752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2753_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C)
2754{
2755  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
2756						       (__v8hf) __B,
2757						       (__v8hf) __C,
2758						       (__mmask8) -1);
2759}
2760
2761extern __inline __m128h
2762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2763_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
2764		      __m128h __C)
2765{
2766  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
2767						       (__v8hf) __B,
2768						       (__v8hf) __C,
2769						       (__mmask8) __U);
2770}
2771
2772extern __inline __m128h
2773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2774_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C,
2775		       __mmask8 __U)
2776{
2777  return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A,
2778							(__v8hf) __B,
2779							(__v8hf) __C,
2780							(__mmask8)
2781							__U);
2782}
2783
2784extern __inline __m128h
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
2787		       __m128h __C)
2788{
2789  return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A,
2790							(__v8hf) __B,
2791							(__v8hf) __C,
2792							(__mmask8)
2793							__U);
2794}
2795
2796/* Intrinsics vfnmsub[132,213,231]ph.  */
2797extern __inline __m256h
2798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2799_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C)
2800{
2801  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
2802						       (__v16hf) __B,
2803						       (__v16hf) __C,
2804						       (__mmask16) -1);
2805}
2806
2807extern __inline __m256h
2808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
2810			 __m256h __C)
2811{
2812  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
2813						       (__v16hf) __B,
2814						       (__v16hf) __C,
2815						       (__mmask16) __U);
2816}
2817
2818extern __inline __m256h
2819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C,
2821			  __mmask16 __U)
2822{
2823  return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A,
2824							(__v16hf) __B,
2825							(__v16hf) __C,
2826							(__mmask16)
2827							__U);
2828}
2829
2830extern __inline __m256h
2831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2832_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
2833			  __m256h __C)
2834{
2835  return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A,
2836							(__v16hf) __B,
2837							(__v16hf) __C,
2838							(__mmask16)
2839							__U);
2840}
2841
2842extern __inline __m128h
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C)
2845{
2846  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
2847						       (__v8hf) __B,
2848						       (__v8hf) __C,
2849						       (__mmask8) -1);
2850}
2851
2852extern __inline __m128h
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
2855		      __m128h __C)
2856{
2857  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
2858						       (__v8hf) __B,
2859						       (__v8hf) __C,
2860						       (__mmask8) __U);
2861}
2862
2863extern __inline __m128h
2864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C,
2866		       __mmask8 __U)
2867{
2868  return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A,
2869							(__v8hf) __B,
2870							(__v8hf) __C,
2871							(__mmask8)
2872							__U);
2873}
2874
2875extern __inline __m128h
2876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2877_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
2878		       __m128h __C)
2879{
2880  return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A,
2881							(__v8hf) __B,
2882							(__v8hf) __C,
2883							(__mmask8)
2884							__U);
2885}
2886
2887/* Intrinsics vf[,c]maddcph.  */
2888extern __inline __m128h
2889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C)
2891{
2892  return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A,
2893						(__v8hf) __B,
2894						(__v8hf) __C);
2895}
2896
/* Masked packed complex FP16 multiply-add; merge-masked by __B.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h)
    __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A,
				      (__v8hf) __C,
				      (__v8hf) __D, __B);
}
2906
2907extern __inline __m128h
2908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C,  __mmask8 __D)
2910{
2911  return (__m128h)
2912    __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A,
2913				       (__v8hf) __B,
2914				       (__v8hf) __C, __D);
2915}
2916
/* Zero-masked packed complex FP16 multiply-add.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B,
						      (__v8hf) __C,
						      (__v8hf) __D, __A);
}
2925
/* Packed complex FP16 multiply-add (vfmaddcph).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A,
						(__v16hf) __B,
						(__v16hf) __C);
}
2934
2935extern __inline __m256h
2936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2937_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
2938{
2939  return (__m256h)
2940     __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A,
2941				       (__v16hf) __C,
2942				       (__v16hf) __D, __B);
2943}
2944
2945extern __inline __m256h
2946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2947_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C,  __mmask8 __D)
2948{
2949  return (__m256h)
2950    __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A,
2951				       (__v16hf) __B,
2952				       (__v16hf) __C, __D);
2953}
2954
2955extern __inline __m256h
2956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
2958{
2959  return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B,
2960						     (__v16hf) __C,
2961						     (__v16hf) __D, __A);
2962}
2963
/* Packed complex-conjugate FP16 multiply-add (vfcmaddcph).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A,
						 (__v8hf) __B,
						 (__v8hf) __C);
}
2972
2973extern __inline __m128h
2974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2975_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2976{
2977  return (__m128h)
2978     __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A,
2979					(__v8hf) __C,
2980					(__v8hf) __D, __B);
2981}
2982
2983extern __inline __m128h
2984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2985_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C,  __mmask8 __D)
2986{
2987  return (__m128h)
2988    __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A,
2989					(__v8hf) __B,
2990					(__v8hf) __C, __D);
2991}
2992
/* vfcmaddcph, zero-masking form: mask __A first per the maskz
   convention; masked-off pairs are zeroed by the _maskz builtin.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B,
						      (__v8hf) __C,
						      (__v8hf) __D, __A);
}
3001
3002extern __inline __m256h
3003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3004_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C)
3005{
3006  return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A,
3007						 (__v16hf) __B,
3008						 (__v16hf) __C);
3009}
3010
/* vfcmaddcph, merge-masking form (256-bit): __B is the write mask;
   the builtin receives __A first, the conventional merge source for
   _mask variants.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h)
     __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A,
					(__v16hf) __C,
					(__v16hf) __D, __B);
}
3020
/* vfcmaddcph, mask3 form (256-bit): accumulator __C presumably doubles
   as the merge source (mask3 builtin convention); __D is the
   per-complex-pair write mask.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C,  __mmask8 __D)
{
  return (__m256h)
    __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A,
					(__v16hf) __B,
					(__v16hf) __C, __D);
}
3030
/* vfcmaddcph, zero-masking form (256-bit): mask __A first per the
   maskz convention; masked-off pairs are zeroed by the builtin.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B,
						       (__v16hf) __C,
						       (__v16hf) __D, __A);
}
3039
3040/* Intrinsics vf[,c]mulcph.  */
3041extern __inline __m128h
3042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3043_mm_fmul_pch (__m128h __A, __m128h __B)
3044{
3045  return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B);
3046}
3047
/* Merge-masking vfmulcph: multiplies __C by __D (complex), passing __A
   to the builtin as the merge source and __B as the write mask.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C,
						    (__v8hf) __D,
						    (__v8hf) __A, __B);
}
3056
/* Zero-masking vfmulcph: implemented with the merge-masking builtin by
   supplying an all-zero vector as the merge source, so masked-off
   pairs come out zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B,
						    (__v8hf) __C,
						    _mm_setzero_ph (),
						    __A);
}
3066
3067extern __inline __m256h
3068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069_mm256_fmul_pch (__m256h __A, __m256h __B)
3070{
3071  return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A,
3072					       (__v16hf) __B);
3073}
3074
/* Merge-masking vfmulcph (256-bit): multiplies __C by __D, passing __A
   to the builtin as the merge source and __B as the write mask.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C,
						    (__v16hf) __D,
						    (__v16hf) __A, __B);
}
3083
/* Zero-masking vfmulcph (256-bit): uses the merge-masking builtin with
   an all-zero merge source.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B,
						    (__v16hf) __C,
						    _mm256_setzero_ph (),
						    __A);
}
3093
3094extern __inline __m128h
3095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3096_mm_fcmul_pch (__m128h __A, __m128h __B)
3097{
3098  return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A,
3099						(__v8hf) __B);
3100}
3101
/* Merge-masking vfcmulcph: conjugate-multiplies __C by __D, passing
   __A to the builtin as the merge source and __B as the write mask.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C,
						     (__v8hf) __D,
						     (__v8hf) __A, __B);
}
3110
/* Zero-masking vfcmulcph: uses the merge-masking builtin with an
   all-zero merge source.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B,
						     (__v8hf) __C,
						     _mm_setzero_ph (),
						     __A);
}
3120
3121extern __inline __m256h
3122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123_mm256_fcmul_pch (__m256h __A, __m256h __B)
3124{
3125  return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B);
3126}
3127
/* Merge-masking vfcmulcph (256-bit): conjugate-multiplies __C by __D,
   passing __A to the builtin as the merge source and __B as the write
   mask.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C,
						     (__v16hf) __D,
						     (__v16hf) __A, __B);
}
3136
/* Zero-masking vfcmulcph (256-bit): uses the merge-masking builtin
   with an all-zero merge source.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B,
						     (__v16hf) __C,
						     _mm256_setzero_ph (),
						     __A);
}
3146
/* Tree reduction of the 16 _Float16 elements of __A with the infix
   operator OP: combine the two 128-bit halves, then the 64-bit
   halves, then the 32-bit pairs, and finish on the two remaining
   scalars.  For + and * the fixed tree order may round differently
   from a strict left-to-right reduction.  */
#define _MM256_REDUCE_OP(op)						\
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0);	\
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1);	\
  __m128h __T3 = (__T1 op __T2);					\
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3,			\
		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
  __m128h __T5 = (__T3) op (__T4);					\
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5,			\
		 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 });			\
  __m128h __T7 = __T5 op __T6;						\
  return __T7[0] op __T7[1]
3158
/* Sum of all 16 _Float16 elements of __A, computed in the fixed tree
   order of _MM256_REDUCE_OP.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_add_ph (__m256h __A)
{
  _MM256_REDUCE_OP (+);
}
3165
/* Product of all 16 _Float16 elements of __A, computed in the fixed
   tree order of _MM256_REDUCE_OP.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_mul_ph (__m256h __A)
{
  _MM256_REDUCE_OP (*);
}
3172
#undef _MM256_REDUCE_OP
/* Same reduction tree, but combining with the _mm_<op> intrinsic
   instead of an infix operator (used for min/max).  The short shuffle
   masks zero-fill their remaining elements (C initializer semantics);
   that is harmless because only element 0 of the final vector is
   consumed.  */
#define _MM256_REDUCE_OP(op)						\
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0);	\
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1);	\
  __m128h __T3 = _mm_##op (__T1, __T2);				\
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3,			\
		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
  __m128h __T5 = _mm_##op (__T3, __T4);				\
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); \
  __m128h __T7 = _mm_##op (__T5, __T6);				\
  __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); \
  __m128h __T9 = _mm_##op (__T7, __T8);				\
  return __T9[0]
3186
/* Minimum of all 16 _Float16 elements of __A (tree reduction via
   _mm_min_ph).  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_min_ph (__m256h __A)
{
  _MM256_REDUCE_OP (min_ph);
}
3193
/* Maximum of all 16 _Float16 elements of __A (tree reduction via
   _mm_max_ph).  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_max_ph (__m256h __A)
{
  _MM256_REDUCE_OP (max_ph);
}
3200
/* Tree reduction of the 8 _Float16 elements of __A with the infix
   operator OP: swap 64-bit halves and combine, swap 32-bit pairs and
   combine, then fold the last two scalars.  */
#define _MM_REDUCE_OP(op) 						\
  __m128h __T1 = (__m128h) __builtin_shuffle (__A,			\
		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
  __m128h __T2 = (__A) op (__T1);					\
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2,			\
		 (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 });			\
  __m128h __T4 = __T2 op __T3;						\
  return __T4[0] op __T4[1]
3209
/* Sum of all 8 _Float16 elements of __A, computed in the fixed tree
   order of _MM_REDUCE_OP.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_add_ph (__m128h __A)
{
  _MM_REDUCE_OP (+);
}
3216
/* Product of all 8 _Float16 elements of __A, computed in the fixed
   tree order of _MM_REDUCE_OP.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_mul_ph (__m128h __A)
{
  _MM_REDUCE_OP (*);
}
3223
#undef _MM_REDUCE_OP
/* Same 128-bit reduction tree, combining with the _mm_<op> intrinsic
   (used for min/max).  Short shuffle masks zero-fill their remaining
   elements; only element 0 of the final vector is consumed.  */
#define _MM_REDUCE_OP(op) 						\
  __m128h __T1 = (__m128h) __builtin_shuffle (__A,			\
		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
  __m128h __T2 = _mm_##op (__A, __T1);					\
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 });	\
  __m128h __T4 = _mm_##op (__T2, __T3);				\
  __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 });	\
  __m128h __T6 = _mm_##op (__T4, __T5);				\
  return __T6[0]
3234
/* Minimum of all 8 _Float16 elements of __A (tree reduction via
   _mm_min_ph).  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_min_ph (__m128h __A)
{
  _MM_REDUCE_OP (min_ph);
}
3241
/* Maximum of all 8 _Float16 elements of __A (tree reduction via
   _mm_max_ph).  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_max_ph (__m128h __A)
{
  _MM_REDUCE_OP (max_ph);
}
3248
3249#undef _MM256_REDUCE_OP
3250#undef _MM_REDUCE_OP
3251
3252extern __inline __m256h
3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W)
3255{
3256  return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W,
3257						    (__v16hi) __A,
3258						    (__mmask16) __U);
3259
3260}
3261
/* Two-source 16-bit permute: selects _Float16 elements from __A/__B
   according to index vector __I, via the vpermi2w builtin with an
   all-ones (no-op) mask.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B)
{
  return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
						       (__v16hi) __I,
						       (__v16hi) __B,
						       (__mmask16)-1);
}
3271
/* 16-bit permute of __B by index vector __A.  Note the builtin takes
   the data operand first and the indices second — the reverse of the
   intrinsic's parameter order.  The zero vector is only the unused
   merge source for the all-ones mask.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_ph (__m256i __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
						     (__v16hi) __A,
						     (__v16hi)
						     (_mm256_setzero_ph ()),
						     (__mmask16)-1);
}
3282
3283extern __inline __m128h
3284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W)
3286{
3287  return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W,
3288						    (__v8hi) __A,
3289						    (__mmask8) __U);
3290
3291}
3292
/* Two-source 16-bit permute (128-bit): selects _Float16 elements from
   __A/__B according to index vector __I, via the vpermi2w builtin with
   an all-ones (no-op) mask.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B)
{
  return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
						       (__v8hi) __I,
						       (__v8hi) __B,
						       (__mmask8)-1);
}
3302
/* 16-bit permute of __B by index vector __A (128-bit).  The builtin
   takes data first, indices second; the zero vector is the unused
   merge source for the all-ones mask.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_ph (__m128i __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
						     (__v8hi) __A,
						     (__v8hi)
						     (_mm_setzero_ph ()),
						     (__mmask8)-1);
}
3313
3314extern __inline __m256h
3315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3316_mm256_set1_pch (_Float16 _Complex __A)
3317{
3318  union
3319  {
3320    _Float16 _Complex __a;
3321    float __b;
3322  } __u = { .__a = __A };
3323
3324  return (__m256h) _mm256_set1_ps (__u.__b);
3325}
3326
3327extern __inline __m128h
3328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3329_mm_set1_pch (_Float16 _Complex __A)
3330{
3331  union
3332  {
3333    _Float16 _Complex __a;
3334    float __b;
3335  } __u = { .__a = __A };
3336
3337  return (__m128h) _mm_set1_ps (__u.__b);
3338}
3339
/* The [c]mul_pch intrinsics below are aliases for the corresponding
   f[c]mul_pch intrinsics above (same vf[,c]mulcph instructions).  */
#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B))
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B))
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B))
#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B))
#define _mm256_mask_mul_pch(W, U, A, B)				      \
  _mm256_mask_fmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B))

#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B))
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B))
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B))
#define _mm256_mask_cmul_pch(W, U, A, B)			      \
   _mm256_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B))
3356
3357#ifdef __DISABLE_AVX512FP16VL__
3358#undef __DISABLE_AVX512FP16VL__
3359#pragma GCC pop_options
3360#endif /* __DISABLE_AVX512FP16VL__ */
3361
3362#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */
3363