Lines Matching refs:__A

84 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
86 return (__m256h)((__v16hf)__A + (__v16hf)__B);
90 _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
92 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
96 _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
98 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
101 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
103 return (__m128h)((__v8hf)__A + (__v8hf)__B);
108 __m128h __A,
110 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
115 __m128h __A,
117 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
121 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
123 return (__m256h)((__v16hf)__A - (__v16hf)__B);
127 _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
129 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
133 _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
135 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
138 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
140 return (__m128h)((__v8hf)__A - (__v8hf)__B);
145 __m128h __A,
147 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
152 __m128h __A,
154 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
158 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
160 return (__m256h)((__v16hf)__A * (__v16hf)__B);
164 _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
166 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
170 _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
172 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
175 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
177 return (__m128h)((__v8hf)__A * (__v8hf)__B);
182 __m128h __A,
184 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
189 __m128h __A,
191 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
195 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
197 return (__m256h)((__v16hf)__A / (__v16hf)__B);
201 _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
203 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
207 _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
209 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
212 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
214 return (__m128h)((__v8hf)__A / (__v8hf)__B);
219 __m128h __A,
221 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
226 __m128h __A,
228 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
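
The plain/mask/maskz triples above follow the standard AVX-512 masking pattern: the unmasked form computes every lane, the _mask_ form merges the result into __W under the mask __U, and the _maskz_ form zeroes disabled lanes. A minimal usage sketch, assuming a compiler with -mavx512fp16 -mavx512vl; the helper names are made up for illustration, the intrinsics are the ones listed above:

#include <immintrin.h>

/* Illustrative helpers, not part of the header.
 * Per 16-bit lane i:
 *   add_all  : a[i] + b[i]
 *   add_merge: (m >> i) & 1 ? a[i] + b[i] : src[i]
 *   add_zero : (m >> i) & 1 ? a[i] + b[i] : 0.0       */
static inline __m256h add_all(__m256h a, __m256h b) { return _mm256_add_ph(a, b); }
static inline __m256h add_merge(__m256h src, __mmask16 m, __m256h a, __m256h b) {
  return _mm256_mask_add_ph(src, m, a, b);
}
static inline __m256h add_zero(__mmask16 m, __m256h a, __m256h b) {
  return _mm256_maskz_add_ph(m, a, b);
}

The sub/mul/div and min/max families that follow use exactly the same three-way pattern.
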
232 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
234 return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
238 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
241 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
246 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
249 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
253 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
255 return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
260 __m128h __A,
263 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
268 __m128h __A,
271 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
275 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
277 return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
281 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
284 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
289 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
292 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
296 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
298 return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
303 __m128h __A,
306 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
311 __m128h __A,
314 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
318 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
319 return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
322 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
323 return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
326 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
327 return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
331 _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
333 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
337 _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
339 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
342 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
343 return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
348 __m128h __A) {
350 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
354 _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
356 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
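
The abs_ph and conj_pch bodies above are pure bit manipulation: the AND with 0x7FFF7FFF clears the sign bit of both FP16 halves of each 32-bit lane, and the XOR with -0.0f (0x80000000 per 32-bit lane) flips only bit 31, i.e. the sign of the imaginary half of each complex FP16 pair. A scalar model of one 32-bit lane, for illustration only:

#include <stdint.h>

/* Illustrative scalar model of one 32-bit lane (two FP16 values; for the
 * _pch case: real part in bits 0..15, imaginary part in bits 16..31). */
static inline uint32_t abs_ph_lane(uint32_t two_halves) {
  return two_halves & 0x7FFF7FFFu; /* clear both sign bits -> |re|, |im| */
}
static inline uint32_t conj_pch_lane(uint32_t cplx) {
  return cplx ^ 0x80000000u;       /* flip only the imaginary sign bit   */
}
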
375 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
377 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
381 _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
382 return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
387 _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
389 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
392 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
394 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
399 __m128h __A) {
400 return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
405 __m128h __A) {
407 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
410 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
412 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
416 _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
417 return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
422 _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
424 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
427 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
429 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
434 __m128h __A) {
435 return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
440 _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
442 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
445 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
447 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
451 _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
452 return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
457 _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
459 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
462 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
464 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
468 _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
469 return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
474 _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
476 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
509 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
512 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
516 _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
517 return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
522 _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
524 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
527 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
530 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
534 _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
535 return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
540 _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
542 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
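
getexp and scalef form a decompose/recompose pair: getexp_ph returns, per lane, the base-2 exponent of |x| (as an FP16 value), and scalef_ph computes x * 2^floor(y) per lane, following the VGETEXPPH/VSCALEFPH instruction definitions. A sketch with hypothetical helper names, using only intrinsics from this listing:

#include <immintrin.h>

/* Illustrative helpers, not part of the header. */
static inline __m128h ilogb_ph(__m128h x)            { return _mm_getexp_ph(x); }    /* per-lane exponent of |x| */
static inline __m128h ldexp_ph(__m128h x, __m128h k) { return _mm_scalef_ph(x, k); } /* per-lane x * 2^floor(k)  */
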
609 __m128h __A) {
611 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
615 __m128h __A) {
617 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
625 _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
627 (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
631 _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
633 (__v16hf)_mm256_sqrt_ph(__A),
653 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
655 (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
660 __m128d __A) {
661 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
666 _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
668 (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
671 static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
673 (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
677 _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
678 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
683 _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
685 (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
688 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
690 (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
695 __m128h __A) {
696 return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
701 _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
703 (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
706 static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
708 (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
712 _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
713 return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
718 _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
720 (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
723 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
725 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
729 _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
730 return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
735 _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
737 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
741 _mm256_cvtph_epi16(__m256h __A) {
743 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
747 _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
748 return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
753 _mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
755 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
760 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
764 _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
765 return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
770 _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
772 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
776 _mm256_cvttph_epi16(__m256h __A) {
778 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
782 _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
783 return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
788 _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
790 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
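
The cvtph/cvttph pairs above differ only in rounding: _mm*_cvtph_epi16 converts using the current rounding mode (round-to-nearest-even by default), while the extra 't' in _mm*_cvttph_epi16 means truncation toward zero. Sketch (helper names are illustrative):

#include <immintrin.h>

static inline __m128i half_to_i16_rounded(__m128h x)   { return _mm_cvtph_epi16(x);  }
static inline __m128i half_to_i16_truncated(__m128h x) { return _mm_cvttph_epi16(x); }
/* e.g. 1.7 -> 2 under the default rounding mode, 1.7 -> 1 with the truncating form */

The same rounding-vs-truncation split recurs below for the epu16, epi32/epu32 and epi64/epu64 conversions.
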
793 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
794 return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
798 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
800 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
804 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
806 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
810 _mm256_cvtepi16_ph(__m256i __A) {
811 return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
815 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
817 (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
821 _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
823 (__v16hf)_mm256_cvtepi16_ph(__A),
827 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
829 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
833 _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
834 return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
839 _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
841 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
845 _mm256_cvtph_epu16(__m256h __A) {
847 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
851 _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
852 return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
857 _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
859 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
862 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
864 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
868 _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
869 return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
874 _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
876 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
880 _mm256_cvttph_epu16(__m256h __A) {
882 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
886 _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
887 return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
892 _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
894 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
897 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
898 return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
902 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
904 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
908 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
910 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
914 _mm256_cvtepu16_ph(__m256i __A) {
915 return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
919 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
921 (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
925 _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
927 (__v16hf)_mm256_cvtepu16_ph(__A),
931 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
933 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
937 _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
938 return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
943 _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
945 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
949 _mm256_cvtph_epi32(__m128h __A) {
951 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
955 _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
956 return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
961 _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
963 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
966 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
968 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
972 _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
973 return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
978 _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
980 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
984 _mm256_cvtph_epu32(__m128h __A) {
986 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
990 _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
991 return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
996 _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
998 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1001 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1003 (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1007 _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1008 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1013 _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1015 (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1019 _mm256_cvtepi32_ph(__m256i __A) {
1020 return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1024 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1026 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1030 _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
1032 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1035 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1037 (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1041 _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1042 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1047 _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1049 (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1053 _mm256_cvtepu32_ph(__m256i __A) {
1054 return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1058 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1060 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1064 _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
1066 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1069 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1071 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1075 _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1076 return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1081 _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1083 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1087 _mm256_cvttph_epi32(__m128h __A) {
1089 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1093 _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1094 return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1099 _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1101 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1104 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1106 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1110 _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1111 return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1116 _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1118 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1122 _mm256_cvttph_epu32(__m128h __A) {
1124 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1128 _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1129 return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1134 _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1136 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1139 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1141 (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1145 _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1146 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1151 _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1153 (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1157 _mm256_cvtepi64_ph(__m256i __A) {
1159 (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1163 _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1164 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1169 _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
1171 (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1174 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1176 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1180 _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1181 return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1186 _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1188 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1192 _mm256_cvtph_epi64(__m128h __A) {
1194 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1198 _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1199 return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1204 _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1206 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1209 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1211 (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1215 _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1216 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1221 _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1223 (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1227 _mm256_cvtepu64_ph(__m256i __A) {
1229 (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1233 _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1234 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1239 _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
1241 (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1244 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1246 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1250 _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1251 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1256 _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1258 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1262 _mm256_cvtph_epu64(__m128h __A) {
1264 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1268 _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1269 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1274 _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1276 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1279 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1281 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1285 _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1286 return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1291 _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1293 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1297 _mm256_cvttph_epi64(__m128h __A) {
1299 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1303 _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1304 return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1309 _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1311 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1314 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1316 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1320 _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1321 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1326 _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1328 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1332 _mm256_cvttph_epu64(__m128h __A) {
1334 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1338 _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1339 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1344 _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1346 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1349 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1351 (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1356 __m128h __A) {
1357 return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1362 _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1364 (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1367 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1369 (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1373 _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1374 return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1379 _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1381 (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1384 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1386 (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1391 __m128 __A) {
1392 return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1397 _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1399 (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1402 static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1404 (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1408 _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1409 return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1414 _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
1416 (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
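
_mm256_cvtxph_ps and _mm256_cvtxps_ph round-trip between eight FP16 values in a __m128h and eight FP32 values in a __m256 (the narrowing direction rounds to the nearest representable half). A widen-compute-narrow sketch; _mm256_add_ps is plain AVX and not part of this listing, any float operation would do:

#include <immintrin.h>

/* Illustrative helper: do one operation in float precision, narrow back to half. */
static inline __m128h add_in_fp32(__m128h a, __m128h b) {
  __m256 af = _mm256_cvtxph_ps(a);                 /* 8 x half -> 8 x float */
  __m256 bf = _mm256_cvtxph_ps(b);
  return _mm256_cvtxps_ph(_mm256_add_ps(af, bf));  /* back to 8 x half      */
}
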
1419 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1422 return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1426 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1432 __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1433 (__v8hf)__A);
1437 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1440 __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1445 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1448 __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1452 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1455 return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1459 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1464 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1465 (__v8hf)__A);
1469 _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1471 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1476 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1479 __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1484 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1487 __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1492 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1495 __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1499 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1502 return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1507 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1510 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1511 (__v16hf)__A);
1515 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1518 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1523 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1526 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1530 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1533 return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1538 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1541 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1542 (__v16hf)__A);
1546 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1549 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1554 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1557 __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1562 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1565 __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1570 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1573 __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1577 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1580 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1585 _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1588 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1589 (__v8hf)__A);
1593 _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1596 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1601 _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1604 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1608 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1611 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1616 _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1619 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1620 (__v8hf)__A);
1624 _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1627 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1632 _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1633 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1638 _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1641 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1642 (__v16hf)__A);
1646 _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1649 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1654 _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1657 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1662 _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1663 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1668 _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1671 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1672 (__v16hf)__A);
1676 _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1679 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1684 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1687 __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1692 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1695 __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1700 _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1703 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1708 _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1711 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1715 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1718 return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1723 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1726 __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1727 (__v8hf)__A);
1730 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1733 return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1738 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1741 __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1742 (__v16hf)__A);
1745 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1748 return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1753 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1756 __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1757 (__v8hf)__A);
1761 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1764 __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1768 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1771 return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1776 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1779 __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1780 (__v16hf)__A);
1784 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1787 __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
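
All of the fused forms above map onto the same vfmaddph builtin with sign flips on the operands: fmadd = a*b + c, fmsub = a*b - c, fnmadd = -(a*b) + c, fnmsub = -(a*b) - c. The masked variants differ only in where disabled lanes come from: _mask_ merges from the first operand (__A, as visible above), _mask3_ merges from the addend operand by the usual AVX-512 convention, and _maskz_ zeroes. A sketch of the sign conventions only (helper names are illustrative):

#include <immintrin.h>

/* Per FP16 lane: */
static inline __m128h fma_pp(__m128h a, __m128h b, __m128h c) { return _mm_fmadd_ph(a, b, c);  } /*  a*b + c */
static inline __m128h fma_pm(__m128h a, __m128h b, __m128h c) { return _mm_fmsub_ph(a, b, c);  } /*  a*b - c */
static inline __m128h fma_np(__m128h a, __m128h b, __m128h c) { return _mm_fnmadd_ph(a, b, c); } /* -a*b + c */
static inline __m128h fma_nm(__m128h a, __m128h b, __m128h c) { return _mm_fnmsub_ph(a, b, c); } /* -a*b - c */
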
1791 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1794 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1798 _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1799 return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1804 _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1806 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1809 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmul_pch(__m256h __A,
1812 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1816 _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1817 return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1822 _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1824 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1827 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1830 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1835 _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1838 __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1840 (__v4sf)__A);
1844 _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1845 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1850 _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1852 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1855 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1858 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1863 _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1866 __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1868 (__v8sf)__A);
1872 _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1873 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1878 _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1880 (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1883 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1886 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1891 __m128h __A,
1893 return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1898 _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1900 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1903 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1906 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1910 _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1911 return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1916 _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1918 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1921 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1924 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1929 _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1932 __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1934 (__v4sf)__A);
1938 _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1939 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1944 _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1945 return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1949 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1952 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1957 _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1960 __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1962 (__v8sf)__A);
1966 _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1967 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1972 _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1973 return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
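
The *_pch family treats each 32-bit lane as one complex number built from two adjacent FP16 values (real part in the low half, imaginary part in the high half), which is why the builtins cast the operands through __v4sf/__v8sf. fmul_pch is the complex product, fcmul_pch folds a conjugation of the second source into the multiply, and fmadd_pch/fcmadd_pch accumulate into __C. A sketch with illustrative helper names; the last form rewrites the conjugated product via the xor-based _mm_conj_pch from this listing and is equivalent up to signed-zero/NaN edge cases:

#include <immintrin.h>

/* 4 complex FP16 numbers per __m128h. */
static inline __m128h cmul(__m128h a, __m128h b)  { return _mm_fmul_pch(a, b);  } /* a * b       */
static inline __m128h cmulc(__m128h a, __m128h b) { return _mm_fcmul_pch(a, b); } /* a * conj(b) */
static inline __m128h cmulc_alt(__m128h a, __m128h b) {
  return _mm_fmul_pch(a, _mm_conj_pch(b));         /* same product, spelled with conj_pch */
}
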
1978 __m128h __A,
1981 (__v8hf)__A);
1985 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
1987 (__v16hf)__A);
1991 _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
1992 return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1997 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
1998 return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
2003 _mm_permutexvar_ph(__m128i __A, __m128h __B) {
2004 return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2008 _mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2009 return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
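
The permute helpers at the end reuse the 16-bit integer permute builtins, so a __m128h/__m256h is shuffled exactly like a vector of shorts: each index selects one half-precision lane. A small sketch that reverses the eight lanes of a __m128h; _mm_set_epi16 is plain SSE2 and not part of this listing, and the helper name is illustrative:

#include <immintrin.h>

/* result[i] = v[idx[i]]; indices 7..0 reverse the eight FP16 lanes. */
static inline __m128h reverse_ph(__m128h v) {
  const __m128i idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
  return _mm_permutexvar_ph(idx, v);
}
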