Lines matching refs: __A

351 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A,
353 return (__m512h)((__v32hf)__A + (__v32hf)__B);
357 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
359 (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
363 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
365 (__v32hf)_mm512_add_ph(__A, __B),
383 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A,
385 return (__m512h)((__v32hf)__A - (__v32hf)__B);
389 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
391 (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
395 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
397 (__v32hf)_mm512_sub_ph(__A, __B),
415 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A,
417 return (__m512h)((__v32hf)__A * (__v32hf)__B);
421 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
423 (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
427 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
429 (__v32hf)_mm512_mul_ph(__A, __B),
447 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A,
449 return (__m512h)((__v32hf)__A / (__v32hf)__B);
453 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
455 (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
459 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
461 (__v32hf)_mm512_div_ph(__A, __B),
479 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
481 return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
486 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
488 (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
492 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
494 (__v32hf)_mm512_min_ph(__A, __B),
512 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
514 return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
519 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
521 (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
525 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
527 (__v32hf)_mm512_max_ph(__A, __B),
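A minimal usage sketch for the packed FP16 arithmetic and min/max forms above (not part of the listing); the helper name clamp_selected is hypothetical and assumes a compiler with AVX512FP16 enabled (e.g. -mavx512fp16):

#include <immintrin.h>

// Cap every selected lane of x at `limit`; lanes not selected by `keep`
// are zeroed, as with any maskz_ form.
static __m512h clamp_selected(__m512h x, _Float16 limit, __mmask32 keep) {
  __m512h cap = _mm512_set1_ph(limit);
  return _mm512_maskz_min_ph(keep, x, cap);
}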
545 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
546 return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
549 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
550 return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
554 _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) {
556 (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W);
560 _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
562 (__v16sf)_mm512_conj_pch(__A),
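_mm512_abs_ph and _mm512_conj_pch are pure bit operations: the AND with 0x7FFF7FFF clears the sign bit of both FP16 halves in each 32-bit lane, and the XOR with -0.0f flips only the high (imaginary) half of each real/imag pair. A hedged sketch of the merge-masked form (conj_where is an illustrative name, not from the header):

#include <immintrin.h>

// Conjugate only the complex lanes selected by m; unselected lanes keep
// their original value from z (merge masking via the __W operand).
static __m512h conj_where(__m512h z, __mmask16 m) {
  return _mm512_mask_conj_pch(z, m, z);  // __W = z, __U = m, __A = z
}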
566 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
568 __A[0] += __B[0];
569 return __A;
574 __m128h __A,
576 __A = _mm_add_sh(__A, __B);
577 return __builtin_ia32_selectsh_128(__U, __A, __W);
581 __m128h __A,
583 __A = _mm_add_sh(__A, __B);
584 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
602 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
604 __A[0] -= __B[0];
605 return __A;
610 __m128h __A,
612 __A = _mm_sub_sh(__A, __B);
613 return __builtin_ia32_selectsh_128(__U, __A, __W);
617 __m128h __A,
619 __A = _mm_sub_sh(__A, __B);
620 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
638 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
640 __A[0] *= __B[0];
641 return __A;
646 __m128h __A,
648 __A = _mm_mul_sh(__A, __B);
649 return __builtin_ia32_selectsh_128(__U, __A, __W);
653 __m128h __A,
655 __A = _mm_mul_sh(__A, __B);
656 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
674 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
676 __A[0] /= __B[0];
677 return __A;
682 __m128h __A,
684 __A = _mm_div_sh(__A, __B);
685 return __builtin_ia32_selectsh_128(__U, __A, __W);
689 __m128h __A,
691 __A = _mm_div_sh(__A, __B);
692 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
710 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
713 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
719 __m128h __A,
721 return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
727 __m128h __A,
730 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
749 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
752 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
758 __m128h __A,
760 return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
766 __m128h __A,
769 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
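The _sh forms operate on element 0 only and copy elements 1..7 from the first source. A small sketch of the merge-masked scalar add (the helper name is illustrative, not from the header):

#include <immintrin.h>

// Element 0 becomes a[0] + b[0] when bit 0 of m is set, otherwise
// fallback[0]; elements 1..7 are copied from a in either case.
static __m128h add_sh_or_fallback(__m128h a, __m128h b, __m128h fallback,
                                  __mmask8 m) {
  return _mm_mask_add_sh(fallback, m, a, b);
}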
833 _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) {
837 return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1);
841 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
843 (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
894 __m128h __A) {
895 __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1);
899 __m512h __A) {
900 *(__m512h *)__P = __A;
904 __m256h __A) {
905 *(__m256h *)__P = __A;
909 __m128h __A) {
910 *(__m128h *)__P = __A;
914 __m512h __A) {
918 ((struct __storeu_ph *)__P)->__v = __A;
922 __m256h __A) {
926 ((struct __storeu_ph *)__P)->__v = __A;
930 __m128h __A) {
934 ((struct __storeu_ph *)__P)->__v = __A;
946 __m128h __A,
948 return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
952 __m128h __A,
954 return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
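A hedged round-trip sketch for the load/store helpers above (buffer handling and names are illustrative): store a full 512-bit FP16 vector unaligned, then reload just its first element with a masked scalar load, which zeroes lanes 1..7 of the result.

#include <immintrin.h>

static __m128h store_then_reload_first(_Float16 *buf /* >= 32 elements */,
                                       __m512h v) {
  _mm512_storeu_ph(buf, v);                           // 32 halves to memory
  return _mm_mask_load_sh(_mm_setzero_ph(), 1, buf);  // buf[0] into lane 0
}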
968 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) {
970 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
974 _mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
975 return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
980 _mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) {
982 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
985 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) {
987 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
991 _mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
992 return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
997 _mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) {
999 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
1033 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) {
1035 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1040 _mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1042 (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1046 _mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) {
1048 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1066 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A,
1069 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1074 _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
1075 return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
1081 _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
1083 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1162 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A,
1165 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1170 __m128h __A,
1172 return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
1177 __m128h __A,
1180 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1183 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A,
1186 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1191 __m128h __A,
1193 return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
1198 _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1200 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1238 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
1241 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1246 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1248 (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
1258 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1260 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1274 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
1277 (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1282 _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1283 return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
1294 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1296 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1378 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
1379 return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
1384 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1387 (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1392 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
1395 (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1414 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
1417 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1423 __m128h __A,
1426 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
1431 __m128h __A,
1434 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
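scalef multiplies each lane of the first operand by 2 raised to the floor of the corresponding lane of the second, so it works as a vector ldexp. A sketch, assuming |k| is small enough to be exact in FP16:

#include <immintrin.h>

// Multiply every FP16 lane of x by 2^k (ldexp-style scaling).
static __m512h scale_by_pow2(__m512h x, int k) {
  __m512h e = _mm512_set1_ph((_Float16)k);
  return _mm512_scalef_ph(x, e);
}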
1466 static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
1468 (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1473 _mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
1475 (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1479 _mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
1481 (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1497 static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
1499 (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
1504 _mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
1506 (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1510 _mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
1512 (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
1530 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
1533 (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
1539 __m128 __A,
1541 return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
1547 __m128 __A,
1550 (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
1568 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
1571 (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1577 __m128h __A,
1580 (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
1585 __m128h __A,
1588 (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1606 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
1609 (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1615 __m128h __A,
1618 (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
1623 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
1625 (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1643 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
1646 (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
1652 __m128d __A,
1655 (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
1660 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
1662 (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
1681 _mm512_cvtph_epi16(__m512h __A) {
1683 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1688 _mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1690 (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1694 _mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
1696 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1715 _mm512_cvttph_epi16(__m512h __A) {
1717 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1722 _mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1724 (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1728 _mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
1730 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1748 _mm512_cvtepi16_ph(__m512i __A) {
1750 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1755 _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1757 (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1761 _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
1763 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1782 _mm512_cvtph_epu16(__m512h __A) {
1784 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1789 _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1791 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1795 _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
1797 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1816 _mm512_cvttph_epu16(__m512h __A) {
1818 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1823 _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1825 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1829 _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
1831 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1849 _mm512_cvtepu16_ph(__m512i __A) {
1851 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1856 _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1858 (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1862 _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
1864 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1883 _mm512_cvtph_epi32(__m256h __A) {
1885 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
1890 _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
1892 (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1896 _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
1898 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
1917 _mm512_cvtph_epu32(__m256h __A) {
1919 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
1924 _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
1926 (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1930 _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
1932 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
1950 _mm512_cvtepi32_ph(__m512i __A) {
1952 (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1957 _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1959 (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1963 _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
1965 (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
1983 _mm512_cvtepu32_ph(__m512i __A) {
1985 (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1990 _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1992 (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1996 _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
1998 (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2017 _mm512_cvttph_epi32(__m256h __A) {
2019 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
2024 _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
2026 (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2030 _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
2032 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
2051 _mm512_cvttph_epu32(__m256h __A) {
2053 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
2058 _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
2060 (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2064 _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
2066 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
2083 _mm512_cvtepi64_ph(__m512i __A) {
2085 (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2090 _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2092 (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2096 _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
2098 (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2116 _mm512_cvtph_epi64(__m128h __A) {
2118 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2123 _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2125 (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2129 _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
2131 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2148 _mm512_cvtepu64_ph(__m512i __A) {
2150 (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2155 _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2157 (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2161 _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
2163 (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2181 _mm512_cvtph_epu64(__m128h __A) {
2183 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2188 _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2190 (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2194 _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
2196 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2214 _mm512_cvttph_epi64(__m128h __A) {
2216 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2221 _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2223 (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2227 _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
2229 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2247 _mm512_cvttph_epu64(__m128h __A) {
2249 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2254 _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2256 (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2260 _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
2262 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2269 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
2270 return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2277 _mm_cvtsh_u32(__m128h __A) {
2278 return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
2286 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
2287 return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
2295 _mm_cvtsh_u64(__m128h __A) {
2297 (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2305 _mm_cvtu32_sh(__m128h __A, unsigned int __B) {
2306 __A[0] = __B;
2307 return __A;
2316 _mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
2317 __A[0] = __B;
2318 return __A;
2325 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
2327 __A[0] = __B;
2328 return __A;
2335 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
2337 __A[0] = __B;
2338 return __A;
2345 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
2346 return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
2354 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
2355 return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
2364 _mm_cvttsh_u32(__m128h __A) {
2365 return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
2374 _mm_cvttsh_u64(__m128h __A) {
2376 (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2393 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
2395 (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
2400 _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
2402 (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2406 _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
2408 (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
2425 static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
2427 (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
2432 _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
2434 (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2438 _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
2440 (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
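A sketch of the float<->half conversions listed above; the cvtx forms narrow under the current rounding mode and widen back exactly (round_trip_through_fp16 is an illustrative name):

#include <immintrin.h>

// Narrow 16 floats to FP16 and widen them back; lanes that were not exactly
// representable in FP16 come back rounded.
static __m512 round_trip_through_fp16(__m512 x) {
  __m256h h = _mm512_cvtxps_ph(x);
  return _mm512_cvtxph_ps(h);
}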
2504 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
2507 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2513 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2514 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2520 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2521 return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2527 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2528 return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2533 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
2536 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2542 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2543 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2549 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2551 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2555 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
2558 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2564 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2565 return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2571 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2572 return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2577 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
2580 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2586 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2588 -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2628 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
2630 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2635 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2637 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2642 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2644 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2649 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2651 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2656 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
2658 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2663 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2665 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2670 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2672 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2682 _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2683 return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2694 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2696 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2706 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2707 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2723 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2724 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2730 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2731 return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2737 __m128h __A,
2739 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2745 __m128h __A,
2747 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2762 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2763 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
2786 __m128h __A,
2788 return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2795 __m128h __A,
2797 return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2813 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2814 return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
2837 __m128h __A,
2839 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2844 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2845 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2860 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2861 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
2884 __m128h __A,
2886 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2891 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2892 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2907 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2908 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
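For the packed FMA family, the maskz form computes A*B+C per lane and zeroes the lanes whose mask bit is clear. A minimal sketch (helper name is not from the header):

#include <immintrin.h>

// Fused a*b + c in the lanes selected by m; all other lanes become 0.0.
static __m512h fma_selected(__m512h a, __m512h b, __m512h c, __mmask32 m) {
  return _mm512_maskz_fmadd_ph(m, a, b, c);
}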
2930 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,
2933 return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2939 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2941 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2945 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2946 return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2952 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
2954 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
2977 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
2980 return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2986 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2988 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2992 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2993 return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2999 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
3001 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
3024 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
3027 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3032 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
3033 return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3039 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3041 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3060 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A,
3063 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3069 __m128h __A,
3071 return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3077 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3079 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3098 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A,
3101 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3106 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3107 return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3113 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3115 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3134 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A,
3137 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3142 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3143 return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3149 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3151 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3170 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
3174 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
3179 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3181 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3186 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3188 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3193 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3195 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3219 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
3222 return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
3228 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3229 return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
3235 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3237 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3242 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3244 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
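The _pch forms treat each 512-bit register as 16 complex numbers stored as (real, imag) FP16 pairs; fcmul multiplies by the conjugate of the second operand. Assuming that semantics, a squared-magnitude sketch:

#include <immintrin.h>

// z * conj(z): the real slot of each pair holds re*re + im*im and the
// imaginary slot is (up to rounding) zero.
static __m512h complex_norm_sq(__m512h z) {
  return _mm512_fcmul_pch(z, z);
}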
3289 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
3291 (__v32hf)__A);
3295 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
3296 return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
3301 _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
3302 return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
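permutexvar reorders FP16 lanes across the whole register using 16-bit indices; per the listing above, the first argument is the index vector and the second is the data. A lane-reversal sketch, assuming _mm512_set_epi16 is available:

#include <immintrin.h>

// Reverse the 32 FP16 lanes of v with a full cross-lane permute.
static __m512h reverse_lanes(__m512h v) {
  __m512i idx = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                 25, 26, 27, 28, 29, 30, 31);
  return _mm512_permutexvar_ph(idx, v);
}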