Lines Matching refs:__v32hf

19 typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)));
66 return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
81 return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
353 return (__m512h)((__v32hf)__A + (__v32hf)__B);
359 (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
365 (__v32hf)_mm512_add_ph(__A, __B),
366 (__v32hf)_mm512_setzero_ph());
370 ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \
371 (__v32hf)(__m512h)(B), (int)(R)))
375 (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
376 (__v32hf)(__m512h)(W)))
380 (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
381 (__v32hf)_mm512_setzero_ph()))
385 return (__m512h)((__v32hf)__A - (__v32hf)__B);
391 (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
397 (__v32hf)_mm512_sub_ph(__A, __B),
398 (__v32hf)_mm512_setzero_ph());
402 ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \
403 (__v32hf)(__m512h)(B), (int)(R)))
407 (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
408 (__v32hf)(__m512h)(W)))
412 (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
413 (__v32hf)_mm512_setzero_ph()))
417 return (__m512h)((__v32hf)__A * (__v32hf)__B);
423 (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
429 (__v32hf)_mm512_mul_ph(__A, __B),
430 (__v32hf)_mm512_setzero_ph());
434 ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \
435 (__v32hf)(__m512h)(B), (int)(R)))
439 (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
440 (__v32hf)(__m512h)(W)))
444 (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
445 (__v32hf)_mm512_setzero_ph()))
449 return (__m512h)((__v32hf)__A / (__v32hf)__B);
455 (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
461 (__v32hf)_mm512_div_ph(__A, __B),
462 (__v32hf)_mm512_setzero_ph());
466 ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \
467 (__v32hf)(__m512h)(B), (int)(R)))
471 (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
472 (__v32hf)(__m512h)(W)))
476 (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
477 (__v32hf)_mm512_setzero_ph()))
481 return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
488 (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
494 (__v32hf)_mm512_min_ph(__A, __B),
495 (__v32hf)_mm512_setzero_ph());
499 ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \
500 (__v32hf)(__m512h)(B), (int)(R)))
504 (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
505 (__v32hf)(__m512h)(W)))
509 (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
510 (__v32hf)_mm512_setzero_ph()))
514 return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
521 (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
527 (__v32hf)_mm512_max_ph(__A, __B),
528 (__v32hf)_mm512_setzero_ph());
532 ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \
533 (__v32hf)(__m512h)(B), (int)(R)))
537 (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
538 (__v32hf)(__m512h)(W)))
542 (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
543 (__v32hf)_mm512_setzero_ph()))
789 ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
790 (__v32hf)(__m512h)(B), (int)(P), \
794 ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
795 (__v32hf)(__m512h)(B), (int)(P), \
970 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
975 return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
982 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
987 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
992 return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
999 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
1004 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1005 (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \
1010 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1015 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1016 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1020 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1021 (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1025 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1030 (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1031 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1035 (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1042 (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1048 (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1053 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1054 (__v32hf)_mm512_undefined_ph(), \
1059 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
1062 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1063 (__v32hf)_mm512_setzero_ph(), \
1069 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1075 return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
1076 (__v32hf)__W, (__mmask32)__U,
1083 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1089 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1090 (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1094 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \
1099 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1100 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1104 (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \
1109 (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \
1114 (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1118 ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \
1119 (__v32hf)(__m512h)(A), \
1123 ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \
1124 (__v32hf)_mm512_setzero_ph(), \
1128 ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \
1129 (__v32hf)_mm512_undefined_ph(), \
1134 (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \
1139 (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \
1144 (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1148 ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1149 (__v32hf)(__m512h)(W), \
1153 ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1154 (__v32hf)_mm512_setzero_ph(), \
1158 ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1159 (__v32hf)_mm512_undefined_ph(), \
1366 ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R)))
1370 (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1371 (__v32hf)(__m512h)(W)))
1375 (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1376 (__v32hf)_mm512_setzero_ph()))
1379 return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
1387 (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1388 (__v32hf)(__m512h)(__W));
1395 (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1396 (__v32hf)_mm512_setzero_ph());
1439 ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1443 ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1667 ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1672 ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1676 ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1683 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1690 (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1696 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1702 (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
1706 ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1710 ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
1717 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1724 (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1730 (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1736 (__v32hf)_mm512_undefined_ph(), \
1740 ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
1745 (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1750 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1757 (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1763 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1769 (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1773 ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1777 ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
1784 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1791 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1797 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1803 (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1807 ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1811 ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
1818 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1825 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1831 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1837 (__v32hf)_mm512_undefined_ph(), \
1841 ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
1846 (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1851 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1858 (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1864 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
2446 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2451 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2456 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2461 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2466 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2471 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2476 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2481 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2486 -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2491 -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2496 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2501 -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2507 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2508 (__v32hf)__C, (__mmask32)-1,
2514 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2515 (__v32hf)__C, (__mmask32)__U,
2521 return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2522 (__v32hf)__C, (__mmask32)__U,
2528 return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2529 (__v32hf)__C, (__mmask32)__U,
2536 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2537 -(__v32hf)__C, (__mmask32)-1,
2543 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2544 -(__v32hf)__C, (__mmask32)__U,
2551 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2558 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2559 (__v32hf)__C, (__mmask32)-1,
2565 return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2566 (__v32hf)__C, (__mmask32)__U,
2572 return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2573 (__v32hf)__C, (__mmask32)__U,
2580 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2581 -(__v32hf)__C, (__mmask32)-1,
2588 -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2594 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2599 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2604 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2609 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2614 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2619 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2624 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2630 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2637 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2644 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2651 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2658 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2665 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2672 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2678 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2683 return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2684 (__v32hf)__C, (__mmask32)__U,
2690 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2696 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2702 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2707 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2708 (__v32hf)__C, (__mmask32)__U,
2714 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2719 -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2724 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2725 -(__v32hf)__C, (__mmask32)__U,
2731 return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2732 (__v32hf)__C, (__mmask32)__U,
3290 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
3291 (__v32hf)__A);