Lines matching refs: __v16sf (a short usage sketch follows the listing below)

19 typedef float __v16sf __attribute__((__vector_size__(64)));
824 return (__m512)((__v16sf)__a + (__v16sf)__b);
836 return (__m512)((__v16sf)__a * (__v16sf)__b);
848 return (__m512)((__v16sf)__a - (__v16sf)__b);
977 (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
978 (__v16sf)(__m512)(B), (int)(R))
982 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
983 (__v16sf)(W))
987 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
988 (__v16sf)_mm512_setzero_ps())
993 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1001 (__v16sf)_mm512_max_ps(__A, __B),
1002 (__v16sf)__W);
1009 (__v16sf)_mm512_max_ps(__A, __B),
1010 (__v16sf)_mm512_setzero_ps());
1212 (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1213 (__v16sf)(__m512)(B), (int)(R))
1217 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1218 (__v16sf)(W))
1222 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1223 (__v16sf)_mm512_setzero_ps())
1228 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1236 (__v16sf)_mm512_min_ps(__A, __B),
1237 (__v16sf)__W);
1244 (__v16sf)_mm512_min_ps(__A, __B),
1245 (__v16sf)_mm512_setzero_ps());
1524 (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
1528 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1529 (__v16sf)(__m512)(W))
1533 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1534 (__v16sf)_mm512_setzero_ps())
1539 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1547 (__v16sf)_mm512_sqrt_ps(__A),
1548 (__v16sf)__W);
1555 (__v16sf)_mm512_sqrt_ps(__A),
1556 (__v16sf)_mm512_setzero_ps());
1587 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588 (__v16sf)
1596 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597 (__v16sf) __W,
1604 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605 (__v16sf)
1695 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696 (__v16sf)
1704 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705 (__v16sf) __W,
1712 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713 (__v16sf)
1777 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1779 (__v16sf) __A, -1,
1786 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1788 (__v16sf) __W, __U,
1813 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1815 (__v16sf) __W, __U,
1822 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1824 (__v16sf) __A, -1,
1966 (__v16sf)_mm512_add_ps(__A, __B),
1967 (__v16sf)__W);
1973 (__v16sf)_mm512_add_ps(__A, __B),
1974 (__v16sf)_mm512_setzero_ps());
1992 (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1993 (__v16sf)(__m512)(B), (int)(R))
1997 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1998 (__v16sf)(__m512)(W))
2002 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2003 (__v16sf)_mm512_setzero_ps())
2081 (__v16sf)_mm512_sub_ps(__A, __B),
2082 (__v16sf)__W);
2088 (__v16sf)_mm512_sub_ps(__A, __B),
2089 (__v16sf)_mm512_setzero_ps());
2107 (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2108 (__v16sf)(__m512)(B), (int)(R))
2112 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2113 (__v16sf)(__m512)(W))
2117 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2118 (__v16sf)_mm512_setzero_ps())
2196 (__v16sf)_mm512_mul_ps(__A, __B),
2197 (__v16sf)__W);
2203 (__v16sf)_mm512_mul_ps(__A, __B),
2204 (__v16sf)_mm512_setzero_ps());
2222 (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2223 (__v16sf)(__m512)(B), (int)(R))
2227 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2228 (__v16sf)(__m512)(W))
2232 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2233 (__v16sf)_mm512_setzero_ps())
2318 return (__m512)((__v16sf)__a/(__v16sf)__b);
2324 (__v16sf)_mm512_div_ps(__A, __B),
2325 (__v16sf)__W);
2331 (__v16sf)_mm512_div_ps(__A, __B),
2332 (__v16sf)_mm512_setzero_ps());
2350 (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2351 (__v16sf)(__m512)(B), (int)(R))
2355 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2356 (__v16sf)(__m512)(W))
2360 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2361 (__v16sf)_mm512_setzero_ps())
2364 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2365 (__v16sf)_mm512_undefined_ps(), \
2370 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2371 (__v16sf)(__m512)(A), (__mmask16)(B), \
2375 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2376 (__v16sf)_mm512_setzero_ps(), \
2381 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2382 (__v16sf)(__m512)(A), (__mmask16)(B), \
2386 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2387 (__v16sf)_mm512_setzero_ps(), \
2391 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2392 (__v16sf)_mm512_undefined_ps(), \
2632 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2633 (__v16sf)(__m512)(B), \
2634 (__v16sf)(__m512)(C), \
2639 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2640 (__v16sf)(__m512)(B), \
2641 (__v16sf)(__m512)(C), \
2646 (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2647 (__v16sf)(__m512)(B), \
2648 (__v16sf)(__m512)(C), \
2653 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2654 (__v16sf)(__m512)(B), \
2655 (__v16sf)(__m512)(C), \
2660 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2661 (__v16sf)(__m512)(B), \
2662 -(__v16sf)(__m512)(C), \
2667 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2668 (__v16sf)(__m512)(B), \
2669 -(__v16sf)(__m512)(C), \
2674 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2675 (__v16sf)(__m512)(B), \
2676 -(__v16sf)(__m512)(C), \
2681 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2682 -(__v16sf)(__m512)(B), \
2683 (__v16sf)(__m512)(C), \
2688 (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2689 (__v16sf)(__m512)(B), \
2690 (__v16sf)(__m512)(C), \
2695 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2696 (__v16sf)(__m512)(B), \
2697 (__v16sf)(__m512)(C), \
2702 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2703 -(__v16sf)(__m512)(B), \
2704 -(__v16sf)(__m512)(C), \
2709 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2710 (__v16sf)(__m512)(B), \
2711 -(__v16sf)(__m512)(C), \
2718 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719 (__v16sf) __B,
2720 (__v16sf) __C,
2728 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729 (__v16sf) __B,
2730 (__v16sf) __C,
2738 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739 (__v16sf) __B,
2740 (__v16sf) __C,
2748 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749 (__v16sf) __B,
2750 (__v16sf) __C,
2758 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759 (__v16sf) __B,
2760 -(__v16sf) __C,
2768 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769 (__v16sf) __B,
2770 -(__v16sf) __C,
2778 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779 (__v16sf) __B,
2780 -(__v16sf) __C,
2788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789 -(__v16sf) __B,
2790 (__v16sf) __C,
2798 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799 (__v16sf) __B,
2800 (__v16sf) __C,
2808 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809 (__v16sf) __B,
2810 (__v16sf) __C,
2818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819 -(__v16sf) __B,
2820 -(__v16sf) __C,
2828 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829 (__v16sf) __B,
2830 -(__v16sf) __C,
2955 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2956 (__v16sf)(__m512)(B), \
2957 (__v16sf)(__m512)(C), \
2962 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2963 (__v16sf)(__m512)(B), \
2964 (__v16sf)(__m512)(C), \
2969 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2970 (__v16sf)(__m512)(B), \
2971 (__v16sf)(__m512)(C), \
2976 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2977 (__v16sf)(__m512)(B), \
2978 (__v16sf)(__m512)(C), \
2983 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2984 (__v16sf)(__m512)(B), \
2985 -(__v16sf)(__m512)(C), \
2990 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2991 (__v16sf)(__m512)(B), \
2992 -(__v16sf)(__m512)(C), \
2997 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2998 (__v16sf)(__m512)(B), \
2999 -(__v16sf)(__m512)(C), \
3006 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007 (__v16sf) __B,
3008 (__v16sf) __C,
3016 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017 (__v16sf) __B,
3018 (__v16sf) __C,
3026 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027 (__v16sf) __B,
3028 (__v16sf) __C,
3036 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037 (__v16sf) __B,
3038 (__v16sf) __C,
3046 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047 (__v16sf) __B,
3048 -(__v16sf) __C,
3056 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057 (__v16sf) __B,
3058 -(__v16sf) __C,
3066 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067 (__v16sf) __B,
3068 -(__v16sf) __C,
3091 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3092 (__v16sf)(__m512)(B), \
3093 (__v16sf)(__m512)(C), \
3099 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100 (__v16sf) __B,
3101 (__v16sf) __C,
3124 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3125 (__v16sf)(__m512)(B), \
3126 (__v16sf)(__m512)(C), \
3133 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134 (__v16sf) __B,
3135 (__v16sf) __C,
3158 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3159 -(__v16sf)(__m512)(B), \
3160 (__v16sf)(__m512)(C), \
3167 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168 -(__v16sf) __B,
3169 (__v16sf) __C,
3209 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3210 -(__v16sf)(__m512)(B), \
3211 -(__v16sf)(__m512)(C), \
3216 (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3217 (__v16sf)(__m512)(B), \
3218 (__v16sf)(__m512)(C), \
3225 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226 -(__v16sf) __B,
3227 -(__v16sf) __C,
3235 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3236 (__v16sf) __B,
3237 (__v16sf) __C,
3359 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3364 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3369 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3387 (__v16sf) __W,
3388 (__v16sf) __A);
3410 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3411 (__v16sf)(__m512)(B), (int)(P), \
3415 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3416 (__v16sf)(__m512)(B), (int)(P), \
3522 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3527 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3532 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3540 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3550 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3559 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3567 (__v16sf)_mm512_setzero_ps(), \
3572 (__v16sf)(__m512)(W), \
3577 (__v16sf)_mm512_setzero_ps(), \
3582 (__v16sf)_mm512_setzero_ps(), \
3587 (__v16sf)(__m512)(W), \
3592 (__v16sf)_mm512_setzero_ps(), \
3598 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3605 (__v16sf)_mm512_cvtepu32_ps(__A),
3606 (__v16sf)__W);
3613 (__v16sf)_mm512_cvtepu32_ps(__A),
3614 (__v16sf)_mm512_setzero_ps());
3654 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3661 (__v16sf)_mm512_cvtepi32_ps(__A),
3662 (__v16sf)__W);
3669 (__v16sf)_mm512_cvtepi32_ps(__A),
3670 (__v16sf)_mm512_setzero_ps());
3768 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3773 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3778 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3788 (__v16sf)_mm512_undefined_ps(), \
3793 (__v16sf)(__m512)(W), \
3798 (__v16sf)_mm512_setzero_ps(), \
3806 (__v16sf)
3816 (__v16sf) __W,
3825 (__v16sf) _mm512_setzero_ps (),
3873 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3878 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3883 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3891 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3899 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3908 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3915 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3920 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3925 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3932 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3941 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3950 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4002 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4007 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4012 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4019 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4029 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4038 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4152 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4163 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164 (__v16sf)__W);
4171 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172 (__v16sf)_mm512_setzero_ps());
4178 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4189 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190 (__v16sf)__W);
4197 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198 (__v16sf)_mm512_setzero_ps());
4368 (__v16sf) __W,
4376 (__v16sf)
4425 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426 (__v16sf) __W,
4433 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434 (__v16sf)
4541 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4569 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
5347 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5348 (__v16sf)(__m512)(B), \
5353 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5354 (__v16sf)(__m512)(B), \
5359 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5360 (__v16sf)(__m512)(B), \
5366 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5367 (__v16sf)(__m512)(B), \
5373 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5374 (__v16sf)(__m512)(B), \
5380 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5381 (__v16sf)(__m512)(B), \
6155 (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
6159 (__v16sf)_mm512_permute_ps((X), (C)), \
6160 (__v16sf)(__m512)(W))
6164 (__v16sf)_mm512_permute_ps((X), (C)), \
6165 (__v16sf)_mm512_setzero_ps())
6192 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6199 (__v16sf)_mm512_permutevar_ps(__A, __C),
6200 (__v16sf)__W);
6207 (__v16sf)_mm512_permutevar_ps(__A, __C),
6208 (__v16sf)_mm512_setzero_ps());
6247 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6248 (__v16sf) __B);
6255 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6256 (__v16sf)__A);
6263 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6264 (__v16sf)(__m512)__I);
6271 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6272 (__v16sf)_mm512_setzero_ps());
6455 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6456 (__v16sf)(__m512)(B), \
6457 (__v16sf)_mm512_undefined_ps(), \
6461 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6462 (__v16sf)(__m512)(B), \
6463 (__v16sf)(__m512)(W), \
6467 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6468 (__v16sf)(__m512)(B), \
6469 (__v16sf)_mm512_setzero_ps(), \
6475 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6476 (__v16sf) __B,
6477 (__v16sf)
6486 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6487 (__v16sf) __B,
6488 (__v16sf) __W,
6496 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6497 (__v16sf) __B,
6498 (__v16sf)
6645 (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
6646 (__v16sf)(__m512)(B), (int)(imm))
6650 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6651 (__v16sf)(__m512)(W))
6655 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6656 (__v16sf)_mm512_setzero_ps())
6715 (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
6716 (__v16sf)(__m512)(B), (int)(M))
6720 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6721 (__v16sf)(__m512)(W))
6725 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6726 (__v16sf)_mm512_setzero_ps())
6816 (__v16sf)_mm512_broadcast_f32x4(__A),
6817 (__v16sf)__O);
6824 (__v16sf)_mm512_broadcast_f32x4(__A),
6825 (__v16sf)_mm512_setzero_ps());
6918 (__v16sf) _mm512_broadcastss_ps(__A),
6919 (__v16sf) __O);
6926 (__v16sf) _mm512_broadcastss_ps(__A),
6927 (__v16sf) _mm512_setzero_ps());
7427 (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
7432 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7433 (__v16sf)(__m512)(W))
7437 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7438 (__v16sf)_mm512_setzero_ps())
7494 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7496 (__v16sf)_mm512_undefined_ps(), \
7500 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7502 (__v16sf)(__m512)(W), \
7506 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7508 (__v16sf)_mm512_setzero_ps(), \
7512 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7514 (__v16sf)_mm512_undefined_ps(), \
7519 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7521 (__v16sf)(__m512)(W), \
7526 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7528 (__v16sf)_mm512_setzero_ps(), \
7575 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7576 (__v16sf)_mm512_undefined_ps(), \
7580 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7581 (__v16sf)(__m512)(W), \
7585 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7586 (__v16sf)_mm512_setzero_ps(), \
7592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7593 (__v16sf) _mm512_undefined_ps (),
7601 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7602 (__v16sf) __W,
7610 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7611 (__v16sf) _mm512_setzero_ps (),
7665 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
7671 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
7755 (__v16sf)(__m512)(v1), (int)(scale))
7760 (__v16sf)(__m512)(v1), (int)(scale))
8299 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8306 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8307 (__v16sf)__W);
8314 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8315 (__v16sf)_mm512_setzero_ps());
8468 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8509 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8510 (__v16sf) __W,
8517 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8518 (__v16sf)
8645 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8653 (__v16sf)_mm512_movehdup_ps(__A),
8654 (__v16sf)__W);
8661 (__v16sf)_mm512_movehdup_ps(__A),
8662 (__v16sf)_mm512_setzero_ps());
8668 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8676 (__v16sf)_mm512_moveldup_ps(__A),
8677 (__v16sf)__W);
8684 (__v16sf)_mm512_moveldup_ps(__A),
8685 (__v16sf)_mm512_setzero_ps());
8842 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8843 (__v16sf) __W,
8850 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8851 (__v16sf) _mm512_setzero_ps(),
8874 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8875 (__v16sf) __W,
8882 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8883 (__v16sf) _mm512_setzero_ps(),
8972 (__v16sf) __A,
8973 (__v16sf) __W);
8980 (__v16sf) __A,
8981 (__v16sf) _mm512_setzero_ps ());
9001 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
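The listing above indexes every line that touches the internal __v16sf type: the typedef itself (16 packed floats in a 64-byte vector, source line 19), the plain operator forms behind _mm512_add_ps / _mm512_sub_ps / _mm512_mul_ps / _mm512_div_ps (source lines 824-848 and 2318), and the casts into the masked __builtin_ia32_*512* builtins that back the rounding, FMA, conversion, permute, shuffle, and compress/expand intrinsics. The snippet below is a minimal sketch of how user code reaches these lines through the public intrinsics; it assumes an AVX-512F target (for example, clang or gcc with -mavx512f). The array names, mask value, printed lanes, and file name are illustrative only, not part of the listed header.

/* usage_sketch.c (hypothetical name) -- build with: clang -O2 -mavx512f usage_sketch.c */
#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    float a[16], b[16], out[16];
    for (int i = 0; i < 16; ++i) { a[i] = (float)i; b[i] = 0.5f * (float)i; }

    __m512 va = _mm512_loadu_ps(a);   /* 16 packed floats, __v16sf under the hood */
    __m512 vb = _mm512_loadu_ps(b);

    /* Plain arithmetic: lowers to the (__v16sf)__a + (__v16sf)__b form
       shown at source line 824 of the listing. */
    __m512 sum = _mm512_add_ps(va, vb);

    /* Fused multiply-add: the wrapper around __builtin_ia32_vfmaddps512_mask
       with an all-ones mask (see source line 2718). */
    __m512 fma = _mm512_fmadd_ps(va, vb, sum);

    /* Merge-masking: even lanes (mask bits 0, 2, ..., 14) take va + vb,
       odd lanes keep the corresponding lane of `sum`. */
    __mmask16 even = 0x5555;
    __m512 merged = _mm512_mask_add_ps(sum, even, va, vb);

    /* Zero-masking: unselected lanes become 0.0f, via the
       _mm512_setzero_ps() passthrough that recurs throughout the listing. */
    __m512 root = _mm512_maskz_sqrt_ps(even, fma);

    _mm512_storeu_ps(out, merged);
    printf("lane 0 of merged = %f\n", out[0]);
    _mm512_storeu_ps(out, root);
    printf("lane 1 of root   = %f (zeroed by the mask)\n", out[1]);
    return 0;
}

The sketch deliberately sticks to intrinsics whose bodies appear in the listing (add, FMA, sqrt, setzero, masked selects); the same merge/zero masking pattern applies to the min/max, scalef, getexp/getmant, permute, and conversion entries indexed above.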