Lines Matching defs:__B

352                                                               __m512h __B) {
353 return (__m512h)((__v32hf)__A + (__v32hf)__B);
357 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
359 (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
363 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
365 (__v32hf)_mm512_add_ph(__A, __B),
384 __m512h __B) {
385 return (__m512h)((__v32hf)__A - (__v32hf)__B);
389 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
391 (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
395 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
397 (__v32hf)_mm512_sub_ph(__A, __B),
416 __m512h __B) {
417 return (__m512h)((__v32hf)__A * (__v32hf)__B);
421 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
423 (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
427 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
429 (__v32hf)_mm512_mul_ph(__A, __B),
448 __m512h __B) {
449 return (__m512h)((__v32hf)__A / (__v32hf)__B);
453 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
455 (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
459 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
461 (__v32hf)_mm512_div_ph(__A, __B),
480 __m512h __B) {
481 return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
486 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
488 (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
492 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
494 (__v32hf)_mm512_min_ph(__A, __B),
513 __m512h __B) {
514 return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
519 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
521 (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
525 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
527 (__v32hf)_mm512_max_ph(__A, __B),
567 __m128h __B) {
568 __A[0] += __B[0];
575 __m128h __B) {
576 __A = _mm_add_sh(__A, __B);
582 __m128h __B) {
583 __A = _mm_add_sh(__A, __B);
603 __m128h __B) {
604 __A[0] -= __B[0];
611 __m128h __B) {
612 __A = _mm_sub_sh(__A, __B);
618 __m128h __B) {
619 __A = _mm_sub_sh(__A, __B);
639 __m128h __B) {
640 __A[0] *= __B[0];
647 __m128h __B) {
648 __A = _mm_mul_sh(__A, __B);
654 __m128h __B) {
655 __A = _mm_mul_sh(__A, __B);
675 __m128h __B) {
676 __A[0] /= __B[0];
683 __m128h __B) {
684 __A = _mm_div_sh(__A, __B);
690 __m128h __B) {
691 __A = _mm_div_sh(__A, __B);
711 __m128h __B) {
713 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
720 __m128h __B) {
721 return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
728 __m128h __B) {
730 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
750 __m128h __B) {
752 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
759 __m128h __B) {
760 return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
767 __m128h __B) {
769 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
947 __m128h __B) {
948 return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
953 __m128h __B) {
954 return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
1067 __m512h __B) {
1069 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1074 _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
1075 return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
1081 _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
1083 (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1163 __m128h __B) {
1165 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1171 __m128h __B) {
1172 return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
1178 __m128h __B) {
1180 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1184 __m128h __B) {
1186 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1192 __m128h __B) {
1193 return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
1198 _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1200 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1239 __m128h __B) {
1241 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1246 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1248 (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
1258 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1260 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1275 __m128h __B) {
1277 (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1282 _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1283 return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
1294 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1296 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1415 __m128h __B) {
1417 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1424 __m128h __B) {
1426 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
1432 __m128h __B) {
1434 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1531 __m128h __B) {
1533 (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
1540 __m128h __B) {
1541 return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
1548 __m128h __B) {
1550 (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
1569 __m128 __B) {
1571 (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1578 __m128 __B) {
1580 (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
1586 __m128 __B) {
1588 (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1607 __m128d __B) {
1609 (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1616 __m128d __B) {
1618 (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
1623 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
1625 (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1644 __m128h __B) {
1646 (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
1653 __m128h __B) {
1655 (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
1660 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
1662 (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
2305 _mm_cvtu32_sh(__m128h __A, unsigned int __B) {
2306 __A[0] = __B;
2316 _mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
2317 __A[0] = __B;
2326 int __B) {
2327 __A[0] = __B;
2336 long long __B) {
2337 __A[0] = __B;
2505 __m512h __B,
2507 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2513 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2514 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2520 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2521 return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2527 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2528 return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2534 __m512h __B,
2536 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2542 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2543 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2549 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2551 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2556 __m512h __B,
2558 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2564 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2565 return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2571 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2572 return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2578 __m512h __B,
2580 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2586 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2588 -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2628 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
2630 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2635 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2637 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2642 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2644 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2649 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2651 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2656 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
2658 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2663 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2665 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2670 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2672 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2682 _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2683 return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2694 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2696 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2706 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2707 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2723 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2724 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2730 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2731 return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2738 __m128h __B) {
2739 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2746 __m128h __B) {
2747 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2762 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2763 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
2787 __m128h __B) {
2789 -(__v8hf)__B, (__mmask8)-1,
2796 __m128h __B) {
2798 -(__v8hf)__B, (__mmask8)__U,
2813 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2814 return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
2838 __m128h __B) {
2839 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2844 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2845 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2860 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2861 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
2885 __m128h __B) {
2886 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2891 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2892 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2907 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2908 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
2931 __m128h __B,
2933 return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2939 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2941 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2945 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2946 return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2952 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
2954 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
2978 __m128h __B,
2980 return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2986 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2988 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2992 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2993 return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2999 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
3001 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
3025 __m128h __B) {
3027 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3032 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
3033 return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3039 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3041 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3061 __m128h __B) {
3063 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3070 __m128h __B) {
3071 return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3077 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3079 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3099 __m512h __B) {
3101 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3106 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3107 return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3113 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3115 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3135 __m512h __B) {
3137 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3142 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3143 return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3149 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3151 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3171 __m512h __B,
3174 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
3179 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3181 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3186 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3188 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3193 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3195 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3220 __m512h __B,
3222 return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
3228 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3229 return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
3235 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3237 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3242 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3244 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3295 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
3297 (__v32hi)__B);
3301 _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
3302 return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);