Lines Matching refs:__A

149 _mm_store_ps(float *__P, __m128 __A) {
150 vec_st((__v4sf)__A, 0, (__v4sf *)__P);
156 _mm_storeu_ps(float *__P, __m128 __A) {
157 *(__m128_u *)__P = __A;
163 _mm_storer_ps(float *__P, __m128 __A) {
169 __tmp = (__m128)vec_perm(__A, __A, __permute_vector);
177 _mm_store1_ps(float *__P, __m128 __A) {
178 __v4sf __va = vec_splat((__v4sf)__A, 0);
184 _mm_store_ps1(float *__P, __m128 __A) {
185 _mm_store1_ps(__P, __A);
198 _mm_move_ss(__m128 __A, __m128 __B) {
201 return (vec_sel((__v4sf)__A, (__v4sf)__B, __mask));
214 _mm_store_ss(float *__P, __m128 __A) {
215 *__P = ((__v4sf)__A)[0];
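
The store family above differs only in lane placement, and _mm_move_ss is the same vec_sel merge in register form. An illustrative sketch of the observable semantics, not part of the match listing (assumes <xmmintrin.h>; on PowerPC these compat headers typically require -DNO_WARN_X86_INTRINSICS):

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes 0..3 = 1,2,3,4 */
  float a[4] __attribute__((aligned(16)));
  float r[4] __attribute__((aligned(16)));
  float s[4] __attribute__((aligned(16)));
  float one;
  _mm_store_ps(a, v);    /* 1 2 3 4  (16-byte-aligned store) */
  _mm_storer_ps(r, v);   /* 4 3 2 1  (lanes reversed via vec_perm) */
  _mm_store1_ps(s, v);   /* 1 1 1 1  (lane 0 splatted) */
  _mm_store_ss(&one, v); /* 1        (lane 0 only, no alignment needed) */
  printf("%g %g %g %g %g\n", a[0], r[0], s[3], a[3], one);
  return 0;
}
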
224 _mm_add_ss(__m128 __A, __m128 __B) {
232 __a = vec_splat(__A, 0);
236 float elements from __A. */
237 return (vec_sel(__A, __c, __mask));
239 __A[0] = __A[0] + __B[0];
240 return (__A);
246 _mm_sub_ss(__m128 __A, __m128 __B) {
254 __a = vec_splat(__A, 0);
258 float elements from __A. */
259 return (vec_sel(__A, __c, __mask));
261 __A[0] = __A[0] - __B[0];
262 return (__A);
268 _mm_mul_ss(__m128 __A, __m128 __B) {
276 __a = vec_splat(__A, 0);
280 float elements from __A. */
281 return (vec_sel(__A, __c, __mask));
283 __A[0] = __A[0] * __B[0];
284 return (__A);
290 _mm_div_ss(__m128 __A, __m128 __B) {
298 __a = vec_splat(__A, 0);
302 float elements from __A. */
303 return (vec_sel(__A, __c, __mask));
305 __A[0] = __A[0] / __B[0];
306 return (__A);
312 _mm_sqrt_ss(__m128 __A) {
319 __a = vec_splat(__A, 0);
322 * float elements from __A. */
323 return (vec_sel(__A, __c, __mask));
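
All of the _ss arithmetic above shares one shape: vec_splat lane 0 of each operand, run the full-width operation, then vec_sel the low result back over __A so lanes 1-3 pass through untouched (the fallback path simply writes __A[0]). A minimal sketch of what that buys:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(30.0f, 20.0f, 10.0f, 9.0f); /* lanes 0..3 = 9,10,20,30 */
  __m128 b = _mm_set_ss(3.0f);                      /* lane 0 = 3, rest 0 */
  float r[4];
  _mm_storeu_ps(r, _mm_div_ss(a, b));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 3 10 20 30 */
  _mm_storeu_ps(r, _mm_sqrt_ss(a));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 3 10 20 30 */
  return 0;
}
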
329 _mm_add_ps(__m128 __A, __m128 __B) {
330 return (__m128)((__v4sf)__A + (__v4sf)__B);
335 _mm_sub_ps(__m128 __A, __m128 __B) {
336 return (__m128)((__v4sf)__A - (__v4sf)__B);
341 _mm_mul_ps(__m128 __A, __m128 __B) {
342 return (__m128)((__v4sf)__A * (__v4sf)__B);
347 _mm_div_ps(__m128 __A, __m128 __B) {
348 return (__m128)((__v4sf)__A / (__v4sf)__B);
353 _mm_sqrt_ps(__m128 __A) {
354 return (vec_sqrt((__v4sf)__A));
359 _mm_rcp_ps(__m128 __A) {
360 return (vec_re((__v4sf)__A));
365 _mm_rsqrt_ps(__m128 __A) {
366 return (vec_rsqrte(__A));
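
The packed forms map straight onto C vector operators or single built-ins. Note that vec_re and vec_rsqrte are estimates, mirroring the x86 semantics of _mm_rcp_ps/_mm_rsqrt_ps, so code needing more precision usually adds a Newton-Raphson step. A common idiom, sketched here rather than taken from the header:

#include <xmmintrin.h>

/* One Newton-Raphson refinement of the reciprocal estimate:
   r' = r * (2 - x*r), roughly doubling the bits of precision. */
static inline __m128 rcp_nr1(__m128 x) {
  __m128 r = _mm_rcp_ps(x);
  return _mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(x, r)));
}
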
371 _mm_rcp_ss(__m128 __A) {
378 __a = vec_splat(__A, 0);
381 * float elements from __A. */
382 return (vec_sel(__A, __c, __mask));
387 _mm_rsqrt_ss(__m128 __A) {
394 __a = vec_splat(__A, 0);
397 * float elements from __A. */
398 return (vec_sel(__A, __c, __mask));
403 _mm_min_ss(__m128 __A, __m128 __B) {
410 __a = vec_splat((__v4sf)__A, 0);
414 * float elements from __A. */
415 return (vec_sel((__v4sf)__A, __c, __mask));
420 _mm_max_ss(__m128 __A, __m128 __B) {
427 __a = vec_splat(__A, 0);
431 * float elements from __A. */
432 return (vec_sel((__v4sf)__A, __c, __mask));
437 _mm_min_ps(__m128 __A, __m128 __B) {
438 __vector __bool int __m = vec_cmpgt((__v4sf)__B, (__v4sf)__A);
439 return vec_sel(__B, __A, __m);
444 _mm_max_ps(__m128 __A, __m128 __B) {
445 __vector __bool int __m = vec_cmpgt((__v4sf)__A, (__v4sf)__B);
446 return vec_sel(__B, __A, __m);
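
_mm_min_ps/_mm_max_ps are one compare plus vec_sel; because the selector keeps __B whenever the compare is false, a NaN in either lane yields the second operand, matching x86 MINPS/MAXPS ordering. Illustrative check:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(8.0f, 1.0f, 6.0f, 3.0f);
  __m128 b = _mm_set_ps(7.0f, 2.0f, 5.0f, 4.0f);
  float r[4];
  _mm_storeu_ps(r, _mm_min_ps(a, b));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 3 5 1 7 */
  _mm_storeu_ps(r, _mm_max_ps(a, b));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 4 6 2 8 */
  return 0;
}
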
452 _mm_and_ps(__m128 __A, __m128 __B) {
453 return ((__m128)vec_and((__v4sf)__A, (__v4sf)__B));
454 // return __builtin_ia32_andps (__A, __B);
459 _mm_andnot_ps(__m128 __A, __m128 __B) {
460 return ((__m128)vec_andc((__v4sf)__B, (__v4sf)__A));
465 _mm_or_ps(__m128 __A, __m128 __B) {
466 return ((__m128)vec_or((__v4sf)__A, (__v4sf)__B));
471 _mm_xor_ps(__m128 __A, __m128 __B) {
472 return ((__m128)vec_xor((__v4sf)__A, (__v4sf)__B));
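
The bitwise family is a thin wrapper over vec_and/vec_andc/vec_or/vec_xor; note the operand swap in _mm_andnot_ps, since vec_andc(__B, __A) computes __B & ~__A. Combined with the compares below, these give branchless selects; a hypothetical helper, not from the header:

#include <xmmintrin.h>

/* Branchless select: (mask & t) | (~mask & f); mask lanes must be
   all-ones or all-zeros, as the compare intrinsics produce. */
static inline __m128 select_ps(__m128 mask, __m128 t, __m128 f) {
  return _mm_or_ps(_mm_and_ps(mask, t), _mm_andnot_ps(mask, f));
}
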
480 _mm_cmpeq_ps(__m128 __A, __m128 __B) {
481 return ((__m128)vec_cmpeq((__v4sf)__A, (__v4sf)__B));
486 _mm_cmplt_ps(__m128 __A, __m128 __B) {
487 return ((__m128)vec_cmplt((__v4sf)__A, (__v4sf)__B));
492 _mm_cmple_ps(__m128 __A, __m128 __B) {
493 return ((__m128)vec_cmple((__v4sf)__A, (__v4sf)__B));
498 _mm_cmpgt_ps(__m128 __A, __m128 __B) {
499 return ((__m128)vec_cmpgt((__v4sf)__A, (__v4sf)__B));
504 _mm_cmpge_ps(__m128 __A, __m128 __B) {
505 return ((__m128)vec_cmpge((__v4sf)__A, (__v4sf)__B));
510 _mm_cmpneq_ps(__m128 __A, __m128 __B) {
511 __v4sf __temp = (__v4sf)vec_cmpeq((__v4sf)__A, (__v4sf)__B);
517 _mm_cmpnlt_ps(__m128 __A, __m128 __B) {
518 return ((__m128)vec_cmpge((__v4sf)__A, (__v4sf)__B));
523 _mm_cmpnle_ps(__m128 __A, __m128 __B) {
524 return ((__m128)vec_cmpgt((__v4sf)__A, (__v4sf)__B));
529 _mm_cmpngt_ps(__m128 __A, __m128 __B) {
530 return ((__m128)vec_cmple((__v4sf)__A, (__v4sf)__B));
535 _mm_cmpnge_ps(__m128 __A, __m128 __B) {
536 return ((__m128)vec_cmplt((__v4sf)__A, (__v4sf)__B));
541 _mm_cmpord_ps(__m128 __A, __m128 __B) {
547 __a = (__vector unsigned int)vec_abs((__v4sf)__A);
556 _mm_cmpunord_ps(__m128 __A, __m128 __B) {
562 __a = (__vector unsigned int)vec_abs((__v4sf)__A);
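
Packed compares return all-ones/all-zeros lane masks (cmpord/cmpunord build theirs from vec_abs of each operand against an exponent mask; only the vec_abs lines match __A here), so they compose directly with the bitwise ops. For example, a branchless clamp of negative lanes to zero:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 x = _mm_set_ps(4.0f, -1.0f, 2.0f, -3.0f);
  __m128 neg = _mm_cmplt_ps(x, _mm_setzero_ps()); /* mask of x < 0 */
  __m128 relu = _mm_andnot_ps(neg, x);            /* zero the negative lanes */
  float r[4];
  _mm_storeu_ps(r, relu);
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 0 2 0 4 */
  return 0;
}
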
574 _mm_cmpeq_ss(__m128 __A, __m128 __B) {
581 __a = vec_splat((__v4sf)__A, 0);
585 * float elements from __A. */
586 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
591 _mm_cmplt_ss(__m128 __A, __m128 __B) {
598 __a = vec_splat((__v4sf)__A, 0);
602 * float elements from __A. */
603 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
608 _mm_cmple_ss(__m128 __A, __m128 __B) {
615 __a = vec_splat((__v4sf)__A, 0);
619 * float elements from __A. */
620 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
625 _mm_cmpgt_ss(__m128 __A, __m128 __B) {
632 __a = vec_splat((__v4sf)__A, 0);
636 * float elements from __A. */
637 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
642 _mm_cmpge_ss(__m128 __A, __m128 __B) {
649 __a = vec_splat((__v4sf)__A, 0);
653 * float elements from __A. */
654 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
659 _mm_cmpneq_ss(__m128 __A, __m128 __B) {
666 __a = vec_splat((__v4sf)__A, 0);
671 * float elements from __A. */
672 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
677 _mm_cmpnlt_ss(__m128 __A, __m128 __B) {
684 __a = vec_splat((__v4sf)__A, 0);
688 * float elements from __A. */
689 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
694 _mm_cmpnle_ss(__m128 __A, __m128 __B) {
701 __a = vec_splat((__v4sf)__A, 0);
705 * float elements from __A. */
706 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
711 _mm_cmpngt_ss(__m128 __A, __m128 __B) {
718 __a = vec_splat((__v4sf)__A, 0);
722 * float elements from __A. */
723 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
728 _mm_cmpnge_ss(__m128 __A, __m128 __B) {
735 __a = vec_splat((__v4sf)__A, 0);
739 * float elements from __A. */
740 return ((__m128)vec_sel((__v4sf)__A, __c, __mask));
745 _mm_cmpord_ss(__m128 __A, __m128 __B) {
752 __a = (__vector unsigned int)vec_abs((__v4sf)__A);
758 * float elements from __A. */
759 return ((__m128)vec_sel((__v4sf)__A, (__v4sf)__c, __mask));
764 _mm_cmpunord_ss(__m128 __A, __m128 __B) {
771 __a = (__vector unsigned int)vec_abs((__v4sf)__A);
777 * float elements from __A. */
778 return ((__m128)vec_sel((__v4sf)__A, (__v4sf)__c, __mask));
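
The _ss compares reuse the splat/compare/vec_sel pattern: lane 0 carries the mask, lanes 1-3 come from __A unchanged. Worth remembering that lane 0 is then a bit pattern, not a meaningful float; a sketch:

#include <stdio.h>
#include <string.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(8.0f, 7.0f, 6.0f, 1.0f);
  __m128 b = _mm_set_ss(2.0f);
  float r[4];
  unsigned bits;
  _mm_storeu_ps(r, _mm_cmplt_ss(a, b));  /* 1 < 2: lane 0 is all-ones */
  memcpy(&bits, &r[0], sizeof bits);     /* inspect lane 0 as raw bits */
  printf("%#x %g %g %g\n", bits, r[1], r[2], r[3]); /* 0xffffffff 6 7 8 */
  return 0;
}
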
785 _mm_comieq_ss(__m128 __A, __m128 __B) {
786 return (__A[0] == __B[0]);
791 _mm_comilt_ss(__m128 __A, __m128 __B) {
792 return (__A[0] < __B[0]);
797 _mm_comile_ss(__m128 __A, __m128 __B) {
798 return (__A[0] <= __B[0]);
803 _mm_comigt_ss(__m128 __A, __m128 __B) {
804 return (__A[0] > __B[0]);
809 _mm_comige_ss(__m128 __A, __m128 __B) {
810 return (__A[0] >= __B[0]);
815 _mm_comineq_ss(__m128 __A, __m128 __B) {
816 return (__A[0] != __B[0]);
829 _mm_ucomieq_ss(__m128 __A, __m128 __B) {
830 return (__A[0] == __B[0]);
835 _mm_ucomilt_ss(__m128 __A, __m128 __B) {
836 return (__A[0] < __B[0]);
841 _mm_ucomile_ss(__m128 __A, __m128 __B) {
842 return (__A[0] <= __B[0]);
847 _mm_ucomigt_ss(__m128 __A, __m128 __B) {
848 return (__A[0] > __B[0]);
853 _mm_ucomige_ss(__m128 __A, __m128 __B) {
854 return (__A[0] >= __B[0]);
859 _mm_ucomineq_ss(__m128 __A, __m128 __B) {
860 return (__A[0] != __B[0]);
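
On this port the comi and ucomi forms are identical: both lower to plain C comparisons on element 0 and return an int, so NaN behaves as it does for C operators (every comparison false except !=). For instance:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ss(1.0f);
  __m128 b = _mm_set_ss(2.0f);
  printf("%d %d %d\n",
         _mm_comilt_ss(a, b),    /* 1 */
         _mm_comieq_ss(a, b),    /* 0 */
         _mm_ucomineq_ss(a, b)); /* 1 */
  return 0;
}
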
865 _mm_cvtss_f32(__m128 __A) {
866 return ((__v4sf)__A)[0];
873 _mm_cvtss_si32(__m128 __A) {
884 : "+wa"(__A), "=r"(__res), "=f"(__dtmp)
887 __res = __builtin_rint(__A[0]);
894 _mm_cvt_ss2si(__m128 __A) {
895 return _mm_cvtss_si32(__A);
904 _mm_cvtss_si64(__m128 __A) {
915 : "+wa"(__A), "=r"(__res), "=f"(__dtmp)
918 __res = __builtin_llrint(__A[0]);
926 _mm_cvtss_si64x(__m128 __A) {
927 return _mm_cvtss_si64((__v4sf)__A);
954 _mm_cvtps_pi32(__m128 __A) {
960 __temp = (__v4sf)vec_splat((__vector long long)__A, 0);
969 _mm_cvt_ps2pi(__m128 __A) {
970 return _mm_cvtps_pi32(__A);
976 _mm_cvttss_si32(__m128 __A) {
978 float __temp = __A[0];
985 _mm_cvtt_ss2si(__m128 __A) {
986 return _mm_cvttss_si32(__A);
992 _mm_cvttss_si64(__m128 __A) {
994 float __temp = __A[0];
1002 _mm_cvttss_si64x(__m128 __A) {
1004 float __temp = __A[0];
1013 _mm_cvttps_pi32(__m128 __A) {
1018 __temp = (__v4sf)vec_splat((__vector long long)__A, 0);
1026 _mm_cvtt_ps2pi(__m128 __A) {
1027 return _mm_cvttps_pi32(__A);
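
The si32/si64 conversions round under the current rounding mode (both the inline-asm fast path and the __builtin_rint/__builtin_llrint fallback honor it), while the tt variants truncate toward zero through a plain C cast. The difference, under the default round-to-nearest-even:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 x = _mm_set_ss(2.5f);
  __m128 y = _mm_set_ss(3.5f);
  printf("%d %d\n", _mm_cvtss_si32(x), _mm_cvttss_si32(x)); /* 2 2 */
  printf("%d %d\n", _mm_cvtss_si32(y), _mm_cvttss_si32(y)); /* 4 3 */
  return 0;
}
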
1033 _mm_cvtsi32_ss(__m128 __A, int __B) {
1035 __A[0] = __temp;
1037 return __A;
1042 _mm_cvt_si2ss(__m128 __A, int __B) {
1043 return _mm_cvtsi32_ss(__A, __B);
1050 _mm_cvtsi64_ss(__m128 __A, long long __B) {
1052 __A[0] = __temp;
1054 return __A;
1060 _mm_cvtsi64x_ss(__m128 __A, long long __B) {
1061 return _mm_cvtsi64_ss(__A, __B);
1068 _mm_cvtpi32_ps(__m128 __A, __m64 __B) {
1077 ((__vector unsigned long long)__A)[1]});
1082 _mm_cvt_pi2ps(__m128 __A, __m64 __B) {
1083 return _mm_cvtpi32_ps(__A, __B);
1089 _mm_cvtpi16_ps(__m64 __A) {
1094 __vs8 = (__vector signed short)(__vector unsigned long long){__A, __A};
1104 _mm_cvtpu16_ps(__m64 __A) {
1110 __vs8 = (__vector unsigned short)(__vector unsigned long long){__A, __A};
1125 _mm_cvtpi8_ps(__m64 __A) {
1131 __vc16 = (__vector signed char)(__vector unsigned long long){__A, __A};
1143 _mm_cvtpu8_ps(__m64 __A) {
1150 __vc16 = (__vector unsigned char)(__vector unsigned long long){__A, __A};
1168 _mm_cvtpi32x2_ps(__m64 __A, __m64 __B) {
1172 __vi4 = (__vector signed int)(__vector unsigned long long){__A, __B};
1180 _mm_cvtps_pi16(__m128 __A) {
1185 __rounded = vec_rint(__A);
1195 _mm_cvtps_pi8(__m128 __A) {
1202 __rounded = vec_rint(__A);
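
Going the other way, _mm_cvtsi32_ss/_mm_cvtsi64_ss convert and insert into lane 0 while keeping the upper lanes of __A, and the pi8/pi16 variants widen or narrow through __m64, with vec_rint handling the rounding on the way down. A minimal check of the insert behavior:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set1_ps(9.0f);
  float r[4];
  _mm_storeu_ps(r, _mm_cvtsi32_ss(a, 7)); /* lane 0 = (float)7, rest kept */
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 7 9 9 9 */
  return 0;
}
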
1213 _mm_shuffle_ps(__m128 __A, __m128 __B, int const __mask) {
1231 return vec_perm((__v4sf)__A, (__v4sf)__B, (__vector unsigned char)__t);
1237 _mm_unpackhi_ps(__m128 __A, __m128 __B) {
1238 return (__m128)vec_vmrglw((__v4sf)__A, (__v4sf)__B);
1244 _mm_unpacklo_ps(__m128 __A, __m128 __B) {
1245 return (__m128)vec_vmrghw((__v4sf)__A, (__v4sf)__B);
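
_mm_shuffle_ps expands the 8-bit immediate (two lane picks per operand) into a vec_perm control vector; the unpack forms are single merges, with vmrglw/vmrghw swapped relative to their x86 names because of endianness. For instance:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); /* lanes 0..3 = 0,1,2,3 */
  __m128 b = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f); /* lanes 0..3 = 4,5,6,7 */
  float r[4];
  _mm_storeu_ps(r, _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0)));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 0 1 6 7 */
  _mm_storeu_ps(r, _mm_unpacklo_ps(a, b));
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 0 4 1 5 */
  return 0;
}
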
1252 _mm_loadh_pi(__m128 __A, __m64 const *__P) {
1253 __vector unsigned long long __a = (__vector unsigned long long)__A;
1263 _mm_storeh_pi(__m64 *__P, __m128 __A) {
1264 __vector unsigned long long __a = (__vector unsigned long long)__A;
1272 _mm_movehl_ps(__m128 __A, __m128 __B) {
1274 (__vector unsigned long long)__A);
1280 _mm_movelh_ps(__m128 __A, __m128 __B) {
1281 return (__m128)vec_mergeh((__vector unsigned long long)__A,
1289 _mm_loadl_pi(__m128 __A, __m64 const *__P) {
1290 __vector unsigned long long __a = (__vector unsigned long long)__A;
1300 _mm_storel_pi(__m64 *__P, __m128 __A) {
1301 __vector unsigned long long __a = (__vector unsigned long long)__A;
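
loadl/loadh and their stores move one 64-bit half of the vector, and movehl/movelh are the matching register-to-register merges over unsigned long long lanes. Building a vector from two float pairs, as a sketch:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  float lo[2] = {1.0f, 2.0f}, hi[2] = {3.0f, 4.0f};
  __m128 v = _mm_setzero_ps();
  v = _mm_loadl_pi(v, (const __m64 *)lo); /* lanes 0-1 = 1,2 */
  v = _mm_loadh_pi(v, (const __m64 *)hi); /* lanes 2-3 = 3,4 */
  float r[4];
  _mm_storeu_ps(r, v);
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* 1 2 3 4 */
  return 0;
}
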
1312 _mm_movemask_ps(__m128 __A) {
1314 return vec_extractm((__vector unsigned int)__A);
1326 (__vector unsigned char)__A, (__vector unsigned char)__perm_mask));
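
_mm_movemask_ps gathers the four lane sign bits into an int, via vec_extractm on ISA 3.1 and a vec_perm-based bit gather otherwise (the line 1326 fragment). Example:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 x = _mm_set_ps(-4.0f, 3.0f, -2.0f, 1.0f); /* signs in lanes 1 and 3 */
  printf("%#x\n", _mm_movemask_ps(x));             /* 0xa */
  return 0;
}
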
1353 _mm_extract_pi16(__m64 const __A, int const __N) {
1359 return ((__A >> (__shiftr * 16)) & 0xffff);
1364 _m_pextrw(__m64 const __A, int const __N) {
1365 return _mm_extract_pi16(__A, __N);
1372 _mm_insert_pi16(__m64 const __A, int const __D, int const __N) {
1376 __m64 __result = (__A & (~__mask)) | (__shiftD & __mask);
1383 _m_pinsrw(__m64 const __A, int const __D, int const __N) {
1384 return _mm_insert_pi16(__A, __D, __N);
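
The __m64 extract/insert pair is plain shift-and-mask arithmetic; note that _mm_extract_pi16 zero-extends the halfword, as pextrw does on x86. Sketch:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 v = _mm_set_pi16(400, 300, 200, 100); /* halfwords 0..3 = 100..400 */
  printf("%d\n", _mm_extract_pi16(v, 2));     /* 300 */
  __m64 w = _mm_insert_pi16(v, 999, 0);
  printf("%d\n", _mm_extract_pi16(w, 0));     /* 999 */
  return 0;
}
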
1391 _mm_max_pi16(__m64 __A, __m64 __B) {
1396 __a = (__vector signed short)vec_splats(__A);
1404 __m1.as_m64 = __A;
1422 _m_pmaxsw(__m64 __A, __m64 __B) {
1423 return _mm_max_pi16(__A, __B);
1429 _mm_max_pu8(__m64 __A, __m64 __B) {
1434 __a = (__vector unsigned char)vec_splats(__A);
1443 __m1.as_m64 = __A;
1458 _m_pmaxub(__m64 __A, __m64 __B) {
1459 return _mm_max_pu8(__A, __B);
1465 _mm_min_pi16(__m64 __A, __m64 __B) {
1470 __a = (__vector signed short)vec_splats(__A);
1478 __m1.as_m64 = __A;
1496 _m_pminsw(__m64 __A, __m64 __B) {
1497 return _mm_min_pi16(__A, __B);
1503 _mm_min_pu8(__m64 __A, __m64 __B) {
1508 __a = (__vector unsigned char)vec_splats(__A);
1517 __m1.as_m64 = __A;
1532 _m_pminub(__m64 __A, __m64 __B) {
1533 return _mm_min_pu8(__A, __B);
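
The __m64 min/max pairs splat both operands into full vectors on _ARCH_PWR8, with a scalar per-element loop as the fallback (the as_m64 union lines). _mm_max_pi16/_mm_min_pi16 are signed, the pu8 forms unsigned; for example:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 a = _mm_set_pi16(1, 40, -3, 8);
  __m64 b = _mm_set_pi16(2, 30, -4, 7);
  __m64 mx = _mm_max_pi16(a, b);
  /* halfword 1 is the signed max of -3 and -4; extract zero-extends,
     so cast back to short to read it as a signed value */
  printf("%d %d\n", (short)_mm_extract_pi16(mx, 1),
         _mm_extract_pi16(mx, 0)); /* -3 8 */
  return 0;
}
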
1539 _mm_movemask_pi8(__m64 __A) {
1547 return __builtin_bpermd(__p, __A);
1551 unsigned int __r1 = __builtin_bpermd(__mask, __A) & 0xf;
1552 unsigned int __r2 = __builtin_bpermd(__mask, __A >> 32) & 0xf;
1555 unsigned int __r1 = __builtin_bpermd(__mask, __A >> 32) & 0xf;
1556 unsigned int __r2 = __builtin_bpermd(__mask, __A) & 0xf;
1564 _m_pmovmskb(__m64 __A) {
1565 return _mm_movemask_pi8(__A);
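
_mm_movemask_pi8 packs the eight byte sign bits using one or two __builtin_bpermd gathers, depending on pointer size and endianness. Example:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 v = _mm_set_pi8(-1, 0, 0, 0, 0, 0, 0, -1); /* signs in bytes 0 and 7 */
  printf("%#x\n", _mm_movemask_pi8(v));            /* 0x81 */
  return 0;
}
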
1572 _mm_mulhi_pu16(__m64 __A, __m64 __B) {
1586 __a = (__vector unsigned short)vec_splats(__A);
1598 _m_pmulhuw(__m64 __A, __m64 __B) {
1599 return _mm_mulhi_pu16(__A, __B);
1606 _mm_shuffle_pi16(__m64 __A, int const __N) {
1633 __a = vec_splats(__A);
1640 _m_pshufw(__m64 __A, int const __N) {
1641 return _mm_shuffle_pi16(__A, __N);
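
_mm_mulhi_pu16 keeps the high halfword of each 16x16 unsigned product, and _mm_shuffle_pi16 reorders halfwords by immediate, just as _mm_shuffle_ps does lanes. A reversal plus a high-product check, as a sketch:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 v = _mm_set_pi16(3, 2, 1, 0); /* halfwords 0..3 = 0,1,2,3 */
  __m64 r = _mm_shuffle_pi16(v, _MM_SHUFFLE(0, 1, 2, 3)); /* reversed */
  printf("%d %d\n", _mm_extract_pi16(r, 0), _mm_extract_pi16(r, 3)); /* 3 0 */
  /* 0x8000 is treated as unsigned: 0x8000 * 4 = 0x20000, high half = 2 */
  __m64 h = _mm_mulhi_pu16(_mm_set1_pi16(0x8000), _mm_set1_pi16(4));
  printf("%d\n", _mm_extract_pi16(h, 0)); /* 2 */
  return 0;
}
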
1649 _mm_maskmove_si64(__m64 __A, __m64 __N, char *__P) {
1656 __tmp = (__tmp & (~__mask)) | (__A & __mask);
1662 _m_maskmovq(__m64 __A, __m64 __N, char *__P) {
1663 _mm_maskmove_si64(__A, __N, __P);
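
_mm_maskmove_si64 is emulated as a read-modify-write: load the 8 destination bytes, merge in __A where the mask byte's top bit is set (the line 1656 fragment), and store back. So unlike x86 MASKMOVQ, the unselected bytes are read and rewritten. Sketch:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  char buf[8] = {0};
  __m64 data = _mm_set_pi8(8, 7, 6, 5, 4, 3, 2, 1); /* bytes 0..7 = 1..8 */
  __m64 mask = _mm_set_pi8(0, -128, 0, -128, 0, 0, 0, -128);
  _mm_maskmove_si64(data, mask, buf);   /* top bit set in bytes 0, 4, 6 */
  printf("%d %d %d %d\n", buf[0], buf[1], buf[4], buf[6]); /* 1 0 5 7 */
  return 0;
}
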
1669 _mm_avg_pu8(__m64 __A, __m64 __B) {
1672 __a = (__vector unsigned char)vec_splats(__A);
1680 _m_pavgb(__m64 __A, __m64 __B) {
1681 return _mm_avg_pu8(__A, __B);
1687 _mm_avg_pu16(__m64 __A, __m64 __B) {
1690 __a = (__vector unsigned short)vec_splats(__A);
1698 _m_pavgw(__m64 __A, __m64 __B) {
1699 return _mm_avg_pu16(__A, __B);
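
Both averages are single vec_avg instructions, which round up: (a + b + 1) >> 1 per element. For example:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 a = _mm_set1_pi16(10);
  __m64 b = _mm_set1_pi16(11);
  printf("%d\n", _mm_extract_pi16(_mm_avg_pu16(a, b), 0)); /* 11, rounds up */
  return 0;
}
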
1707 _mm_sad_pu8(__m64 __A, __m64 __B) {
1714 __a = (__vector unsigned char)(__vector unsigned long long){0UL, __A};
1731 _m_psadbw(__m64 __A, __m64 __B) {
1732 return _mm_sad_pu8(__A, __B);
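
_mm_sad_pu8 sums the absolute byte differences into the low halfword of the result, after placing the operand in one half of a zeroed vector (the {0UL, __A} line). Quick check:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 a = _mm_set1_pi8(10);
  __m64 b = _mm_set_pi8(13, 7, 13, 7, 13, 7, 13, 7);
  /* sum of |a[i] - b[i]| over 8 bytes = 8 * 3 = 24, in halfword 0 */
  printf("%d\n", _mm_extract_pi16(_mm_sad_pu8(a, b), 0)); /* 24 */
  return 0;
}
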
1738 _mm_stream_pi(__m64 *__P, __m64 __A) {
1741 *__P = __A;
1747 _mm_stream_ps(float *__P, __m128 __A) {
1750 _mm_store_ps(__P, __A);
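
Power has no non-temporal store, so _mm_stream_pi/_mm_stream_ps issue a data-cache touch hint (in the lines this search does not match) and fall through to an ordinary store: functionally *__P = __A and _mm_store_ps, alignment rules included. A hedged usage sketch:

#include <xmmintrin.h>

/* On this port _mm_stream_ps is just a hinted _mm_store_ps; dst must
   still be 16-byte aligned. Hypothetical helper for illustration. */
void store_streamed(float *dst, __m128 px) {
  _mm_stream_ps(dst, px);
}
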