Lines Matching defs:xmm0

769 // input: xmm0, xmm1
775 void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
810 movsd(Address(rsp, 8), xmm0);
814 pextrw(eax, xmm0, 3);
823 movdqu(xmm3, xmm0);
831 por(xmm0, xmm2);
833 psrlq(xmm0, 27);
835 psrld(xmm0, 2);
838 rcpps(xmm0, xmm0);
849 mulss(xmm0, xmm7);
859 paddd(xmm0, xmm4);
861 movdl(edx, xmm0);
862 psllq(xmm0, 29);
866 pand(xmm0, xmm6);
870 mulpd(xmm5, xmm0);
873 mulsd(xmm3, xmm0);
890 movdqu(xmm0, ExternalAddress(48 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
899 addpd(xmm0, xmm4);
906 mulpd(xmm0, xmm2);
921 addsd(xmm0, xmm7);
926 addpd(xmm3, xmm0);
929 pshufd(xmm0, xmm3, 238);
931 addsd(xmm0, xmm3);
937 mulsd(xmm2, xmm0);
947 pshufd(xmm0, xmm2, 68);
951 mulpd(xmm0, xmm0);
956 mulsd(xmm0, xmm0);
959 mulpd(xmm0, xmm3);
960 pshufd(xmm3, xmm0, 238);
961 mulsd(xmm0, xmm5);
963 addsd(xmm0, xmm1);
964 addsd(xmm0, xmm3);
965 addsd(xmm0, xmm5);
978 movq(xmm0, Address(rsp, 8));
989 mulsd(xmm0, xmm3);
993 movdqu(xmm3, xmm0);
994 pextrw(eax, xmm0, 3);
995 por(xmm0, xmm2);
997 psrlq(xmm0, 27);
999 psrld(xmm0, 2);
1000 rcpps(xmm0, xmm0);
1004 mulss(xmm0, xmm7);
1008 paddd(xmm0, xmm4);
1010 movdl(edx, xmm0);
1011 psllq(xmm0, 29);
1014 pand(xmm0, xmm6);
1020 mulpd(xmm5, xmm0);
1024 movq(xmm0, Address(rsp, 8));
1035 mulsd(xmm0, xmm3);
1039 movdqu(xmm3, xmm0);
1040 pextrw(eax, xmm0, 3);
1041 por(xmm0, xmm2);
1043 psrlq(xmm0, 27);
1045 psrld(xmm0, 2);
1046 rcpps(xmm0, xmm0);
1050 mulss(xmm0, xmm7);
1054 paddd(xmm0, xmm4);
1056 movdl(edx, xmm0);
1057 psllq(xmm0, 29);
1060 pand(xmm0, xmm6);
1066 mulpd(xmm5, xmm0);
1074 addsd(xmm0, xmm7);
1076 addpd(xmm3, xmm0);
1080 pshufd(xmm0, xmm3, 238);
1081 addsd(xmm0, xmm3);
1083 addsd(xmm5, xmm0);
1091 addsd(xmm0, xmm3);
1094 addsd(xmm0, xmm7);
1095 mulsd(xmm2, xmm0);
1123 pshufd(xmm0, xmm2, 68);
1125 mulpd(xmm0, xmm0);
1129 mulsd(xmm0, xmm0);
1134 mulpd(xmm0, xmm3);
1136 pshufd(xmm3, xmm0, 238);
1137 mulsd(xmm0, xmm5);
1143 addsd(xmm0, xmm1);
1144 addsd(xmm0, xmm3);
1145 movdqu(xmm1, xmm0);
1146 addsd(xmm0, xmm5);
1147 mulsd(xmm0, xmm4);
1148 pextrw(eax, xmm0, 3);
1157 movq(xmm0, Address(rsp, 8));
1158 movdqu(xmm2, xmm0);
1171 addsd(xmm0, xmm0);
1175 xorpd(xmm0, xmm0);
1177 pinsrw(xmm0, eax, 3);
1182 movq(xmm0, Address(rsp, 16));
1183 addpd(xmm0, xmm0);
1206 pextrw(eax, xmm0, 3);
1255 xorpd(xmm0, xmm0);
1257 pinsrw(xmm0, eax, 3);
1265 xorpd(xmm0, xmm0);
1267 pinsrw(xmm0, eax, 3);
1283 movq(xmm0, Address(rsp, 16));
1284 addsd(xmm0, xmm0);
1288 movq(xmm0, Address(rsp, 8));
1289 pextrw(eax, xmm0, 3);
1292 movdl(ecx, xmm0);
1293 psrlq(xmm0, 20);
1294 movdl(edx, xmm0);
1297 xorpd(xmm0, xmm0);
1299 pinsrw(xmm0, eax, 3);
1307 xorpd(xmm0, xmm0);
1315 pinsrw(xmm0, ecx, 3);
1400 xorpd(xmm0, xmm0);
1401 mulsd(xmm0, xmm1);
1430 xorpd(xmm0, xmm0);
1432 pinsrw(xmm0, eax, 3);
1436 xorpd(xmm0, xmm0);
1444 addsd(xmm0, xmm7);
1446 addpd(xmm3, xmm0);
1447 pshufd(xmm0, xmm3, 238);
1448 addsd(xmm3, xmm0);
1452 xorpd(xmm0, xmm0);
1456 pinsrw(xmm0, eax, 3);
1459 addsd(xmm0, xmm5);
1465 xorpd(xmm0, xmm0);
1467 pinsrw(xmm0, eax, 3);
1471 xorpd(xmm0, xmm0);
1473 pinsrw(xmm0, eax, 3);
1520 movq(xmm0, Address(rsp, 8));
1534 xorpd(xmm0, xmm0);
1540 divsd(xmm1, xmm0);
1541 movdqu(xmm0, xmm1);
1562 pinsrw(xmm0, eax, 3);
1566 mulsd(xmm0, xmm1);
1574 pinsrw(xmm0, eax, 3);
1575 mulsd(xmm0, xmm0);
1580 xorpd(xmm0, xmm2);
1603 movdl(xmm0, eax);
1604 psllq(xmm0, 32);
1605 pand(xmm0, xmm5);
1606 subsd(xmm5, xmm0);
1608 mulsd(xmm0, xmm4);
1610 addsd(xmm0, xmm5);
1620 paddd(xmm0, xmm4);
1621 movdl(edx, xmm0);
1622 psllq(xmm0, 29);
1628 pand(xmm0, xmm6);
1634 mulpd(xmm5, xmm0);
1637 mulsd(xmm3, xmm0);
1644 movq(xmm0, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL
1649 mulsd(xmm0, xmm5);
1654 addsd(xmm5, xmm0);
1655 movdqu(xmm0, xmm7);
1659 subsd(xmm0, xmm7);
1662 addsd(xmm0, xmm5);
1668 addsd(xmm4, xmm0);
1669 movdqu(xmm0, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
1689 mulpd(xmm0, xmm3);
1693 addpd(xmm0, xmm1);
1700 mulpd(xmm0, xmm4);
1703 mulpd(xmm0, xmm3);
1708 addpd(xmm5, xmm0);
1717 pshufd(xmm0, xmm5, 238);
1720 addsd(xmm5, xmm0);
1751 pshufd(xmm0, xmm4, 68);
1753 mulpd(xmm0, xmm0);
1760 mulsd(xmm0, xmm0);
1765 mulpd(xmm0, xmm3);
1767 pshufd(xmm3, xmm0, 238);
1772 mulsd(xmm0, xmm5);
1774 addsd(xmm0, xmm2);
1776 addsd(xmm0, xmm3);
1777 movdqu(xmm1, xmm0);
1778 addsd(xmm0, xmm5);
1779 mulsd(xmm0, xmm4);
1780 pextrw(eax, xmm0, 3);
1790 movq(xmm0, Address(rsp, 8));
1794 addsd(xmm2, xmm0);
1798 xorpd(xmm0, xmm0);
1800 pinsrw(xmm0, eax, 3);
1812 movdqu(xmm0, xmm1);
1824 pextrw(eax, xmm0, 3);
1827 xorpd(xmm0, xmm0);
1836 pinsrw(xmm0, edx, 3);
1840 movq(Address(rsp, 24), xmm0);
1843 movq(xmm0, Address(rsp, 24));
2487 // input: xmm0, xmm1
2493 void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
2518 movsd(xmm0, Address(rsp, 128));
2529 movsd(Address(rsp, 8), xmm0);
2530 movdqu(xmm3, xmm0);
2534 pextrw(eax, xmm0, 3);
2535 por(xmm0, xmm2);
2536 psllq(xmm0, 5);
2538 psrlq(xmm0, 34);
2546 rcpss(xmm0, xmm0);
2558 mulss(xmm0, xmm7);
2567 paddd(xmm0, xmm4);
2569 movdl(edx, xmm0);
2570 psllq(xmm0, 29);
2574 pand(xmm0, xmm6);
2579 mulpd(xmm5, xmm0);
2582 mulsd(xmm3, xmm0);
2598 movdqu(xmm0, Address(tmp, 8320));
2607 addpd(xmm0, xmm4);
2614 mulpd(xmm0, xmm2);
2629 addsd(xmm0, xmm7);
2633 addpd(xmm3, xmm0);
2636 pshufd(xmm0, xmm3, 238);
2638 addsd(xmm0, xmm3);
2644 mulsd(xmm2, xmm0);
2653 pshufd(xmm0, xmm2, 68);
2655 mulpd(xmm0, xmm0);
2660 mulsd(xmm0, xmm0);
2665 mulpd(xmm0, xmm3);
2667 pshufd(xmm3, xmm0, 238);
2668 mulsd(xmm0, xmm5);
2670 addsd(xmm0, xmm1);
2671 addsd(xmm0, xmm3);
2672 addsd(xmm0, xmm5);
2673 movsd(Address(rsp, 0), xmm0);
2678 movsd(xmm0, Address(rsp, 128));
2680 mulsd(xmm0, xmm1);
2681 movsd(Address(rsp, 0), xmm0);
2703 movsd(xmm0, Address(rsp, 8));
2714 mulsd(xmm0, xmm3);
2718 movdqu(xmm3, xmm0);
2719 pextrw(eax, xmm0, 3);
2720 por(xmm0, xmm2);
2722 psllq(xmm0, 5);
2724 psrlq(xmm0, 34);
2725 rcpss(xmm0, xmm0);
2729 mulss(xmm0, xmm7);
2733 paddd(xmm0, xmm4);
2735 movdl(edx, xmm0);
2736 psllq(xmm0, 29);
2739 pand(xmm0, xmm6);
2745 mulpd(xmm5, xmm0);
2757 movsd(xmm0, Address(rsp, 8));
2768 mulsd(xmm0, xmm3);
2772 movdqu(xmm3, xmm0);
2773 pextrw(eax, xmm0, 3);
2774 por(xmm0, xmm2);
2776 psllq(xmm0, 5);
2778 psrlq(xmm0, 34);
2779 rcpss(xmm0, xmm0);
2783 mulss(xmm0, xmm7);
2787 paddd(xmm0, xmm4);
2789 movdl(edx, xmm0);
2790 psllq(xmm0, 29);
2793 pand(xmm0, xmm6);
2799 mulpd(xmm5, xmm0);
2809 addsd(xmm0, xmm7);
2811 addpd(xmm3, xmm0);
2815 pshufd(xmm0, xmm3, 238);
2816 addsd(xmm0, xmm3);
2818 addsd(xmm5, xmm0);
2826 addsd(xmm0, xmm3);
2829 addsd(xmm0, xmm7);
2830 mulsd(xmm2, xmm0);
2857 pshufd(xmm0, xmm2, 68);
2859 mulpd(xmm0, xmm0);
2863 mulsd(xmm0, xmm0);
2868 mulpd(xmm0, xmm3);
2870 pshufd(xmm3, xmm0, 238);
2871 mulsd(xmm0, xmm5);
2877 addsd(xmm0, xmm1);
2879 addsd(xmm0, xmm3);
2880 movdqu(xmm1, xmm0);
2881 addsd(xmm0, xmm5);
2882 mulsd(xmm0, xmm4);
2883 pextrw(eax, xmm0, 3);
2890 movsd(Address(rsp, 0), xmm0);
2896 movsd(xmm0, Address(rsp, 8));
2897 movdqu(xmm2, xmm0);
2903 addsd(xmm0, xmm0);
2914 xorpd(xmm0, xmm0);
2916 pinsrw(xmm0, eax, 3);
2921 movsd(xmm0, Address(rsp, 16));
2922 addpd(xmm0, xmm0);
2945 pextrw(eax, xmm0, 3);
2991 xorpd(xmm0, xmm0);
2993 pinsrw(xmm0, eax, 3);
3003 xorpd(xmm0, xmm0);
3005 pinsrw(xmm0, eax, 3);
3021 movsd(xmm0, Address(rsp, 16));
3022 addsd(xmm0, xmm0);
3026 movsd(xmm0, Address(rsp, 8));
3027 pextrw(eax, xmm0, 3);
3030 movdl(ecx, xmm0);
3031 psrlq(xmm0, 20);
3032 movdl(edx, xmm0);
3035 xorpd(xmm0, xmm0);
3037 pinsrw(xmm0, eax, 3);
3045 xorpd(xmm0, xmm0);
3050 pinsrw(xmm0, ecx, 3);
3135 xorpd(xmm0, xmm0);
3136 mulsd(xmm0, xmm1);
3165 xorpd(xmm0, xmm0);
3167 pinsrw(xmm0, eax, 3);
3171 xorpd(xmm0, xmm0);
3179 addsd(xmm0, xmm7);
3181 addpd(xmm3, xmm0);
3182 pshufd(xmm0, xmm3, 238);
3183 addsd(xmm3, xmm0);
3186 xorpd(xmm0, xmm0);
3190 pinsrw(xmm0, eax, 3);
3194 addsd(xmm0, xmm5);
3199 xorpd(xmm0, xmm0);
3201 pinsrw(xmm0, eax, 3);
3205 xorpd(xmm0, xmm0);
3207 pinsrw(xmm0, eax, 3);
3211 xorpd(xmm0, xmm0);
3213 pinsrw(xmm0, eax, 3);
3260 movsd(xmm0, Address(rsp, 8));
3274 xorpd(xmm0, xmm0);
3280 divsd(xmm1, xmm0);
3281 movdqu(xmm0, xmm1);
3302 pinsrw(xmm0, eax, 3);
3307 mulsd(xmm0, xmm1);
3313 movsd(Address(rsp, 0), xmm0);
3319 pinsrw(xmm0, eax, 3);
3320 mulsd(xmm0, xmm0);
3324 xorpd(xmm0, xmm2);
3348 movdl(xmm0, eax);
3349 psllq(xmm0, 32);
3350 pand(xmm0, xmm5);
3351 subsd(xmm5, xmm0);
3353 mulsd(xmm0, xmm4);
3355 addsd(xmm0, xmm5);
3366 paddd(xmm0, xmm4);
3368 movdl(edx, xmm0);
3369 psllq(xmm0, 29);
3375 pand(xmm0, xmm6);
3381 mulpd(xmm5, xmm0);
3383 mulsd(xmm3, xmm0);
3390 movsd(xmm0, Address(tmp, 8));
3395 mulsd(xmm0, xmm5);
3400 addsd(xmm5, xmm0);
3401 movdqu(xmm0, xmm7);
3405 subsd(xmm0, xmm7);
3408 addsd(xmm0, xmm5);
3414 addsd(xmm4, xmm0);
3415 movdqu(xmm0, Address(tmp, 8272));
3435 mulpd(xmm0, xmm3);
3439 addpd(xmm0, xmm1);
3446 mulpd(xmm0, xmm4);
3449 mulpd(xmm0, xmm3);
3454 addpd(xmm5, xmm0);
3463 pshufd(xmm0, xmm5, 238);
3466 addsd(xmm5, xmm0);
3496 pshufd(xmm0, xmm4, 68);
3498 mulpd(xmm0, xmm0);
3505 mulsd(xmm0, xmm0);
3510 mulpd(xmm0, xmm3);
3512 pshufd(xmm3, xmm0, 238);
3517 mulsd(xmm0, xmm5);
3519 addsd(xmm0, xmm2);
3521 addsd(xmm0, xmm3);
3522 movdqu(xmm1, xmm0);
3523 addsd(xmm0, xmm5);
3525 mulsd(xmm0, xmm4);
3526 pextrw(eax, xmm0, 3);
3533 movsd(Address(rsp, 0), xmm0);
3541 movsd(xmm0, Address(rsp, 8));
3547 addsd(xmm2, xmm0);
3551 xorpd(xmm0, xmm0);
3553 pinsrw(xmm0, eax, 3);
3564 movdqu(xmm0, xmm1);
3568 pextrw(eax, xmm0, 3);
3571 xorpd(xmm0, xmm0);
3577 pinsrw(xmm0, edx, 3);