ecp_nistz256-x86_64.S (290207) | ecp_nistz256-x86_64.S (296279) |
---|---|
1 # $FreeBSD: head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 290207 2015-10-30 20:51:33Z jkim $ | 1 # $FreeBSD: head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 296279 2016-03-01 22:08:28Z jkim $ |
2.text 3 4 5 6.align 64 7.Lpoly: 8.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 9 --- 1107 unchanged lines hidden (view full) --- 1117 pushq %rbp 1118 pushq %rbx 1119 pushq %r12 1120 pushq %r13 1121 pushq %r14 1122 pushq %r15 1123 subq $160+8,%rsp 1124 | 2.text 3 4 5 6.align 64 7.Lpoly: 8.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 9 --- 1107 unchanged lines hidden (view full) --- 1117 pushq %rbp 1118 pushq %rbx 1119 pushq %r12 1120 pushq %r13 1121 pushq %r14 1122 pushq %r15 1123 subq $160+8,%rsp 1124 |
1125.Lpoint_double_shortcutq: |
|
1125 movdqu 0(%rsi),%xmm0 1126 movq %rsi,%rbx 1127 movdqu 16(%rsi),%xmm1 1128 movq 32+0(%rsi),%r12 1129 movq 32+8(%rsi),%r13 1130 movq 32+16(%rsi),%r8 1131 movq 32+24(%rsi),%r9 1132 movq .Lpoly+8(%rip),%r14 --- 204 unchanged lines hidden (view full) --- 1337 movdqa %xmm2,416(%rsp) 1338 movdqa %xmm3,416+16(%rsp) 1339 por %xmm2,%xmm3 1340 movdqa %xmm4,448(%rsp) 1341 movdqa %xmm5,448+16(%rsp) 1342 por %xmm1,%xmm3 1343 1344 movdqu 0(%rsi),%xmm0 | 1126 movdqu 0(%rsi),%xmm0 1127 movq %rsi,%rbx 1128 movdqu 16(%rsi),%xmm1 1129 movq 32+0(%rsi),%r12 1130 movq 32+8(%rsi),%r13 1131 movq 32+16(%rsi),%r8 1132 movq 32+24(%rsi),%r9 1133 movq .Lpoly+8(%rip),%r14 --- 204 unchanged lines hidden (view full) --- 1338 movdqa %xmm2,416(%rsp) 1339 movdqa %xmm3,416+16(%rsp) 1340 por %xmm2,%xmm3 1341 movdqa %xmm4,448(%rsp) 1342 movdqa %xmm5,448+16(%rsp) 1343 por %xmm1,%xmm3 1344 1345 movdqu 0(%rsi),%xmm0 |
1345 pshufd $177,%xmm3,%xmm5 | 1346 pshufd $0xb1,%xmm3,%xmm5 |
1346 movdqu 16(%rsi),%xmm1 1347 movdqu 32(%rsi),%xmm2 1348 por %xmm3,%xmm5 1349 movdqu 48(%rsi),%xmm3 1350 movq 64+0(%rsi),%rax 1351 movq 64+8(%rsi),%r14 1352 movq 64+16(%rsi),%r15 1353 movq 64+24(%rsi),%r8 1354 movdqa %xmm0,480(%rsp) | 1347 movdqu 16(%rsi),%xmm1 1348 movdqu 32(%rsi),%xmm2 1349 por %xmm3,%xmm5 1350 movdqu 48(%rsi),%xmm3 1351 movq 64+0(%rsi),%rax 1352 movq 64+8(%rsi),%r14 1353 movq 64+16(%rsi),%r15 1354 movq 64+24(%rsi),%r8 1355 movdqa %xmm0,480(%rsp) |
1355 pshufd $30,%xmm5,%xmm4 | 1356 pshufd $0x1e,%xmm5,%xmm4 |
1356 movdqa %xmm1,480+16(%rsp) 1357 por %xmm0,%xmm1 1358.byte 102,72,15,110,199 1359 movdqa %xmm2,512(%rsp) 1360 movdqa %xmm3,512+16(%rsp) 1361 por %xmm2,%xmm3 1362 por %xmm4,%xmm5 1363 pxor %xmm4,%xmm4 1364 por %xmm1,%xmm3 1365 1366 leaq 64-0(%rsi),%rsi 1367 movq %rax,544+0(%rsp) 1368 movq %r14,544+8(%rsp) 1369 movq %r15,544+16(%rsp) 1370 movq %r8,544+24(%rsp) 1371 leaq 96(%rsp),%rdi 1372 call __ecp_nistz256_sqr_montq 1373 1374 pcmpeqd %xmm4,%xmm5 | 1357 movdqa %xmm1,480+16(%rsp) 1358 por %xmm0,%xmm1 1359.byte 102,72,15,110,199 1360 movdqa %xmm2,512(%rsp) 1361 movdqa %xmm3,512+16(%rsp) 1362 por %xmm2,%xmm3 1363 por %xmm4,%xmm5 1364 pxor %xmm4,%xmm4 1365 por %xmm1,%xmm3 1366 1367 leaq 64-0(%rsi),%rsi 1368 movq %rax,544+0(%rsp) 1369 movq %r14,544+8(%rsp) 1370 movq %r15,544+16(%rsp) 1371 movq %r8,544+24(%rsp) 1372 leaq 96(%rsp),%rdi 1373 call __ecp_nistz256_sqr_montq 1374 1375 pcmpeqd %xmm4,%xmm5 |
1375 pshufd $177,%xmm3,%xmm4 | 1376 pshufd $0xb1,%xmm3,%xmm4 |
1376 por %xmm3,%xmm4 1377 pshufd $0,%xmm5,%xmm5 | 1377 por %xmm3,%xmm4 1378 pshufd $0,%xmm5,%xmm5 |
1378 pshufd $30,%xmm4,%xmm3 | 1379 pshufd $0x1e,%xmm4,%xmm3 |
1379 por %xmm3,%xmm4 1380 pxor %xmm3,%xmm3 1381 pcmpeqd %xmm3,%xmm4 1382 pshufd $0,%xmm4,%xmm4 1383 movq 64+0(%rbx),%rax 1384 movq 64+8(%rbx),%r14 1385 movq 64+16(%rbx),%r15 1386 movq 64+24(%rbx),%r8 | 1380 por %xmm3,%xmm4 1381 pxor %xmm3,%xmm3 1382 pcmpeqd %xmm3,%xmm4 1383 pshufd $0,%xmm4,%xmm4 1384 movq 64+0(%rbx),%rax 1385 movq 64+8(%rbx),%r14 1386 movq 64+16(%rbx),%r15 1387 movq 64+24(%rbx),%r8 |
1388.byte 102,72,15,110,203 |
|
1387 1388 leaq 64-0(%rbx),%rsi 1389 leaq 32(%rsp),%rdi 1390 call __ecp_nistz256_sqr_montq 1391 1392 movq 544(%rsp),%rax 1393 leaq 544(%rsp),%rbx 1394 movq 0+96(%rsp),%r9 --- 75 unchanged lines hidden (view full) --- 1470 1471.byte 0x3e 1472 jnz .Ladd_proceedq 1473.byte 102,73,15,126,208 1474.byte 102,73,15,126,217 1475 testq %r8,%r8 1476 jnz .Ladd_proceedq 1477 testq %r9,%r9 | 1389 1390 leaq 64-0(%rbx),%rsi 1391 leaq 32(%rsp),%rdi 1392 call __ecp_nistz256_sqr_montq 1393 1394 movq 544(%rsp),%rax 1395 leaq 544(%rsp),%rbx 1396 movq 0+96(%rsp),%r9 --- 75 unchanged lines hidden (view full) --- 1472 1473.byte 0x3e 1474 jnz .Ladd_proceedq 1475.byte 102,73,15,126,208 1476.byte 102,73,15,126,217 1477 testq %r8,%r8 1478 jnz .Ladd_proceedq 1479 testq %r9,%r9 |
1478 jz .Ladd_proceedq | 1480 jz .Ladd_doubleq |
1479 1480.byte 102,72,15,126,199 1481 pxor %xmm0,%xmm0 1482 movdqu %xmm0,0(%rdi) 1483 movdqu %xmm0,16(%rdi) 1484 movdqu %xmm0,32(%rdi) 1485 movdqu %xmm0,48(%rdi) 1486 movdqu %xmm0,64(%rdi) 1487 movdqu %xmm0,80(%rdi) 1488 jmp .Ladd_doneq 1489 1490.align 32 | 1481 1482.byte 102,72,15,126,199 1483 pxor %xmm0,%xmm0 1484 movdqu %xmm0,0(%rdi) 1485 movdqu %xmm0,16(%rdi) 1486 movdqu %xmm0,32(%rdi) 1487 movdqu %xmm0,48(%rdi) 1488 movdqu %xmm0,64(%rdi) 1489 movdqu %xmm0,80(%rdi) 1490 jmp .Ladd_doneq 1491 1492.align 32 |
1493.Ladd_doubleq: 1494.byte 102,72,15,126,206 1495.byte 102,72,15,126,199 1496 addq $416,%rsp 1497 jmp .Lpoint_double_shortcutq 1498 1499.align 32 |
|
1491.Ladd_proceedq: 1492 movq 0+64(%rsp),%rax 1493 movq 8+64(%rsp),%r14 1494 leaq 0+64(%rsp),%rsi 1495 movq 16+64(%rsp),%r15 1496 movq 24+64(%rsp),%r8 1497 leaq 96(%rsp),%rdi 1498 call __ecp_nistz256_sqr_montq --- 230 unchanged lines hidden (view full) --- 1729 movdqa %xmm2,352(%rsp) 1730 movdqa %xmm3,352+16(%rsp) 1731 por %xmm2,%xmm3 1732 movdqa %xmm4,384(%rsp) 1733 movdqa %xmm5,384+16(%rsp) 1734 por %xmm1,%xmm3 1735 1736 movdqu 0(%rbx),%xmm0 | 1500.Ladd_proceedq: 1501 movq 0+64(%rsp),%rax 1502 movq 8+64(%rsp),%r14 1503 leaq 0+64(%rsp),%rsi 1504 movq 16+64(%rsp),%r15 1505 movq 24+64(%rsp),%r8 1506 leaq 96(%rsp),%rdi 1507 call __ecp_nistz256_sqr_montq --- 230 unchanged lines hidden (view full) --- 1738 movdqa %xmm2,352(%rsp) 1739 movdqa %xmm3,352+16(%rsp) 1740 por %xmm2,%xmm3 1741 movdqa %xmm4,384(%rsp) 1742 movdqa %xmm5,384+16(%rsp) 1743 por %xmm1,%xmm3 1744 1745 movdqu 0(%rbx),%xmm0 |
1737 pshufd $177,%xmm3,%xmm5 | 1746 pshufd $0xb1,%xmm3,%xmm5 |
1738 movdqu 16(%rbx),%xmm1 1739 movdqu 32(%rbx),%xmm2 1740 por %xmm3,%xmm5 1741 movdqu 48(%rbx),%xmm3 1742 movdqa %xmm0,416(%rsp) | 1747 movdqu 16(%rbx),%xmm1 1748 movdqu 32(%rbx),%xmm2 1749 por %xmm3,%xmm5 1750 movdqu 48(%rbx),%xmm3 1751 movdqa %xmm0,416(%rsp) |
1743 pshufd $30,%xmm5,%xmm4 | 1752 pshufd $0x1e,%xmm5,%xmm4 |
1744 movdqa %xmm1,416+16(%rsp) 1745 por %xmm0,%xmm1 1746.byte 102,72,15,110,199 1747 movdqa %xmm2,448(%rsp) 1748 movdqa %xmm3,448+16(%rsp) 1749 por %xmm2,%xmm3 1750 por %xmm4,%xmm5 1751 pxor %xmm4,%xmm4 1752 por %xmm1,%xmm3 1753 1754 leaq 64-0(%rsi),%rsi 1755 leaq 32(%rsp),%rdi 1756 call __ecp_nistz256_sqr_montq 1757 1758 pcmpeqd %xmm4,%xmm5 | 1753 movdqa %xmm1,416+16(%rsp) 1754 por %xmm0,%xmm1 1755.byte 102,72,15,110,199 1756 movdqa %xmm2,448(%rsp) 1757 movdqa %xmm3,448+16(%rsp) 1758 por %xmm2,%xmm3 1759 por %xmm4,%xmm5 1760 pxor %xmm4,%xmm4 1761 por %xmm1,%xmm3 1762 1763 leaq 64-0(%rsi),%rsi 1764 leaq 32(%rsp),%rdi 1765 call __ecp_nistz256_sqr_montq 1766 1767 pcmpeqd %xmm4,%xmm5 |
1759 pshufd $177,%xmm3,%xmm4 | 1768 pshufd $0xb1,%xmm3,%xmm4 |
1760 movq 0(%rbx),%rax 1761 1762 movq %r12,%r9 1763 por %xmm3,%xmm4 1764 pshufd $0,%xmm5,%xmm5 | 1769 movq 0(%rbx),%rax 1770 1771 movq %r12,%r9 1772 por %xmm3,%xmm4 1773 pshufd $0,%xmm5,%xmm5 |
1765 pshufd $30,%xmm4,%xmm3 | 1774 pshufd $0x1e,%xmm4,%xmm3 |
1766 movq %r13,%r10 1767 por %xmm3,%xmm4 1768 pxor %xmm3,%xmm3 1769 movq %r14,%r11 1770 pcmpeqd %xmm3,%xmm4 1771 pshufd $0,%xmm4,%xmm4 1772 1773 leaq 32-0(%rsp),%rsi --- 232 unchanged lines hidden --- | 1775 movq %r13,%r10 1776 por %xmm3,%xmm4 1777 pxor %xmm3,%xmm3 1778 movq %r14,%r11 1779 pcmpeqd %xmm3,%xmm4 1780 pshufd $0,%xmm4,%xmm4 1781 1782 leaq 32-0(%rsp),%rsi --- 232 unchanged lines hidden --- |