Deleted Added
full compact
ecp_nistz256-x86_64.S (290207) ecp_nistz256-x86_64.S (296279)
1 # $FreeBSD: head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 290207 2015-10-30 20:51:33Z jkim $
1 # $FreeBSD: head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 296279 2016-03-01 22:08:28Z jkim $
2.text
3
4
5
6.align 64
7.Lpoly:
8.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
9

--- 1107 unchanged lines hidden (view full) ---

1117 pushq %rbp
1118 pushq %rbx
1119 pushq %r12
1120 pushq %r13
1121 pushq %r14
1122 pushq %r15
1123 subq $160+8,%rsp
1124
2.text
3
4
5
6.align 64
7.Lpoly:
8.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
9

--- 1107 unchanged lines hidden (view full) ---

1117 pushq %rbp
1118 pushq %rbx
1119 pushq %r12
1120 pushq %r13
1121 pushq %r14
1122 pushq %r15
1123 subq $160+8,%rsp
1124
1125.Lpoint_double_shortcutq:
1125 movdqu 0(%rsi),%xmm0
1126 movq %rsi,%rbx
1127 movdqu 16(%rsi),%xmm1
1128 movq 32+0(%rsi),%r12
1129 movq 32+8(%rsi),%r13
1130 movq 32+16(%rsi),%r8
1131 movq 32+24(%rsi),%r9
1132 movq .Lpoly+8(%rip),%r14

--- 204 unchanged lines hidden (view full) ---

1337 movdqa %xmm2,416(%rsp)
1338 movdqa %xmm3,416+16(%rsp)
1339 por %xmm2,%xmm3
1340 movdqa %xmm4,448(%rsp)
1341 movdqa %xmm5,448+16(%rsp)
1342 por %xmm1,%xmm3
1343
1344 movdqu 0(%rsi),%xmm0
1126 movdqu 0(%rsi),%xmm0
1127 movq %rsi,%rbx
1128 movdqu 16(%rsi),%xmm1
1129 movq 32+0(%rsi),%r12
1130 movq 32+8(%rsi),%r13
1131 movq 32+16(%rsi),%r8
1132 movq 32+24(%rsi),%r9
1133 movq .Lpoly+8(%rip),%r14

--- 204 unchanged lines hidden (view full) ---

1338 movdqa %xmm2,416(%rsp)
1339 movdqa %xmm3,416+16(%rsp)
1340 por %xmm2,%xmm3
1341 movdqa %xmm4,448(%rsp)
1342 movdqa %xmm5,448+16(%rsp)
1343 por %xmm1,%xmm3
1344
1345 movdqu 0(%rsi),%xmm0
1345 pshufd $177,%xmm3,%xmm5
1346 pshufd $0xb1,%xmm3,%xmm5
1346 movdqu 16(%rsi),%xmm1
1347 movdqu 32(%rsi),%xmm2
1348 por %xmm3,%xmm5
1349 movdqu 48(%rsi),%xmm3
1350 movq 64+0(%rsi),%rax
1351 movq 64+8(%rsi),%r14
1352 movq 64+16(%rsi),%r15
1353 movq 64+24(%rsi),%r8
1354 movdqa %xmm0,480(%rsp)
1347 movdqu 16(%rsi),%xmm1
1348 movdqu 32(%rsi),%xmm2
1349 por %xmm3,%xmm5
1350 movdqu 48(%rsi),%xmm3
1351 movq 64+0(%rsi),%rax
1352 movq 64+8(%rsi),%r14
1353 movq 64+16(%rsi),%r15
1354 movq 64+24(%rsi),%r8
1355 movdqa %xmm0,480(%rsp)
1355 pshufd $30,%xmm5,%xmm4
1356 pshufd $0x1e,%xmm5,%xmm4
1356 movdqa %xmm1,480+16(%rsp)
1357 por %xmm0,%xmm1
1358.byte 102,72,15,110,199
1359 movdqa %xmm2,512(%rsp)
1360 movdqa %xmm3,512+16(%rsp)
1361 por %xmm2,%xmm3
1362 por %xmm4,%xmm5
1363 pxor %xmm4,%xmm4
1364 por %xmm1,%xmm3
1365
1366 leaq 64-0(%rsi),%rsi
1367 movq %rax,544+0(%rsp)
1368 movq %r14,544+8(%rsp)
1369 movq %r15,544+16(%rsp)
1370 movq %r8,544+24(%rsp)
1371 leaq 96(%rsp),%rdi
1372 call __ecp_nistz256_sqr_montq
1373
1374 pcmpeqd %xmm4,%xmm5
1357 movdqa %xmm1,480+16(%rsp)
1358 por %xmm0,%xmm1
1359.byte 102,72,15,110,199
1360 movdqa %xmm2,512(%rsp)
1361 movdqa %xmm3,512+16(%rsp)
1362 por %xmm2,%xmm3
1363 por %xmm4,%xmm5
1364 pxor %xmm4,%xmm4
1365 por %xmm1,%xmm3
1366
1367 leaq 64-0(%rsi),%rsi
1368 movq %rax,544+0(%rsp)
1369 movq %r14,544+8(%rsp)
1370 movq %r15,544+16(%rsp)
1371 movq %r8,544+24(%rsp)
1372 leaq 96(%rsp),%rdi
1373 call __ecp_nistz256_sqr_montq
1374
1375 pcmpeqd %xmm4,%xmm5
1375 pshufd $177,%xmm3,%xmm4
1376 pshufd $0xb1,%xmm3,%xmm4
1376 por %xmm3,%xmm4
1377 pshufd $0,%xmm5,%xmm5
1377 por %xmm3,%xmm4
1378 pshufd $0,%xmm5,%xmm5
1378 pshufd $30,%xmm4,%xmm3
1379 pshufd $0x1e,%xmm4,%xmm3
1379 por %xmm3,%xmm4
1380 pxor %xmm3,%xmm3
1381 pcmpeqd %xmm3,%xmm4
1382 pshufd $0,%xmm4,%xmm4
1383 movq 64+0(%rbx),%rax
1384 movq 64+8(%rbx),%r14
1385 movq 64+16(%rbx),%r15
1386 movq 64+24(%rbx),%r8
1380 por %xmm3,%xmm4
1381 pxor %xmm3,%xmm3
1382 pcmpeqd %xmm3,%xmm4
1383 pshufd $0,%xmm4,%xmm4
1384 movq 64+0(%rbx),%rax
1385 movq 64+8(%rbx),%r14
1386 movq 64+16(%rbx),%r15
1387 movq 64+24(%rbx),%r8
1388.byte 102,72,15,110,203
1387
1388 leaq 64-0(%rbx),%rsi
1389 leaq 32(%rsp),%rdi
1390 call __ecp_nistz256_sqr_montq
1391
1392 movq 544(%rsp),%rax
1393 leaq 544(%rsp),%rbx
1394 movq 0+96(%rsp),%r9

--- 75 unchanged lines hidden (view full) ---

1470
1471.byte 0x3e
1472 jnz .Ladd_proceedq
1473.byte 102,73,15,126,208
1474.byte 102,73,15,126,217
1475 testq %r8,%r8
1476 jnz .Ladd_proceedq
1477 testq %r9,%r9
1389
1390 leaq 64-0(%rbx),%rsi
1391 leaq 32(%rsp),%rdi
1392 call __ecp_nistz256_sqr_montq
1393
1394 movq 544(%rsp),%rax
1395 leaq 544(%rsp),%rbx
1396 movq 0+96(%rsp),%r9

--- 75 unchanged lines hidden (view full) ---

1472
1473.byte 0x3e
1474 jnz .Ladd_proceedq
1475.byte 102,73,15,126,208
1476.byte 102,73,15,126,217
1477 testq %r8,%r8
1478 jnz .Ladd_proceedq
1479 testq %r9,%r9
1478 jz .Ladd_proceedq
1480 jz .Ladd_doubleq
1479
1480.byte 102,72,15,126,199
1481 pxor %xmm0,%xmm0
1482 movdqu %xmm0,0(%rdi)
1483 movdqu %xmm0,16(%rdi)
1484 movdqu %xmm0,32(%rdi)
1485 movdqu %xmm0,48(%rdi)
1486 movdqu %xmm0,64(%rdi)
1487 movdqu %xmm0,80(%rdi)
1488 jmp .Ladd_doneq
1489
1490.align 32
1481
1482.byte 102,72,15,126,199
1483 pxor %xmm0,%xmm0
1484 movdqu %xmm0,0(%rdi)
1485 movdqu %xmm0,16(%rdi)
1486 movdqu %xmm0,32(%rdi)
1487 movdqu %xmm0,48(%rdi)
1488 movdqu %xmm0,64(%rdi)
1489 movdqu %xmm0,80(%rdi)
1490 jmp .Ladd_doneq
1491
1492.align 32
1493.Ladd_doubleq:
1494.byte 102,72,15,126,206
1495.byte 102,72,15,126,199
1496 addq $416,%rsp
1497 jmp .Lpoint_double_shortcutq
1498
1499.align 32
1491.Ladd_proceedq:
1492 movq 0+64(%rsp),%rax
1493 movq 8+64(%rsp),%r14
1494 leaq 0+64(%rsp),%rsi
1495 movq 16+64(%rsp),%r15
1496 movq 24+64(%rsp),%r8
1497 leaq 96(%rsp),%rdi
1498 call __ecp_nistz256_sqr_montq

--- 230 unchanged lines hidden (view full) ---

1729 movdqa %xmm2,352(%rsp)
1730 movdqa %xmm3,352+16(%rsp)
1731 por %xmm2,%xmm3
1732 movdqa %xmm4,384(%rsp)
1733 movdqa %xmm5,384+16(%rsp)
1734 por %xmm1,%xmm3
1735
1736 movdqu 0(%rbx),%xmm0
1500.Ladd_proceedq:
1501 movq 0+64(%rsp),%rax
1502 movq 8+64(%rsp),%r14
1503 leaq 0+64(%rsp),%rsi
1504 movq 16+64(%rsp),%r15
1505 movq 24+64(%rsp),%r8
1506 leaq 96(%rsp),%rdi
1507 call __ecp_nistz256_sqr_montq

--- 230 unchanged lines hidden (view full) ---

1738 movdqa %xmm2,352(%rsp)
1739 movdqa %xmm3,352+16(%rsp)
1740 por %xmm2,%xmm3
1741 movdqa %xmm4,384(%rsp)
1742 movdqa %xmm5,384+16(%rsp)
1743 por %xmm1,%xmm3
1744
1745 movdqu 0(%rbx),%xmm0
1737 pshufd $177,%xmm3,%xmm5
1746 pshufd $0xb1,%xmm3,%xmm5
1738 movdqu 16(%rbx),%xmm1
1739 movdqu 32(%rbx),%xmm2
1740 por %xmm3,%xmm5
1741 movdqu 48(%rbx),%xmm3
1742 movdqa %xmm0,416(%rsp)
1747 movdqu 16(%rbx),%xmm1
1748 movdqu 32(%rbx),%xmm2
1749 por %xmm3,%xmm5
1750 movdqu 48(%rbx),%xmm3
1751 movdqa %xmm0,416(%rsp)
1743 pshufd $30,%xmm5,%xmm4
1752 pshufd $0x1e,%xmm5,%xmm4
1744 movdqa %xmm1,416+16(%rsp)
1745 por %xmm0,%xmm1
1746.byte 102,72,15,110,199
1747 movdqa %xmm2,448(%rsp)
1748 movdqa %xmm3,448+16(%rsp)
1749 por %xmm2,%xmm3
1750 por %xmm4,%xmm5
1751 pxor %xmm4,%xmm4
1752 por %xmm1,%xmm3
1753
1754 leaq 64-0(%rsi),%rsi
1755 leaq 32(%rsp),%rdi
1756 call __ecp_nistz256_sqr_montq
1757
1758 pcmpeqd %xmm4,%xmm5
1753 movdqa %xmm1,416+16(%rsp)
1754 por %xmm0,%xmm1
1755.byte 102,72,15,110,199
1756 movdqa %xmm2,448(%rsp)
1757 movdqa %xmm3,448+16(%rsp)
1758 por %xmm2,%xmm3
1759 por %xmm4,%xmm5
1760 pxor %xmm4,%xmm4
1761 por %xmm1,%xmm3
1762
1763 leaq 64-0(%rsi),%rsi
1764 leaq 32(%rsp),%rdi
1765 call __ecp_nistz256_sqr_montq
1766
1767 pcmpeqd %xmm4,%xmm5
1759 pshufd $177,%xmm3,%xmm4
1768 pshufd $0xb1,%xmm3,%xmm4
1760 movq 0(%rbx),%rax
1761
1762 movq %r12,%r9
1763 por %xmm3,%xmm4
1764 pshufd $0,%xmm5,%xmm5
1769 movq 0(%rbx),%rax
1770
1771 movq %r12,%r9
1772 por %xmm3,%xmm4
1773 pshufd $0,%xmm5,%xmm5
1765 pshufd $30,%xmm4,%xmm3
1774 pshufd $0x1e,%xmm4,%xmm3
1766 movq %r13,%r10
1767 por %xmm3,%xmm4
1768 pxor %xmm3,%xmm3
1769 movq %r14,%r11
1770 pcmpeqd %xmm3,%xmm4
1771 pshufd $0,%xmm4,%xmm4
1772
1773 leaq 32-0(%rsp),%rsi

--- 232 unchanged lines hidden ---
1775 movq %r13,%r10
1776 por %xmm3,%xmm4
1777 pxor %xmm3,%xmm3
1778 movq %r14,%r11
1779 pcmpeqd %xmm3,%xmm4
1780 pshufd $0,%xmm4,%xmm4
1781
1782 leaq 32-0(%rsp),%rsi

--- 232 unchanged lines hidden ---