/* ecp_nistz256-x86_64.S, revision 1.1 */
1#include <machine/asm.h> 2.text 3 4 5 6.align 64 7.Lpoly: 8.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 9 10 11.LRR: 12.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd 13 14.LOne: 15.long 1,1,1,1,1,1,1,1 16.LTwo: 17.long 2,2,2,2,2,2,2,2 18.LThree: 19.long 3,3,3,3,3,3,3,3 20.LONE_mont: 21.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe 22 23.globl ecp_nistz256_mul_by_2 24.type ecp_nistz256_mul_by_2,@function 25.align 64 26ecp_nistz256_mul_by_2: 27 pushq %r12 28 pushq %r13 29 30 movq 0(%rsi),%r8 31 xorq %r13,%r13 32 movq 8(%rsi),%r9 33 addq %r8,%r8 34 movq 16(%rsi),%r10 35 adcq %r9,%r9 36 movq 24(%rsi),%r11 37 leaq .Lpoly(%rip),%rsi 38 movq %r8,%rax 39 adcq %r10,%r10 40 adcq %r11,%r11 41 movq %r9,%rdx 42 adcq $0,%r13 43 44 subq 0(%rsi),%r8 45 movq %r10,%rcx 46 sbbq 8(%rsi),%r9 47 sbbq 16(%rsi),%r10 48 movq %r11,%r12 49 sbbq 24(%rsi),%r11 50 sbbq $0,%r13 51 52 cmovcq %rax,%r8 53 cmovcq %rdx,%r9 54 movq %r8,0(%rdi) 55 cmovcq %rcx,%r10 56 movq %r9,8(%rdi) 57 cmovcq %r12,%r11 58 movq %r10,16(%rdi) 59 movq %r11,24(%rdi) 60 61 popq %r13 62 popq %r12 63 .byte 0xf3,0xc3 64.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 65 66 67 68.globl ecp_nistz256_div_by_2 69.type ecp_nistz256_div_by_2,@function 70.align 32 71ecp_nistz256_div_by_2: 72 pushq %r12 73 pushq %r13 74 75 movq 0(%rsi),%r8 76 movq 8(%rsi),%r9 77 movq 16(%rsi),%r10 78 movq %r8,%rax 79 movq 24(%rsi),%r11 80 leaq .Lpoly(%rip),%rsi 81 82 movq %r9,%rdx 83 xorq %r13,%r13 84 addq 0(%rsi),%r8 85 movq %r10,%rcx 86 adcq 8(%rsi),%r9 87 adcq 16(%rsi),%r10 88 movq %r11,%r12 89 adcq 24(%rsi),%r11 90 adcq $0,%r13 91 xorq %rsi,%rsi 92 testq $1,%rax 93 94 cmovzq %rax,%r8 95 cmovzq %rdx,%r9 96 cmovzq %rcx,%r10 97 cmovzq %r12,%r11 98 cmovzq %rsi,%r13 99 100 movq %r9,%rax 101 shrq $1,%r8 102 shlq $63,%rax 103 movq %r10,%rdx 104 shrq $1,%r9 105 orq %rax,%r8 106 shlq $63,%rdx 107 movq %r11,%rcx 108 shrq $1,%r10 109 orq 
%rdx,%r9 110 shlq $63,%rcx 111 shrq $1,%r11 112 shlq $63,%r13 113 orq %rcx,%r10 114 orq %r13,%r11 115 116 movq %r8,0(%rdi) 117 movq %r9,8(%rdi) 118 movq %r10,16(%rdi) 119 movq %r11,24(%rdi) 120 121 popq %r13 122 popq %r12 123 .byte 0xf3,0xc3 124.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 125 126 127 128.globl ecp_nistz256_mul_by_3 129.type ecp_nistz256_mul_by_3,@function 130.align 32 131ecp_nistz256_mul_by_3: 132 pushq %r12 133 pushq %r13 134 135 movq 0(%rsi),%r8 136 xorq %r13,%r13 137 movq 8(%rsi),%r9 138 addq %r8,%r8 139 movq 16(%rsi),%r10 140 adcq %r9,%r9 141 movq 24(%rsi),%r11 142 movq %r8,%rax 143 adcq %r10,%r10 144 adcq %r11,%r11 145 movq %r9,%rdx 146 adcq $0,%r13 147 148 subq $-1,%r8 149 movq %r10,%rcx 150 sbbq .Lpoly+8(%rip),%r9 151 sbbq $0,%r10 152 movq %r11,%r12 153 sbbq .Lpoly+24(%rip),%r11 154 sbbq $0,%r13 155 156 cmovcq %rax,%r8 157 cmovcq %rdx,%r9 158 cmovcq %rcx,%r10 159 cmovcq %r12,%r11 160 161 xorq %r13,%r13 162 addq 0(%rsi),%r8 163 adcq 8(%rsi),%r9 164 movq %r8,%rax 165 adcq 16(%rsi),%r10 166 adcq 24(%rsi),%r11 167 movq %r9,%rdx 168 adcq $0,%r13 169 170 subq $-1,%r8 171 movq %r10,%rcx 172 sbbq .Lpoly+8(%rip),%r9 173 sbbq $0,%r10 174 movq %r11,%r12 175 sbbq .Lpoly+24(%rip),%r11 176 sbbq $0,%r13 177 178 cmovcq %rax,%r8 179 cmovcq %rdx,%r9 180 movq %r8,0(%rdi) 181 cmovcq %rcx,%r10 182 movq %r9,8(%rdi) 183 cmovcq %r12,%r11 184 movq %r10,16(%rdi) 185 movq %r11,24(%rdi) 186 187 popq %r13 188 popq %r12 189 .byte 0xf3,0xc3 190.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 191 192 193 194.globl ecp_nistz256_add 195.type ecp_nistz256_add,@function 196.align 32 197ecp_nistz256_add: 198 pushq %r12 199 pushq %r13 200 201 movq 0(%rsi),%r8 202 xorq %r13,%r13 203 movq 8(%rsi),%r9 204 movq 16(%rsi),%r10 205 movq 24(%rsi),%r11 206 leaq .Lpoly(%rip),%rsi 207 208 addq 0(%rdx),%r8 209 adcq 8(%rdx),%r9 210 movq %r8,%rax 211 adcq 16(%rdx),%r10 212 adcq 24(%rdx),%r11 213 movq %r9,%rdx 214 adcq $0,%r13 215 216 subq 0(%rsi),%r8 217 movq %r10,%rcx 218 sbbq 
8(%rsi),%r9 219 sbbq 16(%rsi),%r10 220 movq %r11,%r12 221 sbbq 24(%rsi),%r11 222 sbbq $0,%r13 223 224 cmovcq %rax,%r8 225 cmovcq %rdx,%r9 226 movq %r8,0(%rdi) 227 cmovcq %rcx,%r10 228 movq %r9,8(%rdi) 229 cmovcq %r12,%r11 230 movq %r10,16(%rdi) 231 movq %r11,24(%rdi) 232 233 popq %r13 234 popq %r12 235 .byte 0xf3,0xc3 236.size ecp_nistz256_add,.-ecp_nistz256_add 237 238 239 240.globl ecp_nistz256_sub 241.type ecp_nistz256_sub,@function 242.align 32 243ecp_nistz256_sub: 244 pushq %r12 245 pushq %r13 246 247 movq 0(%rsi),%r8 248 xorq %r13,%r13 249 movq 8(%rsi),%r9 250 movq 16(%rsi),%r10 251 movq 24(%rsi),%r11 252 leaq .Lpoly(%rip),%rsi 253 254 subq 0(%rdx),%r8 255 sbbq 8(%rdx),%r9 256 movq %r8,%rax 257 sbbq 16(%rdx),%r10 258 sbbq 24(%rdx),%r11 259 movq %r9,%rdx 260 sbbq $0,%r13 261 262 addq 0(%rsi),%r8 263 movq %r10,%rcx 264 adcq 8(%rsi),%r9 265 adcq 16(%rsi),%r10 266 movq %r11,%r12 267 adcq 24(%rsi),%r11 268 testq %r13,%r13 269 270 cmovzq %rax,%r8 271 cmovzq %rdx,%r9 272 movq %r8,0(%rdi) 273 cmovzq %rcx,%r10 274 movq %r9,8(%rdi) 275 cmovzq %r12,%r11 276 movq %r10,16(%rdi) 277 movq %r11,24(%rdi) 278 279 popq %r13 280 popq %r12 281 .byte 0xf3,0xc3 282.size ecp_nistz256_sub,.-ecp_nistz256_sub 283 284 285 286.globl ecp_nistz256_neg 287.type ecp_nistz256_neg,@function 288.align 32 289ecp_nistz256_neg: 290 pushq %r12 291 pushq %r13 292 293 xorq %r8,%r8 294 xorq %r9,%r9 295 xorq %r10,%r10 296 xorq %r11,%r11 297 xorq %r13,%r13 298 299 subq 0(%rsi),%r8 300 sbbq 8(%rsi),%r9 301 sbbq 16(%rsi),%r10 302 movq %r8,%rax 303 sbbq 24(%rsi),%r11 304 leaq .Lpoly(%rip),%rsi 305 movq %r9,%rdx 306 sbbq $0,%r13 307 308 addq 0(%rsi),%r8 309 movq %r10,%rcx 310 adcq 8(%rsi),%r9 311 adcq 16(%rsi),%r10 312 movq %r11,%r12 313 adcq 24(%rsi),%r11 314 testq %r13,%r13 315 316 cmovzq %rax,%r8 317 cmovzq %rdx,%r9 318 movq %r8,0(%rdi) 319 cmovzq %rcx,%r10 320 movq %r9,8(%rdi) 321 cmovzq %r12,%r11 322 movq %r10,16(%rdi) 323 movq %r11,24(%rdi) 324 325 popq %r13 326 popq %r12 327 .byte 0xf3,0xc3 328.size 
ecp_nistz256_neg,.-ecp_nistz256_neg 329 330 331 332 333.globl ecp_nistz256_to_mont 334.type ecp_nistz256_to_mont,@function 335.align 32 336ecp_nistz256_to_mont: 337 leaq .LRR(%rip),%rdx 338 jmp .Lmul_mont 339.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont 340 341 342 343 344 345 346 347.globl ecp_nistz256_mul_mont 348.type ecp_nistz256_mul_mont,@function 349.align 32 350ecp_nistz256_mul_mont: 351.Lmul_mont: 352 pushq %rbp 353 pushq %rbx 354 pushq %r12 355 pushq %r13 356 pushq %r14 357 pushq %r15 358 movq %rdx,%rbx 359 movq 0(%rdx),%rax 360 movq 0(%rsi),%r9 361 movq 8(%rsi),%r10 362 movq 16(%rsi),%r11 363 movq 24(%rsi),%r12 364 365 call __ecp_nistz256_mul_montq 366.Lmul_mont_done: 367 popq %r15 368 popq %r14 369 popq %r13 370 popq %r12 371 popq %rbx 372 popq %rbp 373 .byte 0xf3,0xc3 374.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont 375 376.type __ecp_nistz256_mul_montq,@function 377.align 32 378__ecp_nistz256_mul_montq: 379 380 381 movq %rax,%rbp 382 mulq %r9 383 movq .Lpoly+8(%rip),%r14 384 movq %rax,%r8 385 movq %rbp,%rax 386 movq %rdx,%r9 387 388 mulq %r10 389 movq .Lpoly+24(%rip),%r15 390 addq %rax,%r9 391 movq %rbp,%rax 392 adcq $0,%rdx 393 movq %rdx,%r10 394 395 mulq %r11 396 addq %rax,%r10 397 movq %rbp,%rax 398 adcq $0,%rdx 399 movq %rdx,%r11 400 401 mulq %r12 402 addq %rax,%r11 403 movq %r8,%rax 404 adcq $0,%rdx 405 xorq %r13,%r13 406 movq %rdx,%r12 407 408 409 410 411 412 413 414 415 416 417 movq %r8,%rbp 418 shlq $32,%r8 419 mulq %r15 420 shrq $32,%rbp 421 addq %r8,%r9 422 adcq %rbp,%r10 423 adcq %rax,%r11 424 movq 8(%rbx),%rax 425 adcq %rdx,%r12 426 adcq $0,%r13 427 xorq %r8,%r8 428 429 430 431 movq %rax,%rbp 432 mulq 0(%rsi) 433 addq %rax,%r9 434 movq %rbp,%rax 435 adcq $0,%rdx 436 movq %rdx,%rcx 437 438 mulq 8(%rsi) 439 addq %rcx,%r10 440 adcq $0,%rdx 441 addq %rax,%r10 442 movq %rbp,%rax 443 adcq $0,%rdx 444 movq %rdx,%rcx 445 446 mulq 16(%rsi) 447 addq %rcx,%r11 448 adcq $0,%rdx 449 addq %rax,%r11 450 movq %rbp,%rax 451 adcq $0,%rdx 452 movq 
%rdx,%rcx 453 454 mulq 24(%rsi) 455 addq %rcx,%r12 456 adcq $0,%rdx 457 addq %rax,%r12 458 movq %r9,%rax 459 adcq %rdx,%r13 460 adcq $0,%r8 461 462 463 464 movq %r9,%rbp 465 shlq $32,%r9 466 mulq %r15 467 shrq $32,%rbp 468 addq %r9,%r10 469 adcq %rbp,%r11 470 adcq %rax,%r12 471 movq 16(%rbx),%rax 472 adcq %rdx,%r13 473 adcq $0,%r8 474 xorq %r9,%r9 475 476 477 478 movq %rax,%rbp 479 mulq 0(%rsi) 480 addq %rax,%r10 481 movq %rbp,%rax 482 adcq $0,%rdx 483 movq %rdx,%rcx 484 485 mulq 8(%rsi) 486 addq %rcx,%r11 487 adcq $0,%rdx 488 addq %rax,%r11 489 movq %rbp,%rax 490 adcq $0,%rdx 491 movq %rdx,%rcx 492 493 mulq 16(%rsi) 494 addq %rcx,%r12 495 adcq $0,%rdx 496 addq %rax,%r12 497 movq %rbp,%rax 498 adcq $0,%rdx 499 movq %rdx,%rcx 500 501 mulq 24(%rsi) 502 addq %rcx,%r13 503 adcq $0,%rdx 504 addq %rax,%r13 505 movq %r10,%rax 506 adcq %rdx,%r8 507 adcq $0,%r9 508 509 510 511 movq %r10,%rbp 512 shlq $32,%r10 513 mulq %r15 514 shrq $32,%rbp 515 addq %r10,%r11 516 adcq %rbp,%r12 517 adcq %rax,%r13 518 movq 24(%rbx),%rax 519 adcq %rdx,%r8 520 adcq $0,%r9 521 xorq %r10,%r10 522 523 524 525 movq %rax,%rbp 526 mulq 0(%rsi) 527 addq %rax,%r11 528 movq %rbp,%rax 529 adcq $0,%rdx 530 movq %rdx,%rcx 531 532 mulq 8(%rsi) 533 addq %rcx,%r12 534 adcq $0,%rdx 535 addq %rax,%r12 536 movq %rbp,%rax 537 adcq $0,%rdx 538 movq %rdx,%rcx 539 540 mulq 16(%rsi) 541 addq %rcx,%r13 542 adcq $0,%rdx 543 addq %rax,%r13 544 movq %rbp,%rax 545 adcq $0,%rdx 546 movq %rdx,%rcx 547 548 mulq 24(%rsi) 549 addq %rcx,%r8 550 adcq $0,%rdx 551 addq %rax,%r8 552 movq %r11,%rax 553 adcq %rdx,%r9 554 adcq $0,%r10 555 556 557 558 movq %r11,%rbp 559 shlq $32,%r11 560 mulq %r15 561 shrq $32,%rbp 562 addq %r11,%r12 563 adcq %rbp,%r13 564 movq %r12,%rcx 565 adcq %rax,%r8 566 adcq %rdx,%r9 567 movq %r13,%rbp 568 adcq $0,%r10 569 570 571 572 subq $-1,%r12 573 movq %r8,%rbx 574 sbbq %r14,%r13 575 sbbq $0,%r8 576 movq %r9,%rdx 577 sbbq %r15,%r9 578 sbbq $0,%r10 579 580 cmovcq %rcx,%r12 581 cmovcq %rbp,%r13 582 movq 
%r12,0(%rdi) 583 cmovcq %rbx,%r8 584 movq %r13,8(%rdi) 585 cmovcq %rdx,%r9 586 movq %r8,16(%rdi) 587 movq %r9,24(%rdi) 588 589 .byte 0xf3,0xc3 590.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq 591 592 593 594 595 596 597 598 599.globl ecp_nistz256_sqr_mont 600.type ecp_nistz256_sqr_mont,@function 601.align 32 602ecp_nistz256_sqr_mont: 603 pushq %rbp 604 pushq %rbx 605 pushq %r12 606 pushq %r13 607 pushq %r14 608 pushq %r15 609 movq 0(%rsi),%rax 610 movq 8(%rsi),%r14 611 movq 16(%rsi),%r15 612 movq 24(%rsi),%r8 613 614 call __ecp_nistz256_sqr_montq 615.Lsqr_mont_done: 616 popq %r15 617 popq %r14 618 popq %r13 619 popq %r12 620 popq %rbx 621 popq %rbp 622 .byte 0xf3,0xc3 623.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont 624 625.type __ecp_nistz256_sqr_montq,@function 626.align 32 627__ecp_nistz256_sqr_montq: 628 movq %rax,%r13 629 mulq %r14 630 movq %rax,%r9 631 movq %r15,%rax 632 movq %rdx,%r10 633 634 mulq %r13 635 addq %rax,%r10 636 movq %r8,%rax 637 adcq $0,%rdx 638 movq %rdx,%r11 639 640 mulq %r13 641 addq %rax,%r11 642 movq %r15,%rax 643 adcq $0,%rdx 644 movq %rdx,%r12 645 646 647 mulq %r14 648 addq %rax,%r11 649 movq %r8,%rax 650 adcq $0,%rdx 651 movq %rdx,%rbp 652 653 mulq %r14 654 addq %rax,%r12 655 movq %r8,%rax 656 adcq $0,%rdx 657 addq %rbp,%r12 658 movq %rdx,%r13 659 adcq $0,%r13 660 661 662 mulq %r15 663 xorq %r15,%r15 664 addq %rax,%r13 665 movq 0(%rsi),%rax 666 movq %rdx,%r14 667 adcq $0,%r14 668 669 addq %r9,%r9 670 adcq %r10,%r10 671 adcq %r11,%r11 672 adcq %r12,%r12 673 adcq %r13,%r13 674 adcq %r14,%r14 675 adcq $0,%r15 676 677 mulq %rax 678 movq %rax,%r8 679 movq 8(%rsi),%rax 680 movq %rdx,%rcx 681 682 mulq %rax 683 addq %rcx,%r9 684 adcq %rax,%r10 685 movq 16(%rsi),%rax 686 adcq $0,%rdx 687 movq %rdx,%rcx 688 689 mulq %rax 690 addq %rcx,%r11 691 adcq %rax,%r12 692 movq 24(%rsi),%rax 693 adcq $0,%rdx 694 movq %rdx,%rcx 695 696 mulq %rax 697 addq %rcx,%r13 698 adcq %rax,%r14 699 movq %r8,%rax 700 adcq %rdx,%r15 701 702 movq 
.Lpoly+8(%rip),%rsi 703 movq .Lpoly+24(%rip),%rbp 704 705 706 707 708 movq %r8,%rcx 709 shlq $32,%r8 710 mulq %rbp 711 shrq $32,%rcx 712 addq %r8,%r9 713 adcq %rcx,%r10 714 adcq %rax,%r11 715 movq %r9,%rax 716 adcq $0,%rdx 717 718 719 720 movq %r9,%rcx 721 shlq $32,%r9 722 movq %rdx,%r8 723 mulq %rbp 724 shrq $32,%rcx 725 addq %r9,%r10 726 adcq %rcx,%r11 727 adcq %rax,%r8 728 movq %r10,%rax 729 adcq $0,%rdx 730 731 732 733 movq %r10,%rcx 734 shlq $32,%r10 735 movq %rdx,%r9 736 mulq %rbp 737 shrq $32,%rcx 738 addq %r10,%r11 739 adcq %rcx,%r8 740 adcq %rax,%r9 741 movq %r11,%rax 742 adcq $0,%rdx 743 744 745 746 movq %r11,%rcx 747 shlq $32,%r11 748 movq %rdx,%r10 749 mulq %rbp 750 shrq $32,%rcx 751 addq %r11,%r8 752 adcq %rcx,%r9 753 adcq %rax,%r10 754 adcq $0,%rdx 755 xorq %r11,%r11 756 757 758 759 addq %r8,%r12 760 adcq %r9,%r13 761 movq %r12,%r8 762 adcq %r10,%r14 763 adcq %rdx,%r15 764 movq %r13,%r9 765 adcq $0,%r11 766 767 subq $-1,%r12 768 movq %r14,%r10 769 sbbq %rsi,%r13 770 sbbq $0,%r14 771 movq %r15,%rcx 772 sbbq %rbp,%r15 773 sbbq $0,%r11 774 775 cmovcq %r8,%r12 776 cmovcq %r9,%r13 777 movq %r12,0(%rdi) 778 cmovcq %r10,%r14 779 movq %r13,8(%rdi) 780 cmovcq %rcx,%r15 781 movq %r14,16(%rdi) 782 movq %r15,24(%rdi) 783 784 .byte 0xf3,0xc3 785.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq 786 787 788 789 790 791 792.globl ecp_nistz256_from_mont 793.type ecp_nistz256_from_mont,@function 794.align 32 795ecp_nistz256_from_mont: 796 pushq %r12 797 pushq %r13 798 799 movq 0(%rsi),%rax 800 movq .Lpoly+24(%rip),%r13 801 movq 8(%rsi),%r9 802 movq 16(%rsi),%r10 803 movq 24(%rsi),%r11 804 movq %rax,%r8 805 movq .Lpoly+8(%rip),%r12 806 807 808 809 movq %rax,%rcx 810 shlq $32,%r8 811 mulq %r13 812 shrq $32,%rcx 813 addq %r8,%r9 814 adcq %rcx,%r10 815 adcq %rax,%r11 816 movq %r9,%rax 817 adcq $0,%rdx 818 819 820 821 movq %r9,%rcx 822 shlq $32,%r9 823 movq %rdx,%r8 824 mulq %r13 825 shrq $32,%rcx 826 addq %r9,%r10 827 adcq %rcx,%r11 828 adcq %rax,%r8 829 movq 
%r10,%rax 830 adcq $0,%rdx 831 832 833 834 movq %r10,%rcx 835 shlq $32,%r10 836 movq %rdx,%r9 837 mulq %r13 838 shrq $32,%rcx 839 addq %r10,%r11 840 adcq %rcx,%r8 841 adcq %rax,%r9 842 movq %r11,%rax 843 adcq $0,%rdx 844 845 846 847 movq %r11,%rcx 848 shlq $32,%r11 849 movq %rdx,%r10 850 mulq %r13 851 shrq $32,%rcx 852 addq %r11,%r8 853 adcq %rcx,%r9 854 movq %r8,%rcx 855 adcq %rax,%r10 856 movq %r9,%rsi 857 adcq $0,%rdx 858 859 860 861 subq $-1,%r8 862 movq %r10,%rax 863 sbbq %r12,%r9 864 sbbq $0,%r10 865 movq %rdx,%r11 866 sbbq %r13,%rdx 867 sbbq %r13,%r13 868 869 cmovnzq %rcx,%r8 870 cmovnzq %rsi,%r9 871 movq %r8,0(%rdi) 872 cmovnzq %rax,%r10 873 movq %r9,8(%rdi) 874 cmovzq %rdx,%r11 875 movq %r10,16(%rdi) 876 movq %r11,24(%rdi) 877 878 popq %r13 879 popq %r12 880 .byte 0xf3,0xc3 881.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont 882 883 884.globl ecp_nistz256_select_w5 885.type ecp_nistz256_select_w5,@function 886.align 32 887ecp_nistz256_select_w5: 888 movdqa .LOne(%rip),%xmm0 889 movd %edx,%xmm1 890 891 pxor %xmm2,%xmm2 892 pxor %xmm3,%xmm3 893 pxor %xmm4,%xmm4 894 pxor %xmm5,%xmm5 895 pxor %xmm6,%xmm6 896 pxor %xmm7,%xmm7 897 898 movdqa %xmm0,%xmm8 899 pshufd $0,%xmm1,%xmm1 900 901 movq $16,%rax 902.Lselect_loop_sse_w5: 903 904 movdqa %xmm8,%xmm15 905 paddd %xmm0,%xmm8 906 pcmpeqd %xmm1,%xmm15 907 908 movdqa 0(%rsi),%xmm9 909 movdqa 16(%rsi),%xmm10 910 movdqa 32(%rsi),%xmm11 911 movdqa 48(%rsi),%xmm12 912 movdqa 64(%rsi),%xmm13 913 movdqa 80(%rsi),%xmm14 914 leaq 96(%rsi),%rsi 915 916 pand %xmm15,%xmm9 917 pand %xmm15,%xmm10 918 por %xmm9,%xmm2 919 pand %xmm15,%xmm11 920 por %xmm10,%xmm3 921 pand %xmm15,%xmm12 922 por %xmm11,%xmm4 923 pand %xmm15,%xmm13 924 por %xmm12,%xmm5 925 pand %xmm15,%xmm14 926 por %xmm13,%xmm6 927 por %xmm14,%xmm7 928 929 decq %rax 930 jnz .Lselect_loop_sse_w5 931 932 movdqu %xmm2,0(%rdi) 933 movdqu %xmm3,16(%rdi) 934 movdqu %xmm4,32(%rdi) 935 movdqu %xmm5,48(%rdi) 936 movdqu %xmm6,64(%rdi) 937 movdqu %xmm7,80(%rdi) 938 .byte 
0xf3,0xc3 939.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 940 941 942 943.globl ecp_nistz256_select_w7 944.type ecp_nistz256_select_w7,@function 945.align 32 946ecp_nistz256_select_w7: 947 movdqa .LOne(%rip),%xmm8 948 movd %edx,%xmm1 949 950 pxor %xmm2,%xmm2 951 pxor %xmm3,%xmm3 952 pxor %xmm4,%xmm4 953 pxor %xmm5,%xmm5 954 955 movdqa %xmm8,%xmm0 956 pshufd $0,%xmm1,%xmm1 957 movq $64,%rax 958 959.Lselect_loop_sse_w7: 960 movdqa %xmm8,%xmm15 961 paddd %xmm0,%xmm8 962 movdqa 0(%rsi),%xmm9 963 movdqa 16(%rsi),%xmm10 964 pcmpeqd %xmm1,%xmm15 965 movdqa 32(%rsi),%xmm11 966 movdqa 48(%rsi),%xmm12 967 leaq 64(%rsi),%rsi 968 969 pand %xmm15,%xmm9 970 pand %xmm15,%xmm10 971 por %xmm9,%xmm2 972 pand %xmm15,%xmm11 973 por %xmm10,%xmm3 974 pand %xmm15,%xmm12 975 por %xmm11,%xmm4 976 prefetcht0 255(%rsi) 977 por %xmm12,%xmm5 978 979 decq %rax 980 jnz .Lselect_loop_sse_w7 981 982 movdqu %xmm2,0(%rdi) 983 movdqu %xmm3,16(%rdi) 984 movdqu %xmm4,32(%rdi) 985 movdqu %xmm5,48(%rdi) 986 .byte 0xf3,0xc3 987.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 988.globl ecp_nistz256_avx2_select_w7 989.type ecp_nistz256_avx2_select_w7,@function 990.align 32 991ecp_nistz256_avx2_select_w7: 992.byte 0x0f,0x0b 993 .byte 0xf3,0xc3 994.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 995.type __ecp_nistz256_add_toq,@function 996.align 32 997__ecp_nistz256_add_toq: 998 xorq %r11,%r11 999 addq 0(%rbx),%r12 1000 adcq 8(%rbx),%r13 1001 movq %r12,%rax 1002 adcq 16(%rbx),%r8 1003 adcq 24(%rbx),%r9 1004 movq %r13,%rbp 1005 adcq $0,%r11 1006 1007 subq $-1,%r12 1008 movq %r8,%rcx 1009 sbbq %r14,%r13 1010 sbbq $0,%r8 1011 movq %r9,%r10 1012 sbbq %r15,%r9 1013 sbbq $0,%r11 1014 1015 cmovcq %rax,%r12 1016 cmovcq %rbp,%r13 1017 movq %r12,0(%rdi) 1018 cmovcq %rcx,%r8 1019 movq %r13,8(%rdi) 1020 cmovcq %r10,%r9 1021 movq %r8,16(%rdi) 1022 movq %r9,24(%rdi) 1023 1024 .byte 0xf3,0xc3 1025.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq 1026 1027.type 
__ecp_nistz256_sub_fromq,@function 1028.align 32 1029__ecp_nistz256_sub_fromq: 1030 subq 0(%rbx),%r12 1031 sbbq 8(%rbx),%r13 1032 movq %r12,%rax 1033 sbbq 16(%rbx),%r8 1034 sbbq 24(%rbx),%r9 1035 movq %r13,%rbp 1036 sbbq %r11,%r11 1037 1038 addq $-1,%r12 1039 movq %r8,%rcx 1040 adcq %r14,%r13 1041 adcq $0,%r8 1042 movq %r9,%r10 1043 adcq %r15,%r9 1044 testq %r11,%r11 1045 1046 cmovzq %rax,%r12 1047 cmovzq %rbp,%r13 1048 movq %r12,0(%rdi) 1049 cmovzq %rcx,%r8 1050 movq %r13,8(%rdi) 1051 cmovzq %r10,%r9 1052 movq %r8,16(%rdi) 1053 movq %r9,24(%rdi) 1054 1055 .byte 0xf3,0xc3 1056.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq 1057 1058.type __ecp_nistz256_subq,@function 1059.align 32 1060__ecp_nistz256_subq: 1061 subq %r12,%rax 1062 sbbq %r13,%rbp 1063 movq %rax,%r12 1064 sbbq %r8,%rcx 1065 sbbq %r9,%r10 1066 movq %rbp,%r13 1067 sbbq %r11,%r11 1068 1069 addq $-1,%rax 1070 movq %rcx,%r8 1071 adcq %r14,%rbp 1072 adcq $0,%rcx 1073 movq %r10,%r9 1074 adcq %r15,%r10 1075 testq %r11,%r11 1076 1077 cmovnzq %rax,%r12 1078 cmovnzq %rbp,%r13 1079 cmovnzq %rcx,%r8 1080 cmovnzq %r10,%r9 1081 1082 .byte 0xf3,0xc3 1083.size __ecp_nistz256_subq,.-__ecp_nistz256_subq 1084 1085.type __ecp_nistz256_mul_by_2q,@function 1086.align 32 1087__ecp_nistz256_mul_by_2q: 1088 xorq %r11,%r11 1089 addq %r12,%r12 1090 adcq %r13,%r13 1091 movq %r12,%rax 1092 adcq %r8,%r8 1093 adcq %r9,%r9 1094 movq %r13,%rbp 1095 adcq $0,%r11 1096 1097 subq $-1,%r12 1098 movq %r8,%rcx 1099 sbbq %r14,%r13 1100 sbbq $0,%r8 1101 movq %r9,%r10 1102 sbbq %r15,%r9 1103 sbbq $0,%r11 1104 1105 cmovcq %rax,%r12 1106 cmovcq %rbp,%r13 1107 movq %r12,0(%rdi) 1108 cmovcq %rcx,%r8 1109 movq %r13,8(%rdi) 1110 cmovcq %r10,%r9 1111 movq %r8,16(%rdi) 1112 movq %r9,24(%rdi) 1113 1114 .byte 0xf3,0xc3 1115.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q 1116.globl ecp_nistz256_point_double 1117.type ecp_nistz256_point_double,@function 1118.align 32 1119ecp_nistz256_point_double: 1120 pushq %rbp 1121 pushq %rbx 1122 
pushq %r12 1123 pushq %r13 1124 pushq %r14 1125 pushq %r15 1126 subq $160+8,%rsp 1127 1128.Lpoint_double_shortcutq: 1129 movdqu 0(%rsi),%xmm0 1130 movq %rsi,%rbx 1131 movdqu 16(%rsi),%xmm1 1132 movq 32+0(%rsi),%r12 1133 movq 32+8(%rsi),%r13 1134 movq 32+16(%rsi),%r8 1135 movq 32+24(%rsi),%r9 1136 movq .Lpoly+8(%rip),%r14 1137 movq .Lpoly+24(%rip),%r15 1138 movdqa %xmm0,96(%rsp) 1139 movdqa %xmm1,96+16(%rsp) 1140 leaq 32(%rdi),%r10 1141 leaq 64(%rdi),%r11 1142.byte 102,72,15,110,199 1143.byte 102,73,15,110,202 1144.byte 102,73,15,110,211 1145 1146 leaq 0(%rsp),%rdi 1147 call __ecp_nistz256_mul_by_2q 1148 1149 movq 64+0(%rsi),%rax 1150 movq 64+8(%rsi),%r14 1151 movq 64+16(%rsi),%r15 1152 movq 64+24(%rsi),%r8 1153 leaq 64-0(%rsi),%rsi 1154 leaq 64(%rsp),%rdi 1155 call __ecp_nistz256_sqr_montq 1156 1157 movq 0+0(%rsp),%rax 1158 movq 8+0(%rsp),%r14 1159 leaq 0+0(%rsp),%rsi 1160 movq 16+0(%rsp),%r15 1161 movq 24+0(%rsp),%r8 1162 leaq 0(%rsp),%rdi 1163 call __ecp_nistz256_sqr_montq 1164 1165 movq 32(%rbx),%rax 1166 movq 64+0(%rbx),%r9 1167 movq 64+8(%rbx),%r10 1168 movq 64+16(%rbx),%r11 1169 movq 64+24(%rbx),%r12 1170 leaq 64-0(%rbx),%rsi 1171 leaq 32(%rbx),%rbx 1172.byte 102,72,15,126,215 1173 call __ecp_nistz256_mul_montq 1174 call __ecp_nistz256_mul_by_2q 1175 1176 movq 96+0(%rsp),%r12 1177 movq 96+8(%rsp),%r13 1178 leaq 64(%rsp),%rbx 1179 movq 96+16(%rsp),%r8 1180 movq 96+24(%rsp),%r9 1181 leaq 32(%rsp),%rdi 1182 call __ecp_nistz256_add_toq 1183 1184 movq 96+0(%rsp),%r12 1185 movq 96+8(%rsp),%r13 1186 leaq 64(%rsp),%rbx 1187 movq 96+16(%rsp),%r8 1188 movq 96+24(%rsp),%r9 1189 leaq 64(%rsp),%rdi 1190 call __ecp_nistz256_sub_fromq 1191 1192 movq 0+0(%rsp),%rax 1193 movq 8+0(%rsp),%r14 1194 leaq 0+0(%rsp),%rsi 1195 movq 16+0(%rsp),%r15 1196 movq 24+0(%rsp),%r8 1197.byte 102,72,15,126,207 1198 call __ecp_nistz256_sqr_montq 1199 xorq %r9,%r9 1200 movq %r12,%rax 1201 addq $-1,%r12 1202 movq %r13,%r10 1203 adcq %rsi,%r13 1204 movq %r14,%rcx 1205 adcq $0,%r14 1206 movq 
%r15,%r8 1207 adcq %rbp,%r15 1208 adcq $0,%r9 1209 xorq %rsi,%rsi 1210 testq $1,%rax 1211 1212 cmovzq %rax,%r12 1213 cmovzq %r10,%r13 1214 cmovzq %rcx,%r14 1215 cmovzq %r8,%r15 1216 cmovzq %rsi,%r9 1217 1218 movq %r13,%rax 1219 shrq $1,%r12 1220 shlq $63,%rax 1221 movq %r14,%r10 1222 shrq $1,%r13 1223 orq %rax,%r12 1224 shlq $63,%r10 1225 movq %r15,%rcx 1226 shrq $1,%r14 1227 orq %r10,%r13 1228 shlq $63,%rcx 1229 movq %r12,0(%rdi) 1230 shrq $1,%r15 1231 movq %r13,8(%rdi) 1232 shlq $63,%r9 1233 orq %rcx,%r14 1234 orq %r9,%r15 1235 movq %r14,16(%rdi) 1236 movq %r15,24(%rdi) 1237 movq 64(%rsp),%rax 1238 leaq 64(%rsp),%rbx 1239 movq 0+32(%rsp),%r9 1240 movq 8+32(%rsp),%r10 1241 leaq 0+32(%rsp),%rsi 1242 movq 16+32(%rsp),%r11 1243 movq 24+32(%rsp),%r12 1244 leaq 32(%rsp),%rdi 1245 call __ecp_nistz256_mul_montq 1246 1247 leaq 128(%rsp),%rdi 1248 call __ecp_nistz256_mul_by_2q 1249 1250 leaq 32(%rsp),%rbx 1251 leaq 32(%rsp),%rdi 1252 call __ecp_nistz256_add_toq 1253 1254 movq 96(%rsp),%rax 1255 leaq 96(%rsp),%rbx 1256 movq 0+0(%rsp),%r9 1257 movq 8+0(%rsp),%r10 1258 leaq 0+0(%rsp),%rsi 1259 movq 16+0(%rsp),%r11 1260 movq 24+0(%rsp),%r12 1261 leaq 0(%rsp),%rdi 1262 call __ecp_nistz256_mul_montq 1263 1264 leaq 128(%rsp),%rdi 1265 call __ecp_nistz256_mul_by_2q 1266 1267 movq 0+32(%rsp),%rax 1268 movq 8+32(%rsp),%r14 1269 leaq 0+32(%rsp),%rsi 1270 movq 16+32(%rsp),%r15 1271 movq 24+32(%rsp),%r8 1272.byte 102,72,15,126,199 1273 call __ecp_nistz256_sqr_montq 1274 1275 leaq 128(%rsp),%rbx 1276 movq %r14,%r8 1277 movq %r15,%r9 1278 movq %rsi,%r14 1279 movq %rbp,%r15 1280 call __ecp_nistz256_sub_fromq 1281 1282 movq 0+0(%rsp),%rax 1283 movq 0+8(%rsp),%rbp 1284 movq 0+16(%rsp),%rcx 1285 movq 0+24(%rsp),%r10 1286 leaq 0(%rsp),%rdi 1287 call __ecp_nistz256_subq 1288 1289 movq 32(%rsp),%rax 1290 leaq 32(%rsp),%rbx 1291 movq %r12,%r14 1292 xorl %ecx,%ecx 1293 movq %r12,0+0(%rsp) 1294 movq %r13,%r10 1295 movq %r13,0+8(%rsp) 1296 cmovzq %r8,%r11 1297 movq %r8,0+16(%rsp) 1298 leaq 
0-0(%rsp),%rsi 1299 cmovzq %r9,%r12 1300 movq %r9,0+24(%rsp) 1301 movq %r14,%r9 1302 leaq 0(%rsp),%rdi 1303 call __ecp_nistz256_mul_montq 1304 1305.byte 102,72,15,126,203 1306.byte 102,72,15,126,207 1307 call __ecp_nistz256_sub_fromq 1308 1309 addq $160+8,%rsp 1310 popq %r15 1311 popq %r14 1312 popq %r13 1313 popq %r12 1314 popq %rbx 1315 popq %rbp 1316 .byte 0xf3,0xc3 1317.size ecp_nistz256_point_double,.-ecp_nistz256_point_double 1318.globl ecp_nistz256_point_add 1319.type ecp_nistz256_point_add,@function 1320.align 32 1321ecp_nistz256_point_add: 1322 pushq %rbp 1323 pushq %rbx 1324 pushq %r12 1325 pushq %r13 1326 pushq %r14 1327 pushq %r15 1328 subq $576+8,%rsp 1329 1330 movdqu 0(%rsi),%xmm0 1331 movdqu 16(%rsi),%xmm1 1332 movdqu 32(%rsi),%xmm2 1333 movdqu 48(%rsi),%xmm3 1334 movdqu 64(%rsi),%xmm4 1335 movdqu 80(%rsi),%xmm5 1336 movq %rsi,%rbx 1337 movq %rdx,%rsi 1338 movdqa %xmm0,384(%rsp) 1339 movdqa %xmm1,384+16(%rsp) 1340 movdqa %xmm2,416(%rsp) 1341 movdqa %xmm3,416+16(%rsp) 1342 movdqa %xmm4,448(%rsp) 1343 movdqa %xmm5,448+16(%rsp) 1344 por %xmm4,%xmm5 1345 1346 movdqu 0(%rsi),%xmm0 1347 pshufd $0xb1,%xmm5,%xmm3 1348 movdqu 16(%rsi),%xmm1 1349 movdqu 32(%rsi),%xmm2 1350 por %xmm3,%xmm5 1351 movdqu 48(%rsi),%xmm3 1352 movq 64+0(%rsi),%rax 1353 movq 64+8(%rsi),%r14 1354 movq 64+16(%rsi),%r15 1355 movq 64+24(%rsi),%r8 1356 movdqa %xmm0,480(%rsp) 1357 pshufd $0x1e,%xmm5,%xmm4 1358 movdqa %xmm1,480+16(%rsp) 1359 movdqu 64(%rsi),%xmm0 1360 movdqu 80(%rsi),%xmm1 1361 movdqa %xmm2,512(%rsp) 1362 movdqa %xmm3,512+16(%rsp) 1363 por %xmm4,%xmm5 1364 pxor %xmm4,%xmm4 1365 por %xmm0,%xmm1 1366.byte 102,72,15,110,199 1367 1368 leaq 64-0(%rsi),%rsi 1369 movq %rax,544+0(%rsp) 1370 movq %r14,544+8(%rsp) 1371 movq %r15,544+16(%rsp) 1372 movq %r8,544+24(%rsp) 1373 leaq 96(%rsp),%rdi 1374 call __ecp_nistz256_sqr_montq 1375 1376 pcmpeqd %xmm4,%xmm5 1377 pshufd $0xb1,%xmm1,%xmm4 1378 por %xmm1,%xmm4 1379 pshufd $0,%xmm5,%xmm5 1380 pshufd $0x1e,%xmm4,%xmm3 1381 por %xmm3,%xmm4 
1382 pxor %xmm3,%xmm3 1383 pcmpeqd %xmm3,%xmm4 1384 pshufd $0,%xmm4,%xmm4 1385 movq 64+0(%rbx),%rax 1386 movq 64+8(%rbx),%r14 1387 movq 64+16(%rbx),%r15 1388 movq 64+24(%rbx),%r8 1389.byte 102,72,15,110,203 1390 1391 leaq 64-0(%rbx),%rsi 1392 leaq 32(%rsp),%rdi 1393 call __ecp_nistz256_sqr_montq 1394 1395 movq 544(%rsp),%rax 1396 leaq 544(%rsp),%rbx 1397 movq 0+96(%rsp),%r9 1398 movq 8+96(%rsp),%r10 1399 leaq 0+96(%rsp),%rsi 1400 movq 16+96(%rsp),%r11 1401 movq 24+96(%rsp),%r12 1402 leaq 224(%rsp),%rdi 1403 call __ecp_nistz256_mul_montq 1404 1405 movq 448(%rsp),%rax 1406 leaq 448(%rsp),%rbx 1407 movq 0+32(%rsp),%r9 1408 movq 8+32(%rsp),%r10 1409 leaq 0+32(%rsp),%rsi 1410 movq 16+32(%rsp),%r11 1411 movq 24+32(%rsp),%r12 1412 leaq 256(%rsp),%rdi 1413 call __ecp_nistz256_mul_montq 1414 1415 movq 416(%rsp),%rax 1416 leaq 416(%rsp),%rbx 1417 movq 0+224(%rsp),%r9 1418 movq 8+224(%rsp),%r10 1419 leaq 0+224(%rsp),%rsi 1420 movq 16+224(%rsp),%r11 1421 movq 24+224(%rsp),%r12 1422 leaq 224(%rsp),%rdi 1423 call __ecp_nistz256_mul_montq 1424 1425 movq 512(%rsp),%rax 1426 leaq 512(%rsp),%rbx 1427 movq 0+256(%rsp),%r9 1428 movq 8+256(%rsp),%r10 1429 leaq 0+256(%rsp),%rsi 1430 movq 16+256(%rsp),%r11 1431 movq 24+256(%rsp),%r12 1432 leaq 256(%rsp),%rdi 1433 call __ecp_nistz256_mul_montq 1434 1435 leaq 224(%rsp),%rbx 1436 leaq 64(%rsp),%rdi 1437 call __ecp_nistz256_sub_fromq 1438 1439 orq %r13,%r12 1440 movdqa %xmm4,%xmm2 1441 orq %r8,%r12 1442 orq %r9,%r12 1443 por %xmm5,%xmm2 1444.byte 102,73,15,110,220 1445 1446 movq 384(%rsp),%rax 1447 leaq 384(%rsp),%rbx 1448 movq 0+96(%rsp),%r9 1449 movq 8+96(%rsp),%r10 1450 leaq 0+96(%rsp),%rsi 1451 movq 16+96(%rsp),%r11 1452 movq 24+96(%rsp),%r12 1453 leaq 160(%rsp),%rdi 1454 call __ecp_nistz256_mul_montq 1455 1456 movq 480(%rsp),%rax 1457 leaq 480(%rsp),%rbx 1458 movq 0+32(%rsp),%r9 1459 movq 8+32(%rsp),%r10 1460 leaq 0+32(%rsp),%rsi 1461 movq 16+32(%rsp),%r11 1462 movq 24+32(%rsp),%r12 1463 leaq 192(%rsp),%rdi 1464 call 
__ecp_nistz256_mul_montq 1465 1466 leaq 160(%rsp),%rbx 1467 leaq 0(%rsp),%rdi 1468 call __ecp_nistz256_sub_fromq 1469 1470 orq %r13,%r12 1471 orq %r8,%r12 1472 orq %r9,%r12 1473 1474.byte 0x3e 1475 jnz .Ladd_proceedq 1476.byte 102,73,15,126,208 1477.byte 102,73,15,126,217 1478 testq %r8,%r8 1479 jnz .Ladd_proceedq 1480 testq %r9,%r9 1481 jz .Ladd_doubleq 1482 1483.byte 102,72,15,126,199 1484 pxor %xmm0,%xmm0 1485 movdqu %xmm0,0(%rdi) 1486 movdqu %xmm0,16(%rdi) 1487 movdqu %xmm0,32(%rdi) 1488 movdqu %xmm0,48(%rdi) 1489 movdqu %xmm0,64(%rdi) 1490 movdqu %xmm0,80(%rdi) 1491 jmp .Ladd_doneq 1492 1493.align 32 1494.Ladd_doubleq: 1495.byte 102,72,15,126,206 1496.byte 102,72,15,126,199 1497 addq $416,%rsp 1498 jmp .Lpoint_double_shortcutq 1499 1500.align 32 1501.Ladd_proceedq: 1502 movq 0+64(%rsp),%rax 1503 movq 8+64(%rsp),%r14 1504 leaq 0+64(%rsp),%rsi 1505 movq 16+64(%rsp),%r15 1506 movq 24+64(%rsp),%r8 1507 leaq 96(%rsp),%rdi 1508 call __ecp_nistz256_sqr_montq 1509 1510 movq 448(%rsp),%rax 1511 leaq 448(%rsp),%rbx 1512 movq 0+0(%rsp),%r9 1513 movq 8+0(%rsp),%r10 1514 leaq 0+0(%rsp),%rsi 1515 movq 16+0(%rsp),%r11 1516 movq 24+0(%rsp),%r12 1517 leaq 352(%rsp),%rdi 1518 call __ecp_nistz256_mul_montq 1519 1520 movq 0+0(%rsp),%rax 1521 movq 8+0(%rsp),%r14 1522 leaq 0+0(%rsp),%rsi 1523 movq 16+0(%rsp),%r15 1524 movq 24+0(%rsp),%r8 1525 leaq 32(%rsp),%rdi 1526 call __ecp_nistz256_sqr_montq 1527 1528 movq 544(%rsp),%rax 1529 leaq 544(%rsp),%rbx 1530 movq 0+352(%rsp),%r9 1531 movq 8+352(%rsp),%r10 1532 leaq 0+352(%rsp),%rsi 1533 movq 16+352(%rsp),%r11 1534 movq 24+352(%rsp),%r12 1535 leaq 352(%rsp),%rdi 1536 call __ecp_nistz256_mul_montq 1537 1538 movq 0(%rsp),%rax 1539 leaq 0(%rsp),%rbx 1540 movq 0+32(%rsp),%r9 1541 movq 8+32(%rsp),%r10 1542 leaq 0+32(%rsp),%rsi 1543 movq 16+32(%rsp),%r11 1544 movq 24+32(%rsp),%r12 1545 leaq 128(%rsp),%rdi 1546 call __ecp_nistz256_mul_montq 1547 1548 movq 160(%rsp),%rax 1549 leaq 160(%rsp),%rbx 1550 movq 0+32(%rsp),%r9 1551 movq 
8+32(%rsp),%r10 1552 leaq 0+32(%rsp),%rsi 1553 movq 16+32(%rsp),%r11 1554 movq 24+32(%rsp),%r12 1555 leaq 192(%rsp),%rdi 1556 call __ecp_nistz256_mul_montq 1557 1558 1559 1560 1561 xorq %r11,%r11 1562 addq %r12,%r12 1563 leaq 96(%rsp),%rsi 1564 adcq %r13,%r13 1565 movq %r12,%rax 1566 adcq %r8,%r8 1567 adcq %r9,%r9 1568 movq %r13,%rbp 1569 adcq $0,%r11 1570 1571 subq $-1,%r12 1572 movq %r8,%rcx 1573 sbbq %r14,%r13 1574 sbbq $0,%r8 1575 movq %r9,%r10 1576 sbbq %r15,%r9 1577 sbbq $0,%r11 1578 1579 cmovcq %rax,%r12 1580 movq 0(%rsi),%rax 1581 cmovcq %rbp,%r13 1582 movq 8(%rsi),%rbp 1583 cmovcq %rcx,%r8 1584 movq 16(%rsi),%rcx 1585 cmovcq %r10,%r9 1586 movq 24(%rsi),%r10 1587 1588 call __ecp_nistz256_subq 1589 1590 leaq 128(%rsp),%rbx 1591 leaq 288(%rsp),%rdi 1592 call __ecp_nistz256_sub_fromq 1593 1594 movq 192+0(%rsp),%rax 1595 movq 192+8(%rsp),%rbp 1596 movq 192+16(%rsp),%rcx 1597 movq 192+24(%rsp),%r10 1598 leaq 320(%rsp),%rdi 1599 1600 call __ecp_nistz256_subq 1601 1602 movq %r12,0(%rdi) 1603 movq %r13,8(%rdi) 1604 movq %r8,16(%rdi) 1605 movq %r9,24(%rdi) 1606 movq 128(%rsp),%rax 1607 leaq 128(%rsp),%rbx 1608 movq 0+224(%rsp),%r9 1609 movq 8+224(%rsp),%r10 1610 leaq 0+224(%rsp),%rsi 1611 movq 16+224(%rsp),%r11 1612 movq 24+224(%rsp),%r12 1613 leaq 256(%rsp),%rdi 1614 call __ecp_nistz256_mul_montq 1615 1616 movq 320(%rsp),%rax 1617 leaq 320(%rsp),%rbx 1618 movq 0+64(%rsp),%r9 1619 movq 8+64(%rsp),%r10 1620 leaq 0+64(%rsp),%rsi 1621 movq 16+64(%rsp),%r11 1622 movq 24+64(%rsp),%r12 1623 leaq 320(%rsp),%rdi 1624 call __ecp_nistz256_mul_montq 1625 1626 leaq 256(%rsp),%rbx 1627 leaq 320(%rsp),%rdi 1628 call __ecp_nistz256_sub_fromq 1629 1630.byte 102,72,15,126,199 1631 1632 movdqa %xmm5,%xmm0 1633 movdqa %xmm5,%xmm1 1634 pandn 352(%rsp),%xmm0 1635 movdqa %xmm5,%xmm2 1636 pandn 352+16(%rsp),%xmm1 1637 movdqa %xmm5,%xmm3 1638 pand 544(%rsp),%xmm2 1639 pand 544+16(%rsp),%xmm3 1640 por %xmm0,%xmm2 1641 por %xmm1,%xmm3 1642 1643 movdqa %xmm4,%xmm0 1644 movdqa %xmm4,%xmm1 1645 
pandn %xmm2,%xmm0 1646 movdqa %xmm4,%xmm2 1647 pandn %xmm3,%xmm1 1648 movdqa %xmm4,%xmm3 1649 pand 448(%rsp),%xmm2 1650 pand 448+16(%rsp),%xmm3 1651 por %xmm0,%xmm2 1652 por %xmm1,%xmm3 1653 movdqu %xmm2,64(%rdi) 1654 movdqu %xmm3,80(%rdi) 1655 1656 movdqa %xmm5,%xmm0 1657 movdqa %xmm5,%xmm1 1658 pandn 288(%rsp),%xmm0 1659 movdqa %xmm5,%xmm2 1660 pandn 288+16(%rsp),%xmm1 1661 movdqa %xmm5,%xmm3 1662 pand 480(%rsp),%xmm2 1663 pand 480+16(%rsp),%xmm3 1664 por %xmm0,%xmm2 1665 por %xmm1,%xmm3 1666 1667 movdqa %xmm4,%xmm0 1668 movdqa %xmm4,%xmm1 1669 pandn %xmm2,%xmm0 1670 movdqa %xmm4,%xmm2 1671 pandn %xmm3,%xmm1 1672 movdqa %xmm4,%xmm3 1673 pand 384(%rsp),%xmm2 1674 pand 384+16(%rsp),%xmm3 1675 por %xmm0,%xmm2 1676 por %xmm1,%xmm3 1677 movdqu %xmm2,0(%rdi) 1678 movdqu %xmm3,16(%rdi) 1679 1680 movdqa %xmm5,%xmm0 1681 movdqa %xmm5,%xmm1 1682 pandn 320(%rsp),%xmm0 1683 movdqa %xmm5,%xmm2 1684 pandn 320+16(%rsp),%xmm1 1685 movdqa %xmm5,%xmm3 1686 pand 512(%rsp),%xmm2 1687 pand 512+16(%rsp),%xmm3 1688 por %xmm0,%xmm2 1689 por %xmm1,%xmm3 1690 1691 movdqa %xmm4,%xmm0 1692 movdqa %xmm4,%xmm1 1693 pandn %xmm2,%xmm0 1694 movdqa %xmm4,%xmm2 1695 pandn %xmm3,%xmm1 1696 movdqa %xmm4,%xmm3 1697 pand 416(%rsp),%xmm2 1698 pand 416+16(%rsp),%xmm3 1699 por %xmm0,%xmm2 1700 por %xmm1,%xmm3 1701 movdqu %xmm2,32(%rdi) 1702 movdqu %xmm3,48(%rdi) 1703 1704.Ladd_doneq: 1705 addq $576+8,%rsp 1706 popq %r15 1707 popq %r14 1708 popq %r13 1709 popq %r12 1710 popq %rbx 1711 popq %rbp 1712 .byte 0xf3,0xc3 1713.size ecp_nistz256_point_add,.-ecp_nistz256_point_add 1714.globl ecp_nistz256_point_add_affine 1715.type ecp_nistz256_point_add_affine,@function 1716.align 32 1717ecp_nistz256_point_add_affine: 1718 pushq %rbp 1719 pushq %rbx 1720 pushq %r12 1721 pushq %r13 1722 pushq %r14 1723 pushq %r15 1724 subq $480+8,%rsp 1725 1726 movdqu 0(%rsi),%xmm0 1727 movq %rdx,%rbx 1728 movdqu 16(%rsi),%xmm1 1729 movdqu 32(%rsi),%xmm2 1730 movdqu 48(%rsi),%xmm3 1731 movdqu 64(%rsi),%xmm4 1732 movdqu 
# ----------------------------------------------------------------------
# Body and epilogue of ecp_nistz256_point_add_affine (prologue is on the
# preceding source line).  Constant-time mixed point addition for P-256 in
# the Montgomery domain.  NOTE(review): reproduced token-for-token from a
# layout-mangled extraction, reformatted one instruction per line; helper
# register conventions are inferred from the setup patterns — TODO confirm
# against the helper definitions elsewhere in this file.
# ----------------------------------------------------------------------
80(%rsi),%xmm5                          # operands of the movdqu whose mnemonic
                                        # ends the preceding source line
        movq    64+0(%rsi),%rax                 # a->Z limbs for the squaring below
        movq    64+8(%rsi),%r14
        movq    64+16(%rsi),%r15
        movq    64+24(%rsi),%r8
        movdqa  %xmm0,320(%rsp)                 # spill a->X
        movdqa  %xmm1,320+16(%rsp)
        movdqa  %xmm2,352(%rsp)                 # spill a->Y
        movdqa  %xmm3,352+16(%rsp)
        movdqa  %xmm4,384(%rsp)                 # spill a->Z
        movdqa  %xmm5,384+16(%rsp)
        por     %xmm4,%xmm5                     # OR of Z words, for an is-zero test

        # Load affine point b (x,y) and spill it, while folding all of b's
        # words together (xmm3/xmm1) for its own is-zero test.
        movdqu  0(%rbx),%xmm0
        pshufd  $0xb1,%xmm5,%xmm3
        movdqu  16(%rbx),%xmm1
        movdqu  32(%rbx),%xmm2
        por     %xmm3,%xmm5
        movdqu  48(%rbx),%xmm3
        movdqa  %xmm0,416(%rsp)                 # spill b->X
        pshufd  $0x1e,%xmm5,%xmm4
        movdqa  %xmm1,416+16(%rsp)
        por     %xmm0,%xmm1
.byte   102,72,15,110,199                       # movq %rdi,%xmm0 (66 REX.W 0F 6E C7):
                                                # save result pointer in an xmm reg
        movdqa  %xmm2,448(%rsp)                 # spill b->Y
        movdqa  %xmm3,448+16(%rsp)
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        pxor    %xmm4,%xmm4
        por     %xmm1,%xmm3

        leaq    64-0(%rsi),%rsi
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq        # 32(%rsp) = Z1^2 (Montgomery)

        # Finish building the all-ones/all-zero masks: xmm5 = (a->Z == 0),
        # xmm4 = (b == 0), broadcast to all lanes via pcmpeqd/pshufd —
        # branchless infinity handling.
        pcmpeqd %xmm4,%xmm5
        pshufd  $0xb1,%xmm3,%xmm4
        movq    0(%rbx),%rax

        movq    %r12,%r9
        por     %xmm3,%xmm4
        pshufd  $0,%xmm5,%xmm5
        pshufd  $0x1e,%xmm4,%xmm3
        movq    %r13,%r10
        por     %xmm3,%xmm4
        pxor    %xmm3,%xmm3
        movq    %r14,%r11
        pcmpeqd %xmm3,%xmm4
        pshufd  $0,%xmm4,%xmm4

        leaq    32-0(%rsp),%rsi
        movq    %r15,%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 0(%rsp) = b->X * Z1^2 (= U2)

        leaq    320(%rsp),%rbx
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq        # 64(%rsp) = U2 - a->X (= H)

        movq    384(%rsp),%rax
        leaq    384(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 32(%rsp) = Z1^2 * Z1 (= Z1^3)

        movq    384(%rsp),%rax
        leaq    384(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    288(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 288(%rsp) = H * Z1 (new Z)

        movq    448(%rsp),%rax
        leaq    448(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 32(%rsp) = b->Y * Z1^3 (= S2)

        leaq    352(%rsp),%rbx
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq        # 96(%rsp) = S2 - a->Y (= R)

        movq    0+64(%rsp),%rax
        movq    8+64(%rsp),%r14
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r15
        movq    24+64(%rsp),%r8
        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq        # 128(%rsp) = H^2

        movq    0+96(%rsp),%rax
        movq    8+96(%rsp),%r14
        leaq    0+96(%rsp),%rsi
        movq    16+96(%rsp),%r15
        movq    24+96(%rsp),%r8
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq        # 192(%rsp) = R^2

        movq    128(%rsp),%rax
        leaq    128(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    160(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 160(%rsp) = H^2 * H (= H^3)

        movq    320(%rsp),%rax
        leaq    320(%rsp),%rbx
        movq    0+128(%rsp),%r9
        movq    8+128(%rsp),%r10
        leaq    0+128(%rsp),%rsi
        movq    16+128(%rsp),%r11
        movq    24+128(%rsp),%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 0(%rsp) = a->X * H^2 (= U1*H^2)

        # Branchless 2*(U1*H^2) mod p: double r12:r13:r8:r9 with the carry
        # bit caught in r11, then conditionally subtract p.
        xorq    %r11,%r11
        addq    %r12,%r12
        leaq    192(%rsp),%rsi
        adcq    %r13,%r13
        movq    %r12,%rax                       # keep pre-reduction copies for cmovc
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        adcq    $0,%r11

        # Subtract p; .Lpoly[0] = 0xffffffffffffffff, hence subq $-1; r14/r15
        # presumably hold .Lpoly[1] and .Lpoly[3] (set by the helpers above —
        # TODO confirm); .Lpoly[2] = 0 explains the plain sbbq $0,%r8.
        subq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9
        sbbq    $0,%r11                         # borrow => went below zero

        # Constant-time select of the reduced/unreduced value, interleaved
        # with loading R^2 from 192(%rsp) for the subtraction helper.
        cmovcq  %rax,%r12
        movq    0(%rsi),%rax
        cmovcq  %rbp,%r13
        movq    8(%rsi),%rbp
        cmovcq  %rcx,%r8
        movq    16(%rsi),%rcx
        cmovcq  %r10,%r9
        movq    24(%rsi),%r10

        call    __ecp_nistz256_subq             # R^2 - 2*U1*H^2

        leaq    160(%rsp),%rbx
        leaq    224(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq        # 224(%rsp) = new X = R^2 - 2*U1*H^2 - H^3

        movq    0+0(%rsp),%rax
        movq    0+8(%rsp),%rbp
        movq    0+16(%rsp),%rcx
        movq    0+24(%rsp),%r10
        leaq    64(%rsp),%rdi

        call    __ecp_nistz256_subq             # U1*H^2 - new X

        movq    %r12,0(%rdi)                    # store at 64(%rsp)
        movq    %r13,8(%rdi)
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)

        movq    352(%rsp),%rax
        leaq    352(%rsp),%rbx
        movq    0+160(%rsp),%r9
        movq    8+160(%rsp),%r10
        leaq    0+160(%rsp),%rsi
        movq    16+160(%rsp),%r11
        movq    24+160(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 32(%rsp) = a->Y * H^3 (= S1*H^3)

        movq    96(%rsp),%rax
        leaq    96(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_mul_montq        # 64(%rsp) = R * (U1*H^2 - new X)

        leaq    32(%rsp),%rbx
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq        # 256(%rsp) = new Y

.byte   102,72,15,126,199                       # movq %xmm0,%rdi: recover result pointer

        # Constant-time 3-way select of the result Z coordinate using the
        # masks built above: xmm5 = (a is infinity), xmm4 = (b is zero).
        # Z_out = xmm5 ? ONE_mont : 288(%rsp); then xmm4 ? a->Z : that.
        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   288(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   288+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    .LONE_mont(%rip),%xmm2          # 1 in Montgomery form (affine b has implicit Z=1)
        pand    .LONE_mont+16(%rip),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    384(%rsp),%xmm2
        pand    384+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,64(%rdi)                  # result->Z
        movdqu  %xmm3,80(%rdi)

        # Same masked select for the X coordinate.
        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   224(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   224+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    416(%rsp),%xmm2
        pand    416+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    320(%rsp),%xmm2
        pand    320+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,0(%rdi)                   # result->X
        movdqu  %xmm3,16(%rdi)

        # Same masked select for the Y coordinate.
        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   256(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   256+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    448(%rsp),%xmm2
        pand    448+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0                     # (instruction rejoined across an extraction split)
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    352(%rsp),%xmm2
        pand    352+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,32(%rdi)                  # result->Y
        movdqu  %xmm3,48(%rdi)

        # Epilogue: release the 480-byte frame (+8 alignment pad) and restore
        # callee-saved registers in reverse push order.
        addq    $480+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3                       # rep ret (F3 C3)
.size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine