/* bn-586.S revision 1.7 */
/*
 * Multi-word integer primitives for 32-bit x86, GNU as / AT&T syntax.
 *
 * Every routine reads its arguments from the stack (first argument at
 * 4(%esp) on entry), returns its result in %eax, and saves/restores
 * %ebp, %ebx, %esi and %edi whenever it uses them.  A "word" is 32 bits.
 *
 * bn_mul_add_words, bn_mul_words and bn_sqr_words have two code paths.
 * Bit 26 of the first dword of OPENSSL_ia32cap_P selects between them:
 * set   -> the path using pmuludq/paddq on MMX registers (the labels call
 *          it the SSE2 path),
 * clear -> the plain integer path using mull.
 *
 * The unrolled instruction runs are written with .irp/.rept; they expand
 * to one copy of the body per listed displacement.
 *
 * NOTE: several sequences below keep a carry/borrow in CF across movl and
 * decl instructions (neither modifies CF).  Do not replace "movl $0,reg"
 * with "xorl reg,reg" in those sequences: xorl clears CF.
 */
#include <machine/asm.h>
.text

/*
 * bn_mul_add_words
 *
 *    4(%esp)  r    words that are read, updated and written back
 *    8(%esp)  a    source words
 *   12(%esp)  num  number of words to process
 *   16(%esp)  w    32-bit multiplier
 *
 * For i = 0 .. num-1:  t = a[i]*w + r[i] + carry;
 *                      r[i] = low 32 bits of t;  carry = high 32 bits of t.
 * Returns the final carry in %eax.  num == 0 stores nothing and returns 0
 * on both paths.  Negative counts are not handled by either path.
 */
.globl	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
	/* call/pop yields the address of the label, giving a PC-relative
	 * way to reach OPENSSL_ia32cap_P. */
	call	.L000PIC_me_up
.L000PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L001maw_non_sse2

	/* MMX-register path: %eax = r, %edx = a, %ecx = num,
	 * %mm0 = w, %mm1 = running 64-bit sum whose high half is the carry. */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	/* The single-word loop below is bottom-tested; entering it with
	 * num == 0 would wrap the counter and run far past both buffers.
	 * Leave with carry 0 instead, as the integer path does. */
	testl	%ecx,%ecx
	jz	.L004maw_sse2_exit
	jmp	.L002maw_sse2_entry
.align	16
.L003maw_sse2_unrolled:
	/* Eight words per pass, software-pipelined: the products for words
	 * 4..7 are started while the sums for words 0..3 are being stored. */
	movd	(%eax),%mm3
	paddq	%mm3,%mm1
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2
	movd	4(%edx),%mm4
	pmuludq	%mm0,%mm4
	movd	8(%edx),%mm6
	pmuludq	%mm0,%mm6
	movd	12(%edx),%mm7
	pmuludq	%mm0,%mm7
	paddq	%mm2,%mm1
	movd	4(%eax),%mm3
	paddq	%mm4,%mm3
	movd	8(%eax),%mm5
	paddq	%mm6,%mm5
	movd	12(%eax),%mm4
	paddq	%mm4,%mm7
	movd	%mm1,(%eax)
	movd	16(%edx),%mm2
	pmuludq	%mm0,%mm2
	psrlq	$32,%mm1
	movd	20(%edx),%mm4
	pmuludq	%mm0,%mm4
	paddq	%mm3,%mm1
	movd	24(%edx),%mm6
	pmuludq	%mm0,%mm6
	movd	%mm1,4(%eax)
	psrlq	$32,%mm1
	movd	28(%edx),%mm3
	addl	$32,%edx
	pmuludq	%mm0,%mm3
	paddq	%mm5,%mm1
	movd	16(%eax),%mm5
	paddq	%mm5,%mm2
	movd	%mm1,8(%eax)
	psrlq	$32,%mm1
	paddq	%mm7,%mm1
	movd	20(%eax),%mm5
	paddq	%mm5,%mm4
	movd	%mm1,12(%eax)
	psrlq	$32,%mm1
	paddq	%mm2,%mm1
	movd	24(%eax),%mm5
	paddq	%mm5,%mm6
	movd	%mm1,16(%eax)
	psrlq	$32,%mm1
	paddq	%mm4,%mm1
	movd	28(%eax),%mm5
	paddq	%mm5,%mm3
	movd	%mm1,20(%eax)
	psrlq	$32,%mm1
	paddq	%mm6,%mm1
	movd	%mm1,24(%eax)
	psrlq	$32,%mm1
	paddq	%mm3,%mm1
	movd	%mm1,28(%eax)
	leal	32(%eax),%eax
	psrlq	$32,%mm1
	subl	$8,%ecx
	jz	.L004maw_sse2_exit
.L002maw_sse2_entry:
	/* 0xfffffff8 masks off the low three bits: non-zero means at
	 * least eight words remain. */
	testl	$0xfffffff8,%ecx
	jnz	.L003maw_sse2_unrolled
.align	4
.L005maw_sse2_loop:
	/* One word per pass; %ecx is 1..7 here. */
	movd	(%edx),%mm2
	movd	(%eax),%mm3
	pmuludq	%mm0,%mm2
	leal	4(%edx),%edx
	paddq	%mm3,%mm1
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)
	subl	$1,%ecx
	psrlq	$32,%mm1
	leal	4(%eax),%eax		/* leal keeps ZF from subl for the jnz */
	jnz	.L005maw_sse2_loop
.L004maw_sse2_exit:
	movd	%mm1,%eax
	emms
	ret
.align	16
.L001maw_non_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* Integer path: %edi = r, %ebx = a, %ebp = w, %esi = carry,
	 * %ecx = num rounded down to a multiple of 8. */
	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	28(%esp),%ecx
	movl	24(%esp),%ebx
	andl	$0xfffffff8,%ecx
	movl	32(%esp),%ebp
	/* movl and pushl leave the flags alone, so the jz below still
	 * tests the andl result.  The pushed value is never read back; it
	 * only shifts the argument offsets by 4 until the popl at the end. */
	pushl	%ecx
	jz	.L006maw_finish
.align	16
.L007maw_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	\disp(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	\disp(%edi),%eax
	adcl	$0,%edx
	movl	%eax,\disp(%edi)
	movl	%edx,%esi
	.endr

	subl	$8,%ecx
	leal	32(%ebx),%ebx
	leal	32(%edi),%edi
	jnz	.L007maw_loop
.L006maw_finish:
	movl	32(%esp),%ecx		/* num (offset includes the extra push) */
	andl	$7,%ecx
	jnz	.L008maw_finish2
	jmp	.L009maw_end
.L008maw_finish2:
	/* 1..7 leftover words.  The first six copies count down and leave
	 * early; reaching the seventh means exactly seven words were left. */
	.irp	disp,0,4,8,12,16,20
	movl	\disp(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	\disp(%edi),%eax
	adcl	$0,%edx
	decl	%ecx
	movl	%eax,\disp(%edi)
	movl	%edx,%esi
	jz	.L009maw_end
	.endr

	movl	24(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	24(%edi),%eax
	adcl	$0,%edx
	movl	%eax,24(%edi)
	movl	%edx,%esi
.L009maw_end:
	movl	%esi,%eax
	popl	%ecx
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin

/*
 * bn_mul_words
 *
 *    4(%esp)  r    destination words (written only)
 *    8(%esp)  a    source words
 *   12(%esp)  num  number of words to process
 *   16(%esp)  w    32-bit multiplier
 *
 * For i = 0 .. num-1:  t = a[i]*w + carry;
 *                      r[i] = low 32 bits of t;  carry = high 32 bits of t.
 * Returns the final carry in %eax.  num == 0 stores nothing and returns 0
 * on both paths.  Negative counts are not handled by either path.
 */
.globl	bn_mul_words
.type	bn_mul_words,@function
.align	16
bn_mul_words:
.L_bn_mul_words_begin:
	call	.L010PIC_me_up
.L010PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L011mw_non_sse2

	/* MMX-register path: %eax = r, %edx = a, %ecx = num,
	 * %mm0 = w, %mm1 = running sum / carry. */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	/* Bottom-tested loop: skip it entirely for num == 0. */
	testl	%ecx,%ecx
	jz	.Lmw_sse2_exit
.align	16
.L012mw_sse2_loop:
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2
	leal	4(%edx),%edx
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)
	subl	$1,%ecx
	psrlq	$32,%mm1
	leal	4(%eax),%eax		/* leal keeps ZF from subl for the jnz */
	jnz	.L012mw_sse2_loop
.Lmw_sse2_exit:
	movd	%mm1,%eax
	emms
	ret
.align	16
.L011mw_non_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* Integer path: %edi = r, %ebx = a, %ecx = w, %esi = carry,
	 * %ebp = num rounded down to a multiple of 8. */
	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	movl	28(%esp),%ebp
	movl	32(%esp),%ecx
	andl	$0xfffffff8,%ebp
	jz	.L013mw_finish
.L014mw_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	\disp(%ebx),%eax
	mull	%ecx
	addl	%esi,%eax
	adcl	$0,%edx
	movl	%eax,\disp(%edi)
	movl	%edx,%esi
	.endr

	addl	$32,%ebx
	addl	$32,%edi
	subl	$8,%ebp
	jz	.L013mw_finish
	jmp	.L014mw_loop
.L013mw_finish:
	movl	28(%esp),%ebp
	andl	$7,%ebp
	jnz	.L015mw_finish2
	jmp	.L016mw_end
.L015mw_finish2:
	/* 1..7 leftover words; the seventh copy needs no countdown. */
	.irp	disp,0,4,8,12,16,20
	movl	\disp(%ebx),%eax
	mull	%ecx
	addl	%esi,%eax
	adcl	$0,%edx
	movl	%eax,\disp(%edi)
	movl	%edx,%esi
	decl	%ebp
	jz	.L016mw_end
	.endr

	movl	24(%ebx),%eax
	mull	%ecx
	addl	%esi,%eax
	adcl	$0,%edx
	movl	%eax,24(%edi)
	movl	%edx,%esi
.L016mw_end:
	movl	%esi,%eax
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_words,.-.L_bn_mul_words_begin

/*
 * bn_sqr_words
 *
 *    4(%esp)  r    destination, two words per source word
 *    8(%esp)  a    source words
 *   12(%esp)  num  number of source words
 *
 * For i = 0 .. num-1:  r[2i] = low 32 bits of a[i]*a[i],
 *                      r[2i+1] = high 32 bits of a[i]*a[i].
 * No return value is set up.  num == 0 stores nothing on both paths.
 * Negative counts are not handled by either path.
 */
.globl	bn_sqr_words
.type	bn_sqr_words,@function
.align	16
bn_sqr_words:
.L_bn_sqr_words_begin:
	call	.L017PIC_me_up
.L017PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L018sqr_non_sse2

	/* MMX-register path: %eax = r, %edx = a, %ecx = num. */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	/* Bottom-tested loop: skip it entirely for num == 0. */
	testl	%ecx,%ecx
	jz	.Lsqr_sse2_exit
.align	16
.L019sqr_sse2_loop:
	movd	(%edx),%mm0
	pmuludq	%mm0,%mm0
	leal	4(%edx),%edx
	movq	%mm0,(%eax)
	subl	$1,%ecx
	leal	8(%eax),%eax		/* leal keeps ZF from subl for the jnz */
	jnz	.L019sqr_sse2_loop
.Lsqr_sse2_exit:
	emms
	ret
.align	16
.L018sqr_non_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* Integer path: %esi = r, %edi = a,
	 * %ebx = num rounded down to a multiple of 8. */
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%ebx
	andl	$0xfffffff8,%ebx
	jz	.L020sw_finish
.L021sw_loop:
	.irp	idx,0,1,2,3,4,5,6,7
	movl	4*\idx(%edi),%eax
	mull	%eax
	movl	%eax,8*\idx(%esi)
	movl	%edx,8*\idx+4(%esi)
	.endr

	addl	$32,%edi
	addl	$64,%esi
	subl	$8,%ebx
	jnz	.L021sw_loop
.L020sw_finish:
	movl	28(%esp),%ebx
	andl	$7,%ebx
	jz	.L022sw_end

	/* 1..7 leftover words; the seventh copy needs no countdown. */
	.irp	idx,0,1,2,3,4,5
	movl	4*\idx(%edi),%eax
	mull	%eax
	movl	%eax,8*\idx(%esi)
	decl	%ebx
	movl	%edx,8*\idx+4(%esi)
	jz	.L022sw_end
	.endr

	movl	24(%edi),%eax
	mull	%eax
	movl	%eax,48(%esi)
	movl	%edx,52(%esi)
.L022sw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sqr_words,.-.L_bn_sqr_words_begin

/*
 * bn_div_words
 *
 *    4(%esp)  h  high 32 bits of the dividend
 *    8(%esp)  l  low 32 bits of the dividend
 *   12(%esp)  d  divisor
 *
 * Returns the quotient of the 64-bit value h:l divided by d in %eax.
 * There is no argument checking: divl raises a divide error when d is 0
 * or when the quotient does not fit in 32 bits (that is, when h >= d).
 */
.globl	bn_div_words
.type	bn_div_words,@function
.align	16
bn_div_words:
.L_bn_div_words_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	movl	12(%esp),%ecx
	divl	%ecx
	ret
.size	bn_div_words,.-.L_bn_div_words_begin

/*
 * bn_add_words
 *
 *    4(%esp)  r    destination words
 *    8(%esp)  a    first source
 *   12(%esp)  b    second source
 *   16(%esp)  num  number of words
 *
 * For i = 0 .. num-1:  r[i] = a[i] + b[i] + carry (mod 2^32).
 * Returns the final carry (0 or 1) in %eax; num == 0 returns 0.
 *
 * Registers: %ebx = r, %esi = a, %edi = b, %ebp = count, %eax = carry.
 * Per word the carry is rebuilt in %eax from the two additions:
 * "movl $0,%eax; adcl %eax,%eax" captures the first CF, "adcl $0,%eax"
 * adds the second.
 */
.globl	bn_add_words
.type	bn_add_words,@function
.align	16
bn_add_words:
.L_bn_add_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp
	jz	.L023aw_finish
.L024aw_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	\disp(%esi),%ecx
	movl	\disp(%edi),%edx
	addl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	addl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\disp(%ebx)
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L024aw_loop
.L023aw_finish:
	movl	32(%esp),%ebp
	andl	$7,%ebp
	jz	.L025aw_end

	/* 1..7 leftover words; the seventh copy needs no countdown. */
	.irp	disp,0,4,8,12,16,20
	movl	\disp(%esi),%ecx
	movl	\disp(%edi),%edx
	addl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	addl	%edx,%ecx
	adcl	$0,%eax
	decl	%ebp
	movl	%ecx,\disp(%ebx)
	jz	.L025aw_end
	.endr

	movl	24(%esi),%ecx
	movl	24(%edi),%edx
	addl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	addl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,24(%ebx)
.L025aw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_add_words,.-.L_bn_add_words_begin

/*
 * bn_sub_words
 *
 *    4(%esp)  r    destination words
 *    8(%esp)  a    minuend
 *   12(%esp)  b    subtrahend
 *   16(%esp)  num  number of words
 *
 * For i = 0 .. num-1:  r[i] = a[i] - b[i] - borrow (mod 2^32).
 * Returns the final borrow (0 or 1) in %eax; num == 0 returns 0.
 *
 * Same register roles and carry-rebuilding scheme as bn_add_words, with
 * subl in place of addl.
 */
.globl	bn_sub_words
.type	bn_sub_words,@function
.align	16
bn_sub_words:
.L_bn_sub_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp
	jz	.L026aw_finish
.L027aw_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	\disp(%esi),%ecx
	movl	\disp(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\disp(%ebx)
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L027aw_loop
.L026aw_finish:
	movl	32(%esp),%ebp
	andl	$7,%ebp
	jz	.L028aw_end

	/* 1..7 leftover words; the seventh copy needs no countdown. */
	.irp	disp,0,4,8,12,16,20
	movl	\disp(%esi),%ecx
	movl	\disp(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	decl	%ebp
	movl	%ecx,\disp(%ebx)
	jz	.L028aw_end
	.endr

	movl	24(%esi),%ecx
	movl	24(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,24(%ebx)
.L028aw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sub_words,.-.L_bn_sub_words_begin

/*
 * bn_sub_part_words
 *
 *    4(%esp)  r   destination words
 *    8(%esp)  a   minuend
 *   12(%esp)  b   subtrahend
 *   16(%esp)  cl  number of words present in both a and b
 *   20(%esp)  dl  signed count of extra words:
 *                   dl < 0  ->  -dl further words come from b only
 *                   dl > 0  ->   dl further words come from a only
 *
 * Phase 1 computes r[i] = a[i] - b[i] - borrow for the cl common words,
 * exactly like bn_sub_words, and leaves all three pointers just past them.
 * Phase 2 extends the result over |dl| more words:
 *   dl < 0:  r[i] = 0 - b[i] - borrow
 *   dl > 0:  r[i] = a[i] - borrow; as soon as a word produces no borrow
 *            the remaining words of a are copied unchanged.
 * Returns the final borrow (0 or 1) in %eax.
 *
 * Registers: %ebx = r, %esi = a, %edi = b, %ebp = count, %eax = borrow.
 */
.globl	bn_sub_part_words
.type	bn_sub_part_words,@function
.align	16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp
	jz	.L029aw_finish
.L030aw_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	\disp(%esi),%ecx
	movl	\disp(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\disp(%ebx)
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L030aw_loop
.L029aw_finish:
	movl	32(%esp),%ebp
	andl	$7,%ebp
	jz	.L031aw_end

	/* 1..7 leftover common words.  Unlike bn_sub_words the pointers
	 * are stepped after every word, because phase 2 continues from
	 * wherever phase 1 stopped. */
	.rept	6
	movl	(%esi),%ecx
	movl	(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,(%ebx)
	addl	$4,%esi
	addl	$4,%edi
	addl	$4,%ebx
	decl	%ebp
	jz	.L031aw_end
	.endr

	movl	(%esi),%ecx
	movl	(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,(%ebx)
	addl	$4,%esi
	addl	$4,%edi
	addl	$4,%ebx
.L031aw_end:
	/* Phase 2: dispatch on the sign of dl. */
	cmpl	$0,36(%esp)
	je	.L032pw_end
	movl	36(%esp),%ebp
	cmpl	$0,%ebp
	je	.L032pw_end
	jge	.L033pw_pos

	/* dl < 0: %ebp = -dl, rounded down to a multiple of 8. */
	movl	$0,%edx
	subl	%ebp,%edx
	movl	%edx,%ebp
	andl	$0xfffffff8,%ebp
	jz	.L034pw_neg_finish
.L035pw_neg_loop:
	.irp	disp,0,4,8,12,16,20,24,28
	movl	$0,%ecx
	movl	\disp(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\disp(%ebx)
	.endr

	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L035pw_neg_loop
.L034pw_neg_finish:
	movl	36(%esp),%edx
	movl	$0,%ebp
	subl	%edx,%ebp
	andl	$7,%ebp			/* (-dl) & 7 leftover words */
	jz	.L032pw_end

	.irp	disp,0,4,8,12,16,20
	movl	$0,%ecx
	movl	\disp(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	decl	%ebp
	movl	%ecx,\disp(%ebx)
	jz	.L032pw_end
	.endr

	movl	$0,%ecx
	movl	24(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,24(%ebx)
	jmp	.L032pw_end
.L033pw_pos:
	/* dl > 0: subtract the pending borrow from a[i].  %eax is not
	 * rewritten inside this loop: while it runs the borrow is still 1
	 * (a borrow out of "a[i] - %eax" requires %eax == 1).  The first
	 * word without a borrow jumps into the plain copy code below at
	 * the matching position. */
	andl	$0xfffffff8,%ebp
	jz	.L036pw_pos_finish
.L037pw_pos_loop:
	movl	(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,(%ebx)
	jnc	.L038pw_nc0

	movl	4(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,4(%ebx)
	jnc	.L039pw_nc1

	movl	8(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,8(%ebx)
	jnc	.L040pw_nc2

	movl	12(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,12(%ebx)
	jnc	.L041pw_nc3

	movl	16(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,16(%ebx)
	jnc	.L042pw_nc4

	movl	20(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,20(%ebx)
	jnc	.L043pw_nc5

	movl	24(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,24(%ebx)
	jnc	.L044pw_nc6

	movl	28(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,28(%ebx)
	jnc	.L045pw_nc7

	addl	$32,%esi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L037pw_pos_loop
.L036pw_pos_finish:
	movl	36(%esp),%ebp
	andl	$7,%ebp
	jz	.L032pw_end

	movl	(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,(%ebx)
	jnc	.L046pw_tail_nc0
	decl	%ebp
	jz	.L032pw_end

	movl	4(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,4(%ebx)
	jnc	.L047pw_tail_nc1
	decl	%ebp
	jz	.L032pw_end

	movl	8(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,8(%ebx)
	jnc	.L048pw_tail_nc2
	decl	%ebp
	jz	.L032pw_end

	movl	12(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,12(%ebx)
	jnc	.L049pw_tail_nc3
	decl	%ebp
	jz	.L032pw_end

	movl	16(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,16(%ebx)
	jnc	.L050pw_tail_nc4
	decl	%ebp
	jz	.L032pw_end

	movl	20(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,20(%ebx)
	jnc	.L051pw_tail_nc5
	decl	%ebp
	jz	.L032pw_end

	movl	24(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,24(%ebx)
	jnc	.L052pw_tail_nc6
	movl	$1,%eax			/* borrow survived every extra word */
	jmp	.L032pw_end
.L053pw_nc_loop:
	/* No borrow pending: copy the rest of a into r.  The pw_ncN
	 * labels are entry points for the word following word N. */
	movl	(%esi),%ecx
	movl	%ecx,(%ebx)
.L038pw_nc0:
	movl	4(%esi),%ecx
	movl	%ecx,4(%ebx)
.L039pw_nc1:
	movl	8(%esi),%ecx
	movl	%ecx,8(%ebx)
.L040pw_nc2:
	movl	12(%esi),%ecx
	movl	%ecx,12(%ebx)
.L041pw_nc3:
	movl	16(%esi),%ecx
	movl	%ecx,16(%ebx)
.L042pw_nc4:
	movl	20(%esi),%ecx
	movl	%ecx,20(%ebx)
.L043pw_nc5:
	movl	24(%esi),%ecx
	movl	%ecx,24(%ebx)
.L044pw_nc6:
	movl	28(%esi),%ecx
	movl	%ecx,28(%ebx)
.L045pw_nc7:

	addl	$32,%esi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L053pw_nc_loop
	movl	36(%esp),%ebp
	andl	$7,%ebp
	jz	.L054pw_nc_end
	movl	(%esi),%ecx
	movl	%ecx,(%ebx)
.L046pw_tail_nc0:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	4(%esi),%ecx
	movl	%ecx,4(%ebx)
.L047pw_tail_nc1:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	8(%esi),%ecx
	movl	%ecx,8(%ebx)
.L048pw_tail_nc2:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	12(%esi),%ecx
	movl	%ecx,12(%ebx)
.L049pw_tail_nc3:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	16(%esi),%ecx
	movl	%ecx,16(%ebx)
.L050pw_tail_nc4:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	20(%esi),%ecx
	movl	%ecx,20(%ebx)
.L051pw_tail_nc5:
	decl	%ebp
	jz	.L054pw_nc_end
	movl	24(%esi),%ecx
	movl	%ecx,24(%ebx)
.L052pw_tail_nc6:
.L054pw_nc_end:
	movl	$0,%eax
.L032pw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sub_part_words,.-.L_bn_sub_part_words_begin

/* Capability words tested by the routines above (16 bytes, 4-byte
 * aligned common symbol). */
.comm	OPENSSL_ia32cap_P,16,4