# x86-mont.S revision 303975
# $FreeBSD: releng/11.0/secure/lib/libcrypto/i386/x86-mont.S 299481 2016-05-11 20:11:21Z jkim $
# Do not modify.  This file is auto-generated from x86-mont.pl.
/*
 * bn_mul_mont -- word-serial Montgomery multiplication, i386, AT&T syntax,
 * arguments on the stack.  The routine reads six stack arguments; the names
 * used in the comments below (rp, ap, bp, np, n0, num) follow the OpenSSL C
 * prototype and are an assumption to confirm against the caller:
 *
 *	arg0 rp   result vector (written in the common tail)
 *	arg1 ap   first multiplicand
 *	arg2 bp   second multiplicand
 *	arg3 np   modulus (subtracted in the common tail)
 *	arg4 n0   pointer; only the first 32-bit word it points to is loaded
 *	arg5 num  number of 32-bit words per vector
 *
 * Return value in %eax: 0 when num < 4 (nothing else is touched),
 * 1 after a completed multiplication.
 *
 * Private frame built below the caller's stack (offsets from the new %esp):
 *
 *	 0	num-1			(written by the squaring path only)
 *	 4	rp
 *	 8	ap
 *	12	bp; the generic integer path advances it word by word,
 *		the squaring path stores the outer index here instead
 *	16	np
 *	20	n0 word
 *	24	%esp as it was after the four register pushes
 *	28	&bp[num], end marker for the generic integer path
 *	32..	tp[0..num+1], the temporary accumulator
 *
 * Three code paths share one tail: an MMX/PMULUDQ path taken when bit 26 of
 * the first word of OPENSSL_ia32cap_P is set, a generic integer path, and an
 * integer squaring path taken when ap == bp and num is even.
 *
 * The two preprocessor branches differ only in how the address of
 * OPENSSL_ia32cap_P is formed (call/pop in the PIC branch, absolute in the
 * other) and in the numbering of the local labels.
 */
#ifdef PIC
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* %eax = 0 is the return value for the early exit below. */
	xorl	%eax,%eax
	movl	40(%esp),%edi		/* %edi = num */
	cmpl	$4,%edi
	jl	.L000just_leave
	leal	20(%esp),%esi		/* %esi -> argument block (arg0 slot) */
	leal	24(%esp),%edx		/* %edx =  address of the arg1 slot */
	movl	%esp,%ebp		/* remember the caller-side %esp */
	/* Reserve 32 bytes of frame plus (num+2) words for tp. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp
	negl	%edi			/* %edi = num+2 again */
	/*
	 * Slide %esp further down: first by ((%esp - %edx) & 2047), then by
	 * 2048 when bit 11 of (%edx ^ %esp) is clear, then round down to a
	 * multiple of 64.  NOTE(review): upstream describes this placement as
	 * a cache-contention measure; confirm against x86-mont.pl.
	 */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	/*
	 * Read one word per 4096-byte step between the old and the new stack
	 * pointer, highest offset first, until the offset goes negative.
	 */
	movl	%ebp,%eax
	subl	%esp,%eax
	andl	$-4096,%eax
.L001page_walk:
	movl	(%esp,%eax,1),%edx
	subl	$4096,%eax
	/* 0x2E prefix byte emitted in front of the jnc that follows. */
.byte	46
	jnc	.L001page_walk
	/* Copy the arguments into the private frame. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	16(%esi),%esi
	movl	(%esi),%esi		/* dereference arg4: the n0 word */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx		/* %ebx = num-1 */
	movl	%ebp,24(%esp)
	/* Position-independent address of OPENSSL_ia32cap_P via call/pop. */
	call	.L002PIC_me_up
.L002PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L002PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L003non_sse2
	/*
	 * MMX/PMULUDQ path.
	 * %mm7 = low-32-bit mask, %esi = ap, %edi = bp, %ebp = np,
	 * %edx = outer index, %ecx = inner index,
	 * %mm4 = bp[outer], %mm5 = tp[0]-derived multiplier for np,
	 * %mm2 / %mm3 = running carries of the ap and np products.
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5		/* ap[0]*bp[0] */
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5		/* low word times the n0 word */
	pmuludq	%mm5,%mm3		/* np[0] times that multiplier */
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
	/* First pass: tp is written, not yet read. */
.L0041st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)	/* tp[%ecx-1] */
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0041st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)	/* 64-bit store: tp[num-1], tp[num] */
	incl	%edx
	/* Remaining passes: same as above but tp is also added in (%mm6). */
.L005outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx			/* %ebx doubles as the inner down-counter */
.L006inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx		/* lea keeps the flags set by decl */
	jnz	.L006inner
	movl	%ecx,%ebx		/* restore %ebx from the inner index */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L005outer
	emms
	jmp	.L007common_tail
.align	16
	/*
	 * Integer paths.  %ebp = (num & 1) | (ap - bp); it is zero, and the
	 * squaring path is taken, only when num is even and ap == bp.  The
	 * movl between the orl and the jz leaves the flags untouched.
	 */
.L003non_sse2:
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax	/* %eax = &bp[num] */
	orl	%edx,%ebp
	movl	(%edi),%edi		/* %edi = bp[0] */
	jz	.L008bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
	/* tp = ap * bp[0]; %ebp carries the high word between iterations. */
.L009mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L009mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi		/* switch %esi to np */
	adcl	$0,%edx
	imull	32(%esp),%edi		/* %edi = n0 word * tp[0] */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax		/* only the carry out of this add is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0102ndmadd
.align	16
	/* tp += ap * bp[i] for the current word of bp held in %edi. */
.L0111stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0111stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
	/*
	 * tp = (tp + np * %edi) shifted down one word: each result word is
	 * stored one slot lower (24(...) here versus 32(...) when read).
	 */
.L0102ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0102ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx		/* advance the bp cursor by one word */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx		/* reached &bp[num]? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0111stmadd
.align	16
	/*
	 * Squaring path (ap == bp, num even).  0(%esp) = num-1 and
	 * 12(%esp) = outer index, starting at 0.  Cross products are doubled
	 * with lea (..,..,2) / shrl $31, %ebx holding the bit shifted out.
	 */
.L008bn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L012sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L012sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
	/* Reduction step for the squaring path, two words per iteration. */
.L0133rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0133rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx		/* outer index reached num-1? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L014sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L015sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L015sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L014sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0133rdmadd
.align	16
	/*
	 * Common tail, entered with %ebx = num-1.
	 * rp = tp - np is computed word by word; xorl clears the carry before
	 * the first sbbl, and decl/lea/movl leave the carry alone inside the
	 * loop.
	 */
.L007common_tail:
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L016sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L016sub
	/*
	 * %eax = tp[num] - borrow is used as a mask: the copy source becomes
	 * (tp & mask) | (rp & ~mask), so the choice between tp and the
	 * already stored difference is made without a branch.
	 */
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
	/* Copy the selected vector to rp and overwrite tp with %ecx. */
.L017copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L017copy
	movl	24(%esp),%esp		/* back to the caller-side stack */
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
	/* ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" NUL */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#else
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* %eax = 0 is the return value for the early exit below. */
	xorl	%eax,%eax
	movl	40(%esp),%edi		/* %edi = num */
	cmpl	$4,%edi
	jl	.L000just_leave
	leal	20(%esp),%esi		/* %esi -> argument block (arg0 slot) */
	leal	24(%esp),%edx		/* %edx =  address of the arg1 slot */
	movl	%esp,%ebp
	/* Reserve 32 bytes of frame plus (num+2) words for tp. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp
	negl	%edi
	/* Same stack placement arithmetic as in the PIC branch. */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	/* Read one word per 4096-byte step down to the new %esp. */
	movl	%ebp,%eax
	subl	%esp,%eax
	andl	$-4096,%eax
.L001page_walk:
	movl	(%esp,%eax,1),%edx
	subl	$4096,%eax
.byte	46
	jnc	.L001page_walk
	/* Copy the arguments into the private frame. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx		/* %ebx = num-1 */
	movl	%ebp,24(%esp)
	/* Absolute address of OPENSSL_ia32cap_P (non-PIC build). */
	leal	OPENSSL_ia32cap_P,%eax
	btl	$26,(%eax)
	jnc	.L002non_sse2
	/* MMX/PMULUDQ path; register roles as in the PIC branch. */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
	/* First pass: tp is written, not yet read. */
.L0031st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0031st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
	/* Remaining passes: tp is also added in (%mm6). */
.L004outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L005inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx
	jnz	.L005inner
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L004outer
	emms
	jmp	.L006common_tail
.align	16
	/* Integer paths; squaring is chosen when num is even and ap == bp. */
.L002non_sse2:
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax	/* %eax = &bp[num] */
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L007bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
	/* tp = ap * bp[0]. */
.L008mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L008mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi		/* %edi = n0 word * tp[0] */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0092ndmadd
.align	16
	/* tp += ap * bp[i]. */
.L0101stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0101stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
	/* tp = (tp + np * %edi), stored one word lower than it is read. */
.L0092ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0092ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx		/* reached &bp[num]? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0101stmadd
.align	16
	/* Squaring path: 0(%esp) = num-1, 12(%esp) = outer index. */
.L007bn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L011sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L011sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
	/* Reduction step for the squaring path, two words per iteration. */
.L0123rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0123rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L013sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L014sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L014sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L013sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0123rdmadd
.align	16
	/*
	 * Common tail, entered with %ebx = num-1: rp = tp - np, then a
	 * mask-selected copy of either tp or that difference into rp while
	 * tp is overwritten with %ecx.
	 */
.L006common_tail:
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx		/* also clears the carry for sbbl */
.align	16
.L015sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L015sub
	sbbl	$0,%eax			/* mask = tp[num] - borrow */
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi		/* (tp & mask) | (rp & ~mask) */
.align	16
.L016copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L016copy
	movl	24(%esp),%esp		/* back to the caller-side stack */
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
	/* ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" NUL */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#endif