/* x86-mont.S revision 306195 */
/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-mont.S 306195 2016-09-22 14:57:48Z jkim $ */
/* Do not modify. This file is auto-generated from x86-mont.pl. */
/*
 * bn_mul_mont -- i386, GNU as AT&T syntax, arguments on the stack.
 *
 * The .byte string after the function spells
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Arguments as this code reads them (arg1 is at 20(%esp) after the four
 * register pushes):
 *   arg1  pointer to the result vector (written in the common tail)
 *   arg2  pointer to the first multiplicand vector
 *   arg3  pointer to the second multiplicand vector
 *   arg4  pointer to the vector that is multiplied by the per-word
 *         reduction factor and subtracted in the common tail
 *   arg5  pointer; only the first 32-bit word behind it is loaded
 *   arg6  num, the number of 32-bit words in each vector
 * NOTE(review): presumably OpenSSL's (rp, ap, bp, np, n0, num) with np the
 * modulus -- confirm against the C prototype.
 *
 * Returns %eax = 0 without touching memory when num < 4, otherwise 1.
 * %ebp, %ebx, %esi and %edi are pushed on entry and popped on exit.
 *
 * The file carries two copies of the routine.  They differ only in how the
 * address of OPENSSL_ia32cap_P is formed (call/pop in the PIC copy, absolute
 * address otherwise) and in the numbering of the local labels.
 *
 * Frame layout once the scratch area has been set up:
 *    0(%esp)  num-1 (stored by the squaring path only)
 *    4(%esp)  arg1
 *    8(%esp)  arg2
 *   12(%esp)  arg3 (the integer multiply path advances it word by word;
 *             the squaring path reuses the slot as a word index)
 *   16(%esp)  arg4
 *   20(%esp)  first word behind arg5
 *   24(%esp)  caller's %esp, restored before returning
 *   28(%esp)  arg3 + 4*num (end pointer, integer multiply path only)
 *   32(%esp)  start of the temporary vector
 */
#ifdef PIC
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* %eax = 0 is the return value of the early exit taken when num < 4. */
	xorl	%eax,%eax
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
	/* %esi = address of arg1, %edx = address of arg2 on the caller's stack. */
	leal	20(%esp),%esi
	leal	24(%esp),%edx
	/* %ebp = %esp - 32 - 4*(num+2): candidate base of the scratch frame. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp
	negl	%edi
	/* Adjust the frame base relative to the argument block address
	 * (masks 2047 and 2048), then round it down to a 64-byte boundary. */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp
	/* Move %esp down to the new frame in steps of 4096 bytes, reading one
	 * word at every step.  %edx keeps the caller's %esp. */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy the arguments into the new frame; arg5 is dereferenced once. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	/* %edi still holds num+2, so %ebx = num-1. */
	leal	-3(%edi),%ebx
	movl	%edx,24(%esp)
	/* PIC: obtain the current address with call/pop, then form the address
	 * of OPENSSL_ia32cap_P relative to it. */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	/* Bit 26 of the first capability word selects the MMX/pmuludq path. */
	btl	$26,(%eax)
	jnc	.L004non_sse2
	/* pmuludq path: %mm7 = 0x00000000ffffffff mask, %esi = arg2,
	 * %edi = arg3, %ebp = arg4, %edx = outer index, %ecx = inner index. */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* First pass over the words; results go to the temporary vector. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L006outer:
	/* One outer iteration per remaining word of arg3 (index %edx). */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L007inner:
	/* %ebx counts down to zero here; leal leaves the flags of decl intact
	 * for the jnz. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx
	jnz	.L007inner
	/* Restore %ebx from the inner index before finishing the row. */
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	/* Leave MMX state before continuing with integer code. */
	emms
	jmp	.L008common_tail
.align	16
.L004non_sse2:
	/* Integer path.  %ebp = (num & 1) | (arg2 - arg3); it is zero, and the
	 * squaring code is used, only when num is even and arg2 == arg3.
	 * movl does not alter the flags set by orl, so jz tests that result.
	 * %eax = arg3 + 4*num, %edi = first word of arg3. */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* Temporary vector = arg2 * (first word of arg3). */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	/* %edi = (word behind arg5) * (lowest temporary word): the factor by
	 * which arg4 is multiplied in the following pass. */
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* Temporary vector += arg2 * (current word of arg3). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/* Temporary vector += arg4 * %edi, stored one word lower
	 * (24(%esp,%ecx,4) after %ecx has been incremented). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	/* Advance the arg3 cursor; finish when it reaches the end pointer. */
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd
.align	16
.L009bn_sqr_mont:
	/* Squaring path: 0(%esp) = num-1, 12(%esp) = current word index (0). */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	/* Each product is doubled (leal ...,2) with the bit shifted out carried
	 * in %ebx to the next word. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* Temporary vector += arg4 * %edi, two words per iteration. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	/* %ecx = index of the word just processed; done when it equals %ebx. */
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	/* Next word: square it and add the square into the temporary vector. */
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	/* leal keeps the flags of cmpl for the je below. */
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	/* Add the doubled cross products for the remaining words. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd
.align	16
.L008common_tail:
	/* %ebp = arg4, %edi = arg1 (result), %esi = temporary vector,
	 * %ecx = num-1 loop counter, %edx = word index.  xorl also clears CF
	 * for the first sbbl. */
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L017sub:
	/* result = temporary - arg4, word by word.  decl, movl and leal leave
	 * CF alone, so the borrow chains from one sbbl to the next; jge tests
	 * the flags of decl.  The loop exits with %ecx = -1. */
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	/* %eax = top temporary word minus the final borrow, used as a mask:
	 * %esi = (temporary & mask) | (result & ~mask).  The mask is expected
	 * to be either zero or all ones here. */
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L018copy:
	/* Copy the selected vector into the result and overwrite each word of
	 * the temporary vector with %ecx. */
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L018copy
	/* Back to the caller's stack pointer; return 1. */
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#else
/* Non-PIC copy: same instruction sequence as above, except that the address
 * of OPENSSL_ia32cap_P is taken directly and the local labels are numbered
 * differently. */
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* %eax = 0 is the return value of the early exit taken when num < 4. */
	xorl	%eax,%eax
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
	/* Compute the scratch frame base in %ebp (64-byte aligned). */
	leal	20(%esp),%esi
	leal	24(%esp),%edx
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp
	negl	%edi
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp
	/* Step %esp down to the frame 4096 bytes at a time, reading a word at
	 * each step; %edx keeps the caller's %esp. */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy the arguments into the new frame; arg5 is dereferenced once. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	/* %ebx = num-1. */
	leal	-3(%edi),%ebx
	movl	%edx,24(%esp)
	/* Bit 26 of the first capability word selects the MMX/pmuludq path. */
	leal	OPENSSL_ia32cap_P,%eax
	btl	$26,(%eax)
	jnc	.L003non_sse2
	/* pmuludq path: %mm7 = low-32-bit mask, %esi = arg2, %edi = arg3,
	 * %ebp = arg4. */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0041st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0041st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L005outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L006inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx
	jnz	.L006inner
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L005outer
	/* Leave MMX state before continuing with integer code. */
	emms
	jmp	.L007common_tail
.align	16
.L003non_sse2:
	/* Integer path; the squaring code is used only when num is even and
	 * arg2 == arg3 (%ebp = (num & 1) | (arg2 - arg3) is then zero). */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L008bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L009mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L009mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0102ndmadd
.align	16
.L0111stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0111stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0102ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0102ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0111stmadd
.align	16
.L008bn_sqr_mont:
	/* Squaring path: 0(%esp) = num-1, 12(%esp) = current word index (0). */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L012sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L012sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0133rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0133rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L014sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L015sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L015sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L014sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0133rdmadd
.align	16
.L007common_tail:
	/* %ebp = arg4, %edi = arg1 (result), %esi = temporary vector;
	 * xorl also clears CF for the first sbbl. */
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L016sub:
	/* result = temporary - arg4 with a chained borrow; exits with
	 * %ecx = -1. */
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L016sub
	/* Mask-select the copy source: temporary vector or result. */
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L017copy:
	/* Copy into the result and overwrite the temporary words with %ecx. */
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L017copy
	/* Back to the caller's stack pointer; return 1. */
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#endif