# x86_64-mont5.S revision 1.6 — generated from OpenSSL's perlasm script
# x86_64-mont5.pl; do not hand-edit, regenerate from the .pl source instead.
1#include <machine/asm.h> 2.text 3 4 5 6.globl bn_mul_mont_gather5 7.type bn_mul_mont_gather5,@function 8.align 64 9bn_mul_mont_gather5: 10 movl %r9d,%r9d 11 movq %rsp,%rax 12 testl $7,%r9d 13 jnz .Lmul_enter 14 movl OPENSSL_ia32cap_P+8(%rip),%r11d 15 jmp .Lmul4x_enter 16 17.align 16 18.Lmul_enter: 19 movd 8(%rsp),%xmm5 20 pushq %rbx 21 pushq %rbp 22 pushq %r12 23 pushq %r13 24 pushq %r14 25 pushq %r15 26 27 negq %r9 28 movq %rsp,%r11 29 leaq -280(%rsp,%r9,8),%r10 30 negq %r9 31 andq $-1024,%r10 32 33 34 35 36 37 38 39 40 41 subq %r10,%r11 42 andq $-4096,%r11 43 leaq (%r10,%r11,1),%rsp 44 movq (%rsp),%r11 45 cmpq %r10,%rsp 46 ja .Lmul_page_walk 47 jmp .Lmul_page_walk_done 48 49.Lmul_page_walk: 50 leaq -4096(%rsp),%rsp 51 movq (%rsp),%r11 52 cmpq %r10,%rsp 53 ja .Lmul_page_walk 54.Lmul_page_walk_done: 55 56 leaq .Linc(%rip),%r10 57 movq %rax,8(%rsp,%r9,8) 58.Lmul_body: 59 60 leaq 128(%rdx),%r12 61 movdqa 0(%r10),%xmm0 62 movdqa 16(%r10),%xmm1 63 leaq 24-112(%rsp,%r9,8),%r10 64 andq $-16,%r10 65 66 pshufd $0,%xmm5,%xmm5 67 movdqa %xmm1,%xmm4 68 movdqa %xmm1,%xmm2 69 paddd %xmm0,%xmm1 70 pcmpeqd %xmm5,%xmm0 71.byte 0x67 72 movdqa %xmm4,%xmm3 73 paddd %xmm1,%xmm2 74 pcmpeqd %xmm5,%xmm1 75 movdqa %xmm0,112(%r10) 76 movdqa %xmm4,%xmm0 77 78 paddd %xmm2,%xmm3 79 pcmpeqd %xmm5,%xmm2 80 movdqa %xmm1,128(%r10) 81 movdqa %xmm4,%xmm1 82 83 paddd %xmm3,%xmm0 84 pcmpeqd %xmm5,%xmm3 85 movdqa %xmm2,144(%r10) 86 movdqa %xmm4,%xmm2 87 88 paddd %xmm0,%xmm1 89 pcmpeqd %xmm5,%xmm0 90 movdqa %xmm3,160(%r10) 91 movdqa %xmm4,%xmm3 92 paddd %xmm1,%xmm2 93 pcmpeqd %xmm5,%xmm1 94 movdqa %xmm0,176(%r10) 95 movdqa %xmm4,%xmm0 96 97 paddd %xmm2,%xmm3 98 pcmpeqd %xmm5,%xmm2 99 movdqa %xmm1,192(%r10) 100 movdqa %xmm4,%xmm1 101 102 paddd %xmm3,%xmm0 103 pcmpeqd %xmm5,%xmm3 104 movdqa %xmm2,208(%r10) 105 movdqa %xmm4,%xmm2 106 107 paddd %xmm0,%xmm1 108 pcmpeqd %xmm5,%xmm0 109 movdqa %xmm3,224(%r10) 110 movdqa %xmm4,%xmm3 111 paddd %xmm1,%xmm2 112 pcmpeqd %xmm5,%xmm1 113 movdqa %xmm0,240(%r10) 114 
movdqa %xmm4,%xmm0 115 116 paddd %xmm2,%xmm3 117 pcmpeqd %xmm5,%xmm2 118 movdqa %xmm1,256(%r10) 119 movdqa %xmm4,%xmm1 120 121 paddd %xmm3,%xmm0 122 pcmpeqd %xmm5,%xmm3 123 movdqa %xmm2,272(%r10) 124 movdqa %xmm4,%xmm2 125 126 paddd %xmm0,%xmm1 127 pcmpeqd %xmm5,%xmm0 128 movdqa %xmm3,288(%r10) 129 movdqa %xmm4,%xmm3 130 paddd %xmm1,%xmm2 131 pcmpeqd %xmm5,%xmm1 132 movdqa %xmm0,304(%r10) 133 134 paddd %xmm2,%xmm3 135.byte 0x67 136 pcmpeqd %xmm5,%xmm2 137 movdqa %xmm1,320(%r10) 138 139 pcmpeqd %xmm5,%xmm3 140 movdqa %xmm2,336(%r10) 141 pand 64(%r12),%xmm0 142 143 pand 80(%r12),%xmm1 144 pand 96(%r12),%xmm2 145 movdqa %xmm3,352(%r10) 146 pand 112(%r12),%xmm3 147 por %xmm2,%xmm0 148 por %xmm3,%xmm1 149 movdqa -128(%r12),%xmm4 150 movdqa -112(%r12),%xmm5 151 movdqa -96(%r12),%xmm2 152 pand 112(%r10),%xmm4 153 movdqa -80(%r12),%xmm3 154 pand 128(%r10),%xmm5 155 por %xmm4,%xmm0 156 pand 144(%r10),%xmm2 157 por %xmm5,%xmm1 158 pand 160(%r10),%xmm3 159 por %xmm2,%xmm0 160 por %xmm3,%xmm1 161 movdqa -64(%r12),%xmm4 162 movdqa -48(%r12),%xmm5 163 movdqa -32(%r12),%xmm2 164 pand 176(%r10),%xmm4 165 movdqa -16(%r12),%xmm3 166 pand 192(%r10),%xmm5 167 por %xmm4,%xmm0 168 pand 208(%r10),%xmm2 169 por %xmm5,%xmm1 170 pand 224(%r10),%xmm3 171 por %xmm2,%xmm0 172 por %xmm3,%xmm1 173 movdqa 0(%r12),%xmm4 174 movdqa 16(%r12),%xmm5 175 movdqa 32(%r12),%xmm2 176 pand 240(%r10),%xmm4 177 movdqa 48(%r12),%xmm3 178 pand 256(%r10),%xmm5 179 por %xmm4,%xmm0 180 pand 272(%r10),%xmm2 181 por %xmm5,%xmm1 182 pand 288(%r10),%xmm3 183 por %xmm2,%xmm0 184 por %xmm3,%xmm1 185 por %xmm1,%xmm0 186 pshufd $0x4e,%xmm0,%xmm1 187 por %xmm1,%xmm0 188 leaq 256(%r12),%r12 189.byte 102,72,15,126,195 190 191 movq (%r8),%r8 192 movq (%rsi),%rax 193 194 xorq %r14,%r14 195 xorq %r15,%r15 196 197 movq %r8,%rbp 198 mulq %rbx 199 movq %rax,%r10 200 movq (%rcx),%rax 201 202 imulq %r10,%rbp 203 movq %rdx,%r11 204 205 mulq %rbp 206 addq %rax,%r10 207 movq 8(%rsi),%rax 208 adcq $0,%rdx 209 movq %rdx,%r13 210 211 leaq 
1(%r15),%r15 212 jmp .L1st_enter 213 214.align 16 215.L1st: 216 addq %rax,%r13 217 movq (%rsi,%r15,8),%rax 218 adcq $0,%rdx 219 addq %r11,%r13 220 movq %r10,%r11 221 adcq $0,%rdx 222 movq %r13,-16(%rsp,%r15,8) 223 movq %rdx,%r13 224 225.L1st_enter: 226 mulq %rbx 227 addq %rax,%r11 228 movq (%rcx,%r15,8),%rax 229 adcq $0,%rdx 230 leaq 1(%r15),%r15 231 movq %rdx,%r10 232 233 mulq %rbp 234 cmpq %r9,%r15 235 jne .L1st 236 237 238 addq %rax,%r13 239 adcq $0,%rdx 240 addq %r11,%r13 241 adcq $0,%rdx 242 movq %r13,-16(%rsp,%r9,8) 243 movq %rdx,%r13 244 movq %r10,%r11 245 246 xorq %rdx,%rdx 247 addq %r11,%r13 248 adcq $0,%rdx 249 movq %r13,-8(%rsp,%r9,8) 250 movq %rdx,(%rsp,%r9,8) 251 252 leaq 1(%r14),%r14 253 jmp .Louter 254.align 16 255.Louter: 256 leaq 24+128(%rsp,%r9,8),%rdx 257 andq $-16,%rdx 258 pxor %xmm4,%xmm4 259 pxor %xmm5,%xmm5 260 movdqa -128(%r12),%xmm0 261 movdqa -112(%r12),%xmm1 262 movdqa -96(%r12),%xmm2 263 movdqa -80(%r12),%xmm3 264 pand -128(%rdx),%xmm0 265 pand -112(%rdx),%xmm1 266 por %xmm0,%xmm4 267 pand -96(%rdx),%xmm2 268 por %xmm1,%xmm5 269 pand -80(%rdx),%xmm3 270 por %xmm2,%xmm4 271 por %xmm3,%xmm5 272 movdqa -64(%r12),%xmm0 273 movdqa -48(%r12),%xmm1 274 movdqa -32(%r12),%xmm2 275 movdqa -16(%r12),%xmm3 276 pand -64(%rdx),%xmm0 277 pand -48(%rdx),%xmm1 278 por %xmm0,%xmm4 279 pand -32(%rdx),%xmm2 280 por %xmm1,%xmm5 281 pand -16(%rdx),%xmm3 282 por %xmm2,%xmm4 283 por %xmm3,%xmm5 284 movdqa 0(%r12),%xmm0 285 movdqa 16(%r12),%xmm1 286 movdqa 32(%r12),%xmm2 287 movdqa 48(%r12),%xmm3 288 pand 0(%rdx),%xmm0 289 pand 16(%rdx),%xmm1 290 por %xmm0,%xmm4 291 pand 32(%rdx),%xmm2 292 por %xmm1,%xmm5 293 pand 48(%rdx),%xmm3 294 por %xmm2,%xmm4 295 por %xmm3,%xmm5 296 movdqa 64(%r12),%xmm0 297 movdqa 80(%r12),%xmm1 298 movdqa 96(%r12),%xmm2 299 movdqa 112(%r12),%xmm3 300 pand 64(%rdx),%xmm0 301 pand 80(%rdx),%xmm1 302 por %xmm0,%xmm4 303 pand 96(%rdx),%xmm2 304 por %xmm1,%xmm5 305 pand 112(%rdx),%xmm3 306 por %xmm2,%xmm4 307 por %xmm3,%xmm5 308 por 
%xmm5,%xmm4 309 pshufd $0x4e,%xmm4,%xmm0 310 por %xmm4,%xmm0 311 leaq 256(%r12),%r12 312 313 movq (%rsi),%rax 314.byte 102,72,15,126,195 315 316 xorq %r15,%r15 317 movq %r8,%rbp 318 movq (%rsp),%r10 319 320 mulq %rbx 321 addq %rax,%r10 322 movq (%rcx),%rax 323 adcq $0,%rdx 324 325 imulq %r10,%rbp 326 movq %rdx,%r11 327 328 mulq %rbp 329 addq %rax,%r10 330 movq 8(%rsi),%rax 331 adcq $0,%rdx 332 movq 8(%rsp),%r10 333 movq %rdx,%r13 334 335 leaq 1(%r15),%r15 336 jmp .Linner_enter 337 338.align 16 339.Linner: 340 addq %rax,%r13 341 movq (%rsi,%r15,8),%rax 342 adcq $0,%rdx 343 addq %r10,%r13 344 movq (%rsp,%r15,8),%r10 345 adcq $0,%rdx 346 movq %r13,-16(%rsp,%r15,8) 347 movq %rdx,%r13 348 349.Linner_enter: 350 mulq %rbx 351 addq %rax,%r11 352 movq (%rcx,%r15,8),%rax 353 adcq $0,%rdx 354 addq %r11,%r10 355 movq %rdx,%r11 356 adcq $0,%r11 357 leaq 1(%r15),%r15 358 359 mulq %rbp 360 cmpq %r9,%r15 361 jne .Linner 362 363 addq %rax,%r13 364 adcq $0,%rdx 365 addq %r10,%r13 366 movq (%rsp,%r9,8),%r10 367 adcq $0,%rdx 368 movq %r13,-16(%rsp,%r9,8) 369 movq %rdx,%r13 370 371 xorq %rdx,%rdx 372 addq %r11,%r13 373 adcq $0,%rdx 374 addq %r10,%r13 375 adcq $0,%rdx 376 movq %r13,-8(%rsp,%r9,8) 377 movq %rdx,(%rsp,%r9,8) 378 379 leaq 1(%r14),%r14 380 cmpq %r9,%r14 381 jb .Louter 382 383 xorq %r14,%r14 384 movq (%rsp),%rax 385 leaq (%rsp),%rsi 386 movq %r9,%r15 387 jmp .Lsub 388.align 16 389.Lsub: sbbq (%rcx,%r14,8),%rax 390 movq %rax,(%rdi,%r14,8) 391 movq 8(%rsi,%r14,8),%rax 392 leaq 1(%r14),%r14 393 decq %r15 394 jnz .Lsub 395 396 sbbq $0,%rax 397 xorq %r14,%r14 398 andq %rax,%rsi 399 notq %rax 400 movq %rdi,%rcx 401 andq %rax,%rcx 402 movq %r9,%r15 403 orq %rcx,%rsi 404.align 16 405.Lcopy: 406 movq (%rsi,%r14,8),%rax 407 movq %r14,(%rsp,%r14,8) 408 movq %rax,(%rdi,%r14,8) 409 leaq 1(%r14),%r14 410 subq $1,%r15 411 jnz .Lcopy 412 413 movq 8(%rsp,%r9,8),%rsi 414 movq $1,%rax 415 416 movq -48(%rsi),%r15 417 movq -40(%rsi),%r14 418 movq -32(%rsi),%r13 419 movq -24(%rsi),%r12 420 movq 
-16(%rsi),%rbp 421 movq -8(%rsi),%rbx 422 leaq (%rsi),%rsp 423.Lmul_epilogue: 424 .byte 0xf3,0xc3 425.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 426.type bn_mul4x_mont_gather5,@function 427.align 32 428bn_mul4x_mont_gather5: 429.byte 0x67 430 movq %rsp,%rax 431.Lmul4x_enter: 432 andl $0x80108,%r11d 433 cmpl $0x80108,%r11d 434 je .Lmulx4x_enter 435 pushq %rbx 436 pushq %rbp 437 pushq %r12 438 pushq %r13 439 pushq %r14 440 pushq %r15 441.Lmul4x_prologue: 442 443.byte 0x67 444 shll $3,%r9d 445 leaq (%r9,%r9,2),%r10 446 negq %r9 447 448 449 450 451 452 453 454 455 456 457 leaq -320(%rsp,%r9,2),%r11 458 movq %rsp,%rbp 459 subq %rdi,%r11 460 andq $4095,%r11 461 cmpq %r11,%r10 462 jb .Lmul4xsp_alt 463 subq %r11,%rbp 464 leaq -320(%rbp,%r9,2),%rbp 465 jmp .Lmul4xsp_done 466 467.align 32 468.Lmul4xsp_alt: 469 leaq 4096-320(,%r9,2),%r10 470 leaq -320(%rbp,%r9,2),%rbp 471 subq %r10,%r11 472 movq $0,%r10 473 cmovcq %r10,%r11 474 subq %r11,%rbp 475.Lmul4xsp_done: 476 andq $-64,%rbp 477 movq %rsp,%r11 478 subq %rbp,%r11 479 andq $-4096,%r11 480 leaq (%r11,%rbp,1),%rsp 481 movq (%rsp),%r10 482 cmpq %rbp,%rsp 483 ja .Lmul4x_page_walk 484 jmp .Lmul4x_page_walk_done 485 486.Lmul4x_page_walk: 487 leaq -4096(%rsp),%rsp 488 movq (%rsp),%r10 489 cmpq %rbp,%rsp 490 ja .Lmul4x_page_walk 491.Lmul4x_page_walk_done: 492 493 negq %r9 494 495 movq %rax,40(%rsp) 496.Lmul4x_body: 497 498 call mul4x_internal 499 500 movq 40(%rsp),%rsi 501 movq $1,%rax 502 503 movq -48(%rsi),%r15 504 movq -40(%rsi),%r14 505 movq -32(%rsi),%r13 506 movq -24(%rsi),%r12 507 movq -16(%rsi),%rbp 508 movq -8(%rsi),%rbx 509 leaq (%rsi),%rsp 510.Lmul4x_epilogue: 511 .byte 0xf3,0xc3 512.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 513 514.type mul4x_internal,@function 515.align 32 516mul4x_internal: 517 shlq $5,%r9 518 movd 8(%rax),%xmm5 519 leaq .Linc(%rip),%rax 520 leaq 128(%rdx,%r9,1),%r13 521 shrq $5,%r9 522 movdqa 0(%rax),%xmm0 523 movdqa 16(%rax),%xmm1 524 leaq 88-112(%rsp,%r9,1),%r10 525 leaq 
128(%rdx),%r12 526 527 pshufd $0,%xmm5,%xmm5 528 movdqa %xmm1,%xmm4 529.byte 0x67,0x67 530 movdqa %xmm1,%xmm2 531 paddd %xmm0,%xmm1 532 pcmpeqd %xmm5,%xmm0 533.byte 0x67 534 movdqa %xmm4,%xmm3 535 paddd %xmm1,%xmm2 536 pcmpeqd %xmm5,%xmm1 537 movdqa %xmm0,112(%r10) 538 movdqa %xmm4,%xmm0 539 540 paddd %xmm2,%xmm3 541 pcmpeqd %xmm5,%xmm2 542 movdqa %xmm1,128(%r10) 543 movdqa %xmm4,%xmm1 544 545 paddd %xmm3,%xmm0 546 pcmpeqd %xmm5,%xmm3 547 movdqa %xmm2,144(%r10) 548 movdqa %xmm4,%xmm2 549 550 paddd %xmm0,%xmm1 551 pcmpeqd %xmm5,%xmm0 552 movdqa %xmm3,160(%r10) 553 movdqa %xmm4,%xmm3 554 paddd %xmm1,%xmm2 555 pcmpeqd %xmm5,%xmm1 556 movdqa %xmm0,176(%r10) 557 movdqa %xmm4,%xmm0 558 559 paddd %xmm2,%xmm3 560 pcmpeqd %xmm5,%xmm2 561 movdqa %xmm1,192(%r10) 562 movdqa %xmm4,%xmm1 563 564 paddd %xmm3,%xmm0 565 pcmpeqd %xmm5,%xmm3 566 movdqa %xmm2,208(%r10) 567 movdqa %xmm4,%xmm2 568 569 paddd %xmm0,%xmm1 570 pcmpeqd %xmm5,%xmm0 571 movdqa %xmm3,224(%r10) 572 movdqa %xmm4,%xmm3 573 paddd %xmm1,%xmm2 574 pcmpeqd %xmm5,%xmm1 575 movdqa %xmm0,240(%r10) 576 movdqa %xmm4,%xmm0 577 578 paddd %xmm2,%xmm3 579 pcmpeqd %xmm5,%xmm2 580 movdqa %xmm1,256(%r10) 581 movdqa %xmm4,%xmm1 582 583 paddd %xmm3,%xmm0 584 pcmpeqd %xmm5,%xmm3 585 movdqa %xmm2,272(%r10) 586 movdqa %xmm4,%xmm2 587 588 paddd %xmm0,%xmm1 589 pcmpeqd %xmm5,%xmm0 590 movdqa %xmm3,288(%r10) 591 movdqa %xmm4,%xmm3 592 paddd %xmm1,%xmm2 593 pcmpeqd %xmm5,%xmm1 594 movdqa %xmm0,304(%r10) 595 596 paddd %xmm2,%xmm3 597.byte 0x67 598 pcmpeqd %xmm5,%xmm2 599 movdqa %xmm1,320(%r10) 600 601 pcmpeqd %xmm5,%xmm3 602 movdqa %xmm2,336(%r10) 603 pand 64(%r12),%xmm0 604 605 pand 80(%r12),%xmm1 606 pand 96(%r12),%xmm2 607 movdqa %xmm3,352(%r10) 608 pand 112(%r12),%xmm3 609 por %xmm2,%xmm0 610 por %xmm3,%xmm1 611 movdqa -128(%r12),%xmm4 612 movdqa -112(%r12),%xmm5 613 movdqa -96(%r12),%xmm2 614 pand 112(%r10),%xmm4 615 movdqa -80(%r12),%xmm3 616 pand 128(%r10),%xmm5 617 por %xmm4,%xmm0 618 pand 144(%r10),%xmm2 619 por %xmm5,%xmm1 620 
pand 160(%r10),%xmm3 621 por %xmm2,%xmm0 622 por %xmm3,%xmm1 623 movdqa -64(%r12),%xmm4 624 movdqa -48(%r12),%xmm5 625 movdqa -32(%r12),%xmm2 626 pand 176(%r10),%xmm4 627 movdqa -16(%r12),%xmm3 628 pand 192(%r10),%xmm5 629 por %xmm4,%xmm0 630 pand 208(%r10),%xmm2 631 por %xmm5,%xmm1 632 pand 224(%r10),%xmm3 633 por %xmm2,%xmm0 634 por %xmm3,%xmm1 635 movdqa 0(%r12),%xmm4 636 movdqa 16(%r12),%xmm5 637 movdqa 32(%r12),%xmm2 638 pand 240(%r10),%xmm4 639 movdqa 48(%r12),%xmm3 640 pand 256(%r10),%xmm5 641 por %xmm4,%xmm0 642 pand 272(%r10),%xmm2 643 por %xmm5,%xmm1 644 pand 288(%r10),%xmm3 645 por %xmm2,%xmm0 646 por %xmm3,%xmm1 647 por %xmm1,%xmm0 648 pshufd $0x4e,%xmm0,%xmm1 649 por %xmm1,%xmm0 650 leaq 256(%r12),%r12 651.byte 102,72,15,126,195 652 653 movq %r13,16+8(%rsp) 654 movq %rdi,56+8(%rsp) 655 656 movq (%r8),%r8 657 movq (%rsi),%rax 658 leaq (%rsi,%r9,1),%rsi 659 negq %r9 660 661 movq %r8,%rbp 662 mulq %rbx 663 movq %rax,%r10 664 movq (%rcx),%rax 665 666 imulq %r10,%rbp 667 leaq 64+8(%rsp),%r14 668 movq %rdx,%r11 669 670 mulq %rbp 671 addq %rax,%r10 672 movq 8(%rsi,%r9,1),%rax 673 adcq $0,%rdx 674 movq %rdx,%rdi 675 676 mulq %rbx 677 addq %rax,%r11 678 movq 8(%rcx),%rax 679 adcq $0,%rdx 680 movq %rdx,%r10 681 682 mulq %rbp 683 addq %rax,%rdi 684 movq 16(%rsi,%r9,1),%rax 685 adcq $0,%rdx 686 addq %r11,%rdi 687 leaq 32(%r9),%r15 688 leaq 32(%rcx),%rcx 689 adcq $0,%rdx 690 movq %rdi,(%r14) 691 movq %rdx,%r13 692 jmp .L1st4x 693 694.align 32 695.L1st4x: 696 mulq %rbx 697 addq %rax,%r10 698 movq -16(%rcx),%rax 699 leaq 32(%r14),%r14 700 adcq $0,%rdx 701 movq %rdx,%r11 702 703 mulq %rbp 704 addq %rax,%r13 705 movq -8(%rsi,%r15,1),%rax 706 adcq $0,%rdx 707 addq %r10,%r13 708 adcq $0,%rdx 709 movq %r13,-24(%r14) 710 movq %rdx,%rdi 711 712 mulq %rbx 713 addq %rax,%r11 714 movq -8(%rcx),%rax 715 adcq $0,%rdx 716 movq %rdx,%r10 717 718 mulq %rbp 719 addq %rax,%rdi 720 movq (%rsi,%r15,1),%rax 721 adcq $0,%rdx 722 addq %r11,%rdi 723 adcq $0,%rdx 724 movq %rdi,-16(%r14) 725 
movq %rdx,%r13 726 727 mulq %rbx 728 addq %rax,%r10 729 movq 0(%rcx),%rax 730 adcq $0,%rdx 731 movq %rdx,%r11 732 733 mulq %rbp 734 addq %rax,%r13 735 movq 8(%rsi,%r15,1),%rax 736 adcq $0,%rdx 737 addq %r10,%r13 738 adcq $0,%rdx 739 movq %r13,-8(%r14) 740 movq %rdx,%rdi 741 742 mulq %rbx 743 addq %rax,%r11 744 movq 8(%rcx),%rax 745 adcq $0,%rdx 746 movq %rdx,%r10 747 748 mulq %rbp 749 addq %rax,%rdi 750 movq 16(%rsi,%r15,1),%rax 751 adcq $0,%rdx 752 addq %r11,%rdi 753 leaq 32(%rcx),%rcx 754 adcq $0,%rdx 755 movq %rdi,(%r14) 756 movq %rdx,%r13 757 758 addq $32,%r15 759 jnz .L1st4x 760 761 mulq %rbx 762 addq %rax,%r10 763 movq -16(%rcx),%rax 764 leaq 32(%r14),%r14 765 adcq $0,%rdx 766 movq %rdx,%r11 767 768 mulq %rbp 769 addq %rax,%r13 770 movq -8(%rsi),%rax 771 adcq $0,%rdx 772 addq %r10,%r13 773 adcq $0,%rdx 774 movq %r13,-24(%r14) 775 movq %rdx,%rdi 776 777 mulq %rbx 778 addq %rax,%r11 779 movq -8(%rcx),%rax 780 adcq $0,%rdx 781 movq %rdx,%r10 782 783 mulq %rbp 784 addq %rax,%rdi 785 movq (%rsi,%r9,1),%rax 786 adcq $0,%rdx 787 addq %r11,%rdi 788 adcq $0,%rdx 789 movq %rdi,-16(%r14) 790 movq %rdx,%r13 791 792 leaq (%rcx,%r9,1),%rcx 793 794 xorq %rdi,%rdi 795 addq %r10,%r13 796 adcq $0,%rdi 797 movq %r13,-8(%r14) 798 799 jmp .Louter4x 800 801.align 32 802.Louter4x: 803 leaq 16+128(%r14),%rdx 804 pxor %xmm4,%xmm4 805 pxor %xmm5,%xmm5 806 movdqa -128(%r12),%xmm0 807 movdqa -112(%r12),%xmm1 808 movdqa -96(%r12),%xmm2 809 movdqa -80(%r12),%xmm3 810 pand -128(%rdx),%xmm0 811 pand -112(%rdx),%xmm1 812 por %xmm0,%xmm4 813 pand -96(%rdx),%xmm2 814 por %xmm1,%xmm5 815 pand -80(%rdx),%xmm3 816 por %xmm2,%xmm4 817 por %xmm3,%xmm5 818 movdqa -64(%r12),%xmm0 819 movdqa -48(%r12),%xmm1 820 movdqa -32(%r12),%xmm2 821 movdqa -16(%r12),%xmm3 822 pand -64(%rdx),%xmm0 823 pand -48(%rdx),%xmm1 824 por %xmm0,%xmm4 825 pand -32(%rdx),%xmm2 826 por %xmm1,%xmm5 827 pand -16(%rdx),%xmm3 828 por %xmm2,%xmm4 829 por %xmm3,%xmm5 830 movdqa 0(%r12),%xmm0 831 movdqa 16(%r12),%xmm1 832 movdqa 
32(%r12),%xmm2 833 movdqa 48(%r12),%xmm3 834 pand 0(%rdx),%xmm0 835 pand 16(%rdx),%xmm1 836 por %xmm0,%xmm4 837 pand 32(%rdx),%xmm2 838 por %xmm1,%xmm5 839 pand 48(%rdx),%xmm3 840 por %xmm2,%xmm4 841 por %xmm3,%xmm5 842 movdqa 64(%r12),%xmm0 843 movdqa 80(%r12),%xmm1 844 movdqa 96(%r12),%xmm2 845 movdqa 112(%r12),%xmm3 846 pand 64(%rdx),%xmm0 847 pand 80(%rdx),%xmm1 848 por %xmm0,%xmm4 849 pand 96(%rdx),%xmm2 850 por %xmm1,%xmm5 851 pand 112(%rdx),%xmm3 852 por %xmm2,%xmm4 853 por %xmm3,%xmm5 854 por %xmm5,%xmm4 855 pshufd $0x4e,%xmm4,%xmm0 856 por %xmm4,%xmm0 857 leaq 256(%r12),%r12 858.byte 102,72,15,126,195 859 860 movq (%r14,%r9,1),%r10 861 movq %r8,%rbp 862 mulq %rbx 863 addq %rax,%r10 864 movq (%rcx),%rax 865 adcq $0,%rdx 866 867 imulq %r10,%rbp 868 movq %rdx,%r11 869 movq %rdi,(%r14) 870 871 leaq (%r14,%r9,1),%r14 872 873 mulq %rbp 874 addq %rax,%r10 875 movq 8(%rsi,%r9,1),%rax 876 adcq $0,%rdx 877 movq %rdx,%rdi 878 879 mulq %rbx 880 addq %rax,%r11 881 movq 8(%rcx),%rax 882 adcq $0,%rdx 883 addq 8(%r14),%r11 884 adcq $0,%rdx 885 movq %rdx,%r10 886 887 mulq %rbp 888 addq %rax,%rdi 889 movq 16(%rsi,%r9,1),%rax 890 adcq $0,%rdx 891 addq %r11,%rdi 892 leaq 32(%r9),%r15 893 leaq 32(%rcx),%rcx 894 adcq $0,%rdx 895 movq %rdx,%r13 896 jmp .Linner4x 897 898.align 32 899.Linner4x: 900 mulq %rbx 901 addq %rax,%r10 902 movq -16(%rcx),%rax 903 adcq $0,%rdx 904 addq 16(%r14),%r10 905 leaq 32(%r14),%r14 906 adcq $0,%rdx 907 movq %rdx,%r11 908 909 mulq %rbp 910 addq %rax,%r13 911 movq -8(%rsi,%r15,1),%rax 912 adcq $0,%rdx 913 addq %r10,%r13 914 adcq $0,%rdx 915 movq %rdi,-32(%r14) 916 movq %rdx,%rdi 917 918 mulq %rbx 919 addq %rax,%r11 920 movq -8(%rcx),%rax 921 adcq $0,%rdx 922 addq -8(%r14),%r11 923 adcq $0,%rdx 924 movq %rdx,%r10 925 926 mulq %rbp 927 addq %rax,%rdi 928 movq (%rsi,%r15,1),%rax 929 adcq $0,%rdx 930 addq %r11,%rdi 931 adcq $0,%rdx 932 movq %r13,-24(%r14) 933 movq %rdx,%r13 934 935 mulq %rbx 936 addq %rax,%r10 937 movq 0(%rcx),%rax 938 adcq $0,%rdx 939 
addq (%r14),%r10 940 adcq $0,%rdx 941 movq %rdx,%r11 942 943 mulq %rbp 944 addq %rax,%r13 945 movq 8(%rsi,%r15,1),%rax 946 adcq $0,%rdx 947 addq %r10,%r13 948 adcq $0,%rdx 949 movq %rdi,-16(%r14) 950 movq %rdx,%rdi 951 952 mulq %rbx 953 addq %rax,%r11 954 movq 8(%rcx),%rax 955 adcq $0,%rdx 956 addq 8(%r14),%r11 957 adcq $0,%rdx 958 movq %rdx,%r10 959 960 mulq %rbp 961 addq %rax,%rdi 962 movq 16(%rsi,%r15,1),%rax 963 adcq $0,%rdx 964 addq %r11,%rdi 965 leaq 32(%rcx),%rcx 966 adcq $0,%rdx 967 movq %r13,-8(%r14) 968 movq %rdx,%r13 969 970 addq $32,%r15 971 jnz .Linner4x 972 973 mulq %rbx 974 addq %rax,%r10 975 movq -16(%rcx),%rax 976 adcq $0,%rdx 977 addq 16(%r14),%r10 978 leaq 32(%r14),%r14 979 adcq $0,%rdx 980 movq %rdx,%r11 981 982 mulq %rbp 983 addq %rax,%r13 984 movq -8(%rsi),%rax 985 adcq $0,%rdx 986 addq %r10,%r13 987 adcq $0,%rdx 988 movq %rdi,-32(%r14) 989 movq %rdx,%rdi 990 991 mulq %rbx 992 addq %rax,%r11 993 movq %rbp,%rax 994 movq -8(%rcx),%rbp 995 adcq $0,%rdx 996 addq -8(%r14),%r11 997 adcq $0,%rdx 998 movq %rdx,%r10 999 1000 mulq %rbp 1001 addq %rax,%rdi 1002 movq (%rsi,%r9,1),%rax 1003 adcq $0,%rdx 1004 addq %r11,%rdi 1005 adcq $0,%rdx 1006 movq %r13,-24(%r14) 1007 movq %rdx,%r13 1008 1009 movq %rdi,-16(%r14) 1010 leaq (%rcx,%r9,1),%rcx 1011 1012 xorq %rdi,%rdi 1013 addq %r10,%r13 1014 adcq $0,%rdi 1015 addq (%r14),%r13 1016 adcq $0,%rdi 1017 movq %r13,-8(%r14) 1018 1019 cmpq 16+8(%rsp),%r12 1020 jb .Louter4x 1021 xorq %rax,%rax 1022 subq %r13,%rbp 1023 adcq %r15,%r15 1024 orq %r15,%rdi 1025 subq %rdi,%rax 1026 leaq (%r14,%r9,1),%rbx 1027 movq (%rcx),%r12 1028 leaq (%rcx),%rbp 1029 movq %r9,%rcx 1030 sarq $3+2,%rcx 1031 movq 56+8(%rsp),%rdi 1032 decq %r12 1033 xorq %r10,%r10 1034 movq 8(%rbp),%r13 1035 movq 16(%rbp),%r14 1036 movq 24(%rbp),%r15 1037 jmp .Lsqr4x_sub_entry 1038.size mul4x_internal,.-mul4x_internal 1039.globl bn_power5 1040.type bn_power5,@function 1041.align 32 1042bn_power5: 1043 movq %rsp,%rax 1044 movl OPENSSL_ia32cap_P+8(%rip),%r11d 
1045 andl $0x80108,%r11d 1046 cmpl $0x80108,%r11d 1047 je .Lpowerx5_enter 1048 pushq %rbx 1049 pushq %rbp 1050 pushq %r12 1051 pushq %r13 1052 pushq %r14 1053 pushq %r15 1054.Lpower5_prologue: 1055 1056 shll $3,%r9d 1057 leal (%r9,%r9,2),%r10d 1058 negq %r9 1059 movq (%r8),%r8 1060 1061 1062 1063 1064 1065 1066 1067 1068 leaq -320(%rsp,%r9,2),%r11 1069 movq %rsp,%rbp 1070 subq %rdi,%r11 1071 andq $4095,%r11 1072 cmpq %r11,%r10 1073 jb .Lpwr_sp_alt 1074 subq %r11,%rbp 1075 leaq -320(%rbp,%r9,2),%rbp 1076 jmp .Lpwr_sp_done 1077 1078.align 32 1079.Lpwr_sp_alt: 1080 leaq 4096-320(,%r9,2),%r10 1081 leaq -320(%rbp,%r9,2),%rbp 1082 subq %r10,%r11 1083 movq $0,%r10 1084 cmovcq %r10,%r11 1085 subq %r11,%rbp 1086.Lpwr_sp_done: 1087 andq $-64,%rbp 1088 movq %rsp,%r11 1089 subq %rbp,%r11 1090 andq $-4096,%r11 1091 leaq (%r11,%rbp,1),%rsp 1092 movq (%rsp),%r10 1093 cmpq %rbp,%rsp 1094 ja .Lpwr_page_walk 1095 jmp .Lpwr_page_walk_done 1096 1097.Lpwr_page_walk: 1098 leaq -4096(%rsp),%rsp 1099 movq (%rsp),%r10 1100 cmpq %rbp,%rsp 1101 ja .Lpwr_page_walk 1102.Lpwr_page_walk_done: 1103 1104 movq %r9,%r10 1105 negq %r9 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 movq %r8,32(%rsp) 1117 movq %rax,40(%rsp) 1118.Lpower5_body: 1119.byte 102,72,15,110,207 1120.byte 102,72,15,110,209 1121.byte 102,73,15,110,218 1122.byte 102,72,15,110,226 1123 1124 call __bn_sqr8x_internal 1125 call __bn_post4x_internal 1126 call __bn_sqr8x_internal 1127 call __bn_post4x_internal 1128 call __bn_sqr8x_internal 1129 call __bn_post4x_internal 1130 call __bn_sqr8x_internal 1131 call __bn_post4x_internal 1132 call __bn_sqr8x_internal 1133 call __bn_post4x_internal 1134 1135.byte 102,72,15,126,209 1136.byte 102,72,15,126,226 1137 movq %rsi,%rdi 1138 movq 40(%rsp),%rax 1139 leaq 32(%rsp),%r8 1140 1141 call mul4x_internal 1142 1143 movq 40(%rsp),%rsi 1144 movq $1,%rax 1145 movq -48(%rsi),%r15 1146 movq -40(%rsi),%r14 1147 movq -32(%rsi),%r13 1148 movq -24(%rsi),%r12 1149 movq -16(%rsi),%rbp 1150 movq 
-8(%rsi),%rbx 1151 leaq (%rsi),%rsp 1152.Lpower5_epilogue: 1153 .byte 0xf3,0xc3 1154.size bn_power5,.-bn_power5 1155 1156.globl bn_sqr8x_internal 1157.hidden bn_sqr8x_internal 1158.type bn_sqr8x_internal,@function 1159.align 32 1160bn_sqr8x_internal: 1161__bn_sqr8x_internal: 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 leaq 32(%r10),%rbp 1236 leaq (%rsi,%r9,1),%rsi 1237 1238 movq %r9,%rcx 1239 1240 1241 movq -32(%rsi,%rbp,1),%r14 1242 leaq 48+8(%rsp,%r9,2),%rdi 1243 movq -24(%rsi,%rbp,1),%rax 1244 leaq -32(%rdi,%rbp,1),%rdi 1245 movq -16(%rsi,%rbp,1),%rbx 1246 movq %rax,%r15 1247 1248 mulq %r14 1249 movq %rax,%r10 1250 movq %rbx,%rax 1251 movq %rdx,%r11 1252 movq %r10,-24(%rdi,%rbp,1) 1253 1254 mulq %r14 1255 addq %rax,%r11 1256 movq %rbx,%rax 1257 adcq $0,%rdx 1258 movq %r11,-16(%rdi,%rbp,1) 1259 movq %rdx,%r10 1260 1261 1262 movq -8(%rsi,%rbp,1),%rbx 1263 mulq %r15 1264 movq %rax,%r12 1265 movq %rbx,%rax 1266 movq %rdx,%r13 1267 1268 leaq (%rbp),%rcx 1269 mulq %r14 1270 addq %rax,%r10 1271 movq %rbx,%rax 1272 movq %rdx,%r11 1273 adcq $0,%r11 1274 addq %r12,%r10 1275 adcq $0,%r11 1276 movq %r10,-8(%rdi,%rcx,1) 1277 jmp .Lsqr4x_1st 1278 1279.align 32 1280.Lsqr4x_1st: 1281 movq (%rsi,%rcx,1),%rbx 1282 mulq %r15 1283 addq %rax,%r13 1284 movq %rbx,%rax 1285 movq %rdx,%r12 1286 adcq $0,%r12 1287 1288 mulq %r14 1289 addq %rax,%r11 1290 movq %rbx,%rax 1291 movq 8(%rsi,%rcx,1),%rbx 1292 movq %rdx,%r10 1293 adcq $0,%r10 1294 addq %r13,%r11 1295 adcq $0,%r10 1296 1297 1298 mulq %r15 1299 addq %rax,%r12 1300 movq %rbx,%rax 1301 movq %r11,(%rdi,%rcx,1) 1302 movq %rdx,%r13 1303 adcq $0,%r13 1304 1305 mulq %r14 1306 addq %rax,%r10 1307 movq %rbx,%rax 1308 
movq 16(%rsi,%rcx,1),%rbx 1309 movq %rdx,%r11 1310 adcq $0,%r11 1311 addq %r12,%r10 1312 adcq $0,%r11 1313 1314 mulq %r15 1315 addq %rax,%r13 1316 movq %rbx,%rax 1317 movq %r10,8(%rdi,%rcx,1) 1318 movq %rdx,%r12 1319 adcq $0,%r12 1320 1321 mulq %r14 1322 addq %rax,%r11 1323 movq %rbx,%rax 1324 movq 24(%rsi,%rcx,1),%rbx 1325 movq %rdx,%r10 1326 adcq $0,%r10 1327 addq %r13,%r11 1328 adcq $0,%r10 1329 1330 1331 mulq %r15 1332 addq %rax,%r12 1333 movq %rbx,%rax 1334 movq %r11,16(%rdi,%rcx,1) 1335 movq %rdx,%r13 1336 adcq $0,%r13 1337 leaq 32(%rcx),%rcx 1338 1339 mulq %r14 1340 addq %rax,%r10 1341 movq %rbx,%rax 1342 movq %rdx,%r11 1343 adcq $0,%r11 1344 addq %r12,%r10 1345 adcq $0,%r11 1346 movq %r10,-8(%rdi,%rcx,1) 1347 1348 cmpq $0,%rcx 1349 jne .Lsqr4x_1st 1350 1351 mulq %r15 1352 addq %rax,%r13 1353 leaq 16(%rbp),%rbp 1354 adcq $0,%rdx 1355 addq %r11,%r13 1356 adcq $0,%rdx 1357 1358 movq %r13,(%rdi) 1359 movq %rdx,%r12 1360 movq %rdx,8(%rdi) 1361 jmp .Lsqr4x_outer 1362 1363.align 32 1364.Lsqr4x_outer: 1365 movq -32(%rsi,%rbp,1),%r14 1366 leaq 48+8(%rsp,%r9,2),%rdi 1367 movq -24(%rsi,%rbp,1),%rax 1368 leaq -32(%rdi,%rbp,1),%rdi 1369 movq -16(%rsi,%rbp,1),%rbx 1370 movq %rax,%r15 1371 1372 mulq %r14 1373 movq -24(%rdi,%rbp,1),%r10 1374 addq %rax,%r10 1375 movq %rbx,%rax 1376 adcq $0,%rdx 1377 movq %r10,-24(%rdi,%rbp,1) 1378 movq %rdx,%r11 1379 1380 mulq %r14 1381 addq %rax,%r11 1382 movq %rbx,%rax 1383 adcq $0,%rdx 1384 addq -16(%rdi,%rbp,1),%r11 1385 movq %rdx,%r10 1386 adcq $0,%r10 1387 movq %r11,-16(%rdi,%rbp,1) 1388 1389 xorq %r12,%r12 1390 1391 movq -8(%rsi,%rbp,1),%rbx 1392 mulq %r15 1393 addq %rax,%r12 1394 movq %rbx,%rax 1395 adcq $0,%rdx 1396 addq -8(%rdi,%rbp,1),%r12 1397 movq %rdx,%r13 1398 adcq $0,%r13 1399 1400 mulq %r14 1401 addq %rax,%r10 1402 movq %rbx,%rax 1403 adcq $0,%rdx 1404 addq %r12,%r10 1405 movq %rdx,%r11 1406 adcq $0,%r11 1407 movq %r10,-8(%rdi,%rbp,1) 1408 1409 leaq (%rbp),%rcx 1410 jmp .Lsqr4x_inner 1411 1412.align 32 1413.Lsqr4x_inner: 
1414 movq (%rsi,%rcx,1),%rbx 1415 mulq %r15 1416 addq %rax,%r13 1417 movq %rbx,%rax 1418 movq %rdx,%r12 1419 adcq $0,%r12 1420 addq (%rdi,%rcx,1),%r13 1421 adcq $0,%r12 1422 1423.byte 0x67 1424 mulq %r14 1425 addq %rax,%r11 1426 movq %rbx,%rax 1427 movq 8(%rsi,%rcx,1),%rbx 1428 movq %rdx,%r10 1429 adcq $0,%r10 1430 addq %r13,%r11 1431 adcq $0,%r10 1432 1433 mulq %r15 1434 addq %rax,%r12 1435 movq %r11,(%rdi,%rcx,1) 1436 movq %rbx,%rax 1437 movq %rdx,%r13 1438 adcq $0,%r13 1439 addq 8(%rdi,%rcx,1),%r12 1440 leaq 16(%rcx),%rcx 1441 adcq $0,%r13 1442 1443 mulq %r14 1444 addq %rax,%r10 1445 movq %rbx,%rax 1446 adcq $0,%rdx 1447 addq %r12,%r10 1448 movq %rdx,%r11 1449 adcq $0,%r11 1450 movq %r10,-8(%rdi,%rcx,1) 1451 1452 cmpq $0,%rcx 1453 jne .Lsqr4x_inner 1454 1455.byte 0x67 1456 mulq %r15 1457 addq %rax,%r13 1458 adcq $0,%rdx 1459 addq %r11,%r13 1460 adcq $0,%rdx 1461 1462 movq %r13,(%rdi) 1463 movq %rdx,%r12 1464 movq %rdx,8(%rdi) 1465 1466 addq $16,%rbp 1467 jnz .Lsqr4x_outer 1468 1469 1470 movq -32(%rsi),%r14 1471 leaq 48+8(%rsp,%r9,2),%rdi 1472 movq -24(%rsi),%rax 1473 leaq -32(%rdi,%rbp,1),%rdi 1474 movq -16(%rsi),%rbx 1475 movq %rax,%r15 1476 1477 mulq %r14 1478 addq %rax,%r10 1479 movq %rbx,%rax 1480 movq %rdx,%r11 1481 adcq $0,%r11 1482 1483 mulq %r14 1484 addq %rax,%r11 1485 movq %rbx,%rax 1486 movq %r10,-24(%rdi) 1487 movq %rdx,%r10 1488 adcq $0,%r10 1489 addq %r13,%r11 1490 movq -8(%rsi),%rbx 1491 adcq $0,%r10 1492 1493 mulq %r15 1494 addq %rax,%r12 1495 movq %rbx,%rax 1496 movq %r11,-16(%rdi) 1497 movq %rdx,%r13 1498 adcq $0,%r13 1499 1500 mulq %r14 1501 addq %rax,%r10 1502 movq %rbx,%rax 1503 movq %rdx,%r11 1504 adcq $0,%r11 1505 addq %r12,%r10 1506 adcq $0,%r11 1507 movq %r10,-8(%rdi) 1508 1509 mulq %r15 1510 addq %rax,%r13 1511 movq -16(%rsi),%rax 1512 adcq $0,%rdx 1513 addq %r11,%r13 1514 adcq $0,%rdx 1515 1516 movq %r13,(%rdi) 1517 movq %rdx,%r12 1518 movq %rdx,8(%rdi) 1519 1520 mulq %rbx 1521 addq $16,%rbp 1522 xorq %r14,%r14 1523 subq %r9,%rbp 1524 
xorq %r15,%r15 1525 1526 addq %r12,%rax 1527 adcq $0,%rdx 1528 movq %rax,8(%rdi) 1529 movq %rdx,16(%rdi) 1530 movq %r15,24(%rdi) 1531 1532 movq -16(%rsi,%rbp,1),%rax 1533 leaq 48+8(%rsp),%rdi 1534 xorq %r10,%r10 1535 movq 8(%rdi),%r11 1536 1537 leaq (%r14,%r10,2),%r12 1538 shrq $63,%r10 1539 leaq (%rcx,%r11,2),%r13 1540 shrq $63,%r11 1541 orq %r10,%r13 1542 movq 16(%rdi),%r10 1543 movq %r11,%r14 1544 mulq %rax 1545 negq %r15 1546 movq 24(%rdi),%r11 1547 adcq %rax,%r12 1548 movq -8(%rsi,%rbp,1),%rax 1549 movq %r12,(%rdi) 1550 adcq %rdx,%r13 1551 1552 leaq (%r14,%r10,2),%rbx 1553 movq %r13,8(%rdi) 1554 sbbq %r15,%r15 1555 shrq $63,%r10 1556 leaq (%rcx,%r11,2),%r8 1557 shrq $63,%r11 1558 orq %r10,%r8 1559 movq 32(%rdi),%r10 1560 movq %r11,%r14 1561 mulq %rax 1562 negq %r15 1563 movq 40(%rdi),%r11 1564 adcq %rax,%rbx 1565 movq 0(%rsi,%rbp,1),%rax 1566 movq %rbx,16(%rdi) 1567 adcq %rdx,%r8 1568 leaq 16(%rbp),%rbp 1569 movq %r8,24(%rdi) 1570 sbbq %r15,%r15 1571 leaq 64(%rdi),%rdi 1572 jmp .Lsqr4x_shift_n_add 1573 1574.align 32 1575.Lsqr4x_shift_n_add: 1576 leaq (%r14,%r10,2),%r12 1577 shrq $63,%r10 1578 leaq (%rcx,%r11,2),%r13 1579 shrq $63,%r11 1580 orq %r10,%r13 1581 movq -16(%rdi),%r10 1582 movq %r11,%r14 1583 mulq %rax 1584 negq %r15 1585 movq -8(%rdi),%r11 1586 adcq %rax,%r12 1587 movq -8(%rsi,%rbp,1),%rax 1588 movq %r12,-32(%rdi) 1589 adcq %rdx,%r13 1590 1591 leaq (%r14,%r10,2),%rbx 1592 movq %r13,-24(%rdi) 1593 sbbq %r15,%r15 1594 shrq $63,%r10 1595 leaq (%rcx,%r11,2),%r8 1596 shrq $63,%r11 1597 orq %r10,%r8 1598 movq 0(%rdi),%r10 1599 movq %r11,%r14 1600 mulq %rax 1601 negq %r15 1602 movq 8(%rdi),%r11 1603 adcq %rax,%rbx 1604 movq 0(%rsi,%rbp,1),%rax 1605 movq %rbx,-16(%rdi) 1606 adcq %rdx,%r8 1607 1608 leaq (%r14,%r10,2),%r12 1609 movq %r8,-8(%rdi) 1610 sbbq %r15,%r15 1611 shrq $63,%r10 1612 leaq (%rcx,%r11,2),%r13 1613 shrq $63,%r11 1614 orq %r10,%r13 1615 movq 16(%rdi),%r10 1616 movq %r11,%r14 1617 mulq %rax 1618 negq %r15 1619 movq 24(%rdi),%r11 1620 adcq 
%rax,%r12 1621 movq 8(%rsi,%rbp,1),%rax 1622 movq %r12,0(%rdi) 1623 adcq %rdx,%r13 1624 1625 leaq (%r14,%r10,2),%rbx 1626 movq %r13,8(%rdi) 1627 sbbq %r15,%r15 1628 shrq $63,%r10 1629 leaq (%rcx,%r11,2),%r8 1630 shrq $63,%r11 1631 orq %r10,%r8 1632 movq 32(%rdi),%r10 1633 movq %r11,%r14 1634 mulq %rax 1635 negq %r15 1636 movq 40(%rdi),%r11 1637 adcq %rax,%rbx 1638 movq 16(%rsi,%rbp,1),%rax 1639 movq %rbx,16(%rdi) 1640 adcq %rdx,%r8 1641 movq %r8,24(%rdi) 1642 sbbq %r15,%r15 1643 leaq 64(%rdi),%rdi 1644 addq $32,%rbp 1645 jnz .Lsqr4x_shift_n_add 1646 1647 leaq (%r14,%r10,2),%r12 1648.byte 0x67 1649 shrq $63,%r10 1650 leaq (%rcx,%r11,2),%r13 1651 shrq $63,%r11 1652 orq %r10,%r13 1653 movq -16(%rdi),%r10 1654 movq %r11,%r14 1655 mulq %rax 1656 negq %r15 1657 movq -8(%rdi),%r11 1658 adcq %rax,%r12 1659 movq -8(%rsi),%rax 1660 movq %r12,-32(%rdi) 1661 adcq %rdx,%r13 1662 1663 leaq (%r14,%r10,2),%rbx 1664 movq %r13,-24(%rdi) 1665 sbbq %r15,%r15 1666 shrq $63,%r10 1667 leaq (%rcx,%r11,2),%r8 1668 shrq $63,%r11 1669 orq %r10,%r8 1670 mulq %rax 1671 negq %r15 1672 adcq %rax,%rbx 1673 adcq %rdx,%r8 1674 movq %rbx,-16(%rdi) 1675 movq %r8,-8(%rdi) 1676.byte 102,72,15,126,213 1677__bn_sqr8x_reduction: 1678 xorq %rax,%rax 1679 leaq (%r9,%rbp,1),%rcx 1680 leaq 48+8(%rsp,%r9,2),%rdx 1681 movq %rcx,0+8(%rsp) 1682 leaq 48+8(%rsp,%r9,1),%rdi 1683 movq %rdx,8+8(%rsp) 1684 negq %r9 1685 jmp .L8x_reduction_loop 1686 1687.align 32 1688.L8x_reduction_loop: 1689 leaq (%rdi,%r9,1),%rdi 1690.byte 0x66 1691 movq 0(%rdi),%rbx 1692 movq 8(%rdi),%r9 1693 movq 16(%rdi),%r10 1694 movq 24(%rdi),%r11 1695 movq 32(%rdi),%r12 1696 movq 40(%rdi),%r13 1697 movq 48(%rdi),%r14 1698 movq 56(%rdi),%r15 1699 movq %rax,(%rdx) 1700 leaq 64(%rdi),%rdi 1701 1702.byte 0x67 1703 movq %rbx,%r8 1704 imulq 32+8(%rsp),%rbx 1705 movq 0(%rbp),%rax 1706 movl $8,%ecx 1707 jmp .L8x_reduce 1708 1709.align 32 1710.L8x_reduce: 1711 mulq %rbx 1712 movq 8(%rbp),%rax 1713 negq %r8 1714 movq %rdx,%r8 1715 adcq $0,%r8 1716 1717 
mulq %rbx 1718 addq %rax,%r9 1719 movq 16(%rbp),%rax 1720 adcq $0,%rdx 1721 addq %r9,%r8 1722 movq %rbx,48-8+8(%rsp,%rcx,8) 1723 movq %rdx,%r9 1724 adcq $0,%r9 1725 1726 mulq %rbx 1727 addq %rax,%r10 1728 movq 24(%rbp),%rax 1729 adcq $0,%rdx 1730 addq %r10,%r9 1731 movq 32+8(%rsp),%rsi 1732 movq %rdx,%r10 1733 adcq $0,%r10 1734 1735 mulq %rbx 1736 addq %rax,%r11 1737 movq 32(%rbp),%rax 1738 adcq $0,%rdx 1739 imulq %r8,%rsi 1740 addq %r11,%r10 1741 movq %rdx,%r11 1742 adcq $0,%r11 1743 1744 mulq %rbx 1745 addq %rax,%r12 1746 movq 40(%rbp),%rax 1747 adcq $0,%rdx 1748 addq %r12,%r11 1749 movq %rdx,%r12 1750 adcq $0,%r12 1751 1752 mulq %rbx 1753 addq %rax,%r13 1754 movq 48(%rbp),%rax 1755 adcq $0,%rdx 1756 addq %r13,%r12 1757 movq %rdx,%r13 1758 adcq $0,%r13 1759 1760 mulq %rbx 1761 addq %rax,%r14 1762 movq 56(%rbp),%rax 1763 adcq $0,%rdx 1764 addq %r14,%r13 1765 movq %rdx,%r14 1766 adcq $0,%r14 1767 1768 mulq %rbx 1769 movq %rsi,%rbx 1770 addq %rax,%r15 1771 movq 0(%rbp),%rax 1772 adcq $0,%rdx 1773 addq %r15,%r14 1774 movq %rdx,%r15 1775 adcq $0,%r15 1776 1777 decl %ecx 1778 jnz .L8x_reduce 1779 1780 leaq 64(%rbp),%rbp 1781 xorq %rax,%rax 1782 movq 8+8(%rsp),%rdx 1783 cmpq 0+8(%rsp),%rbp 1784 jae .L8x_no_tail 1785 1786.byte 0x66 1787 addq 0(%rdi),%r8 1788 adcq 8(%rdi),%r9 1789 adcq 16(%rdi),%r10 1790 adcq 24(%rdi),%r11 1791 adcq 32(%rdi),%r12 1792 adcq 40(%rdi),%r13 1793 adcq 48(%rdi),%r14 1794 adcq 56(%rdi),%r15 1795 sbbq %rsi,%rsi 1796 1797 movq 48+56+8(%rsp),%rbx 1798 movl $8,%ecx 1799 movq 0(%rbp),%rax 1800 jmp .L8x_tail 1801 1802.align 32 1803.L8x_tail: 1804 mulq %rbx 1805 addq %rax,%r8 1806 movq 8(%rbp),%rax 1807 movq %r8,(%rdi) 1808 movq %rdx,%r8 1809 adcq $0,%r8 1810 1811 mulq %rbx 1812 addq %rax,%r9 1813 movq 16(%rbp),%rax 1814 adcq $0,%rdx 1815 addq %r9,%r8 1816 leaq 8(%rdi),%rdi 1817 movq %rdx,%r9 1818 adcq $0,%r9 1819 1820 mulq %rbx 1821 addq %rax,%r10 1822 movq 24(%rbp),%rax 1823 adcq $0,%rdx 1824 addq %r10,%r9 1825 movq %rdx,%r10 1826 adcq $0,%r10 1827 
1828 mulq %rbx 1829 addq %rax,%r11 1830 movq 32(%rbp),%rax 1831 adcq $0,%rdx 1832 addq %r11,%r10 1833 movq %rdx,%r11 1834 adcq $0,%r11 1835 1836 mulq %rbx 1837 addq %rax,%r12 1838 movq 40(%rbp),%rax 1839 adcq $0,%rdx 1840 addq %r12,%r11 1841 movq %rdx,%r12 1842 adcq $0,%r12 1843 1844 mulq %rbx 1845 addq %rax,%r13 1846 movq 48(%rbp),%rax 1847 adcq $0,%rdx 1848 addq %r13,%r12 1849 movq %rdx,%r13 1850 adcq $0,%r13 1851 1852 mulq %rbx 1853 addq %rax,%r14 1854 movq 56(%rbp),%rax 1855 adcq $0,%rdx 1856 addq %r14,%r13 1857 movq %rdx,%r14 1858 adcq $0,%r14 1859 1860 mulq %rbx 1861 movq 48-16+8(%rsp,%rcx,8),%rbx 1862 addq %rax,%r15 1863 adcq $0,%rdx 1864 addq %r15,%r14 1865 movq 0(%rbp),%rax 1866 movq %rdx,%r15 1867 adcq $0,%r15 1868 1869 decl %ecx 1870 jnz .L8x_tail 1871 1872 leaq 64(%rbp),%rbp 1873 movq 8+8(%rsp),%rdx 1874 cmpq 0+8(%rsp),%rbp 1875 jae .L8x_tail_done 1876 1877 movq 48+56+8(%rsp),%rbx 1878 negq %rsi 1879 movq 0(%rbp),%rax 1880 adcq 0(%rdi),%r8 1881 adcq 8(%rdi),%r9 1882 adcq 16(%rdi),%r10 1883 adcq 24(%rdi),%r11 1884 adcq 32(%rdi),%r12 1885 adcq 40(%rdi),%r13 1886 adcq 48(%rdi),%r14 1887 adcq 56(%rdi),%r15 1888 sbbq %rsi,%rsi 1889 1890 movl $8,%ecx 1891 jmp .L8x_tail 1892 1893.align 32 1894.L8x_tail_done: 1895 xorq %rax,%rax 1896 addq (%rdx),%r8 1897 adcq $0,%r9 1898 adcq $0,%r10 1899 adcq $0,%r11 1900 adcq $0,%r12 1901 adcq $0,%r13 1902 adcq $0,%r14 1903 adcq $0,%r15 1904 adcq $0,%rax 1905 1906 negq %rsi 1907.L8x_no_tail: 1908 adcq 0(%rdi),%r8 1909 adcq 8(%rdi),%r9 1910 adcq 16(%rdi),%r10 1911 adcq 24(%rdi),%r11 1912 adcq 32(%rdi),%r12 1913 adcq 40(%rdi),%r13 1914 adcq 48(%rdi),%r14 1915 adcq 56(%rdi),%r15 1916 adcq $0,%rax 1917 movq -8(%rbp),%rcx 1918 xorq %rsi,%rsi 1919 1920.byte 102,72,15,126,213 1921 1922 movq %r8,0(%rdi) 1923 movq %r9,8(%rdi) 1924.byte 102,73,15,126,217 1925 movq %r10,16(%rdi) 1926 movq %r11,24(%rdi) 1927 movq %r12,32(%rdi) 1928 movq %r13,40(%rdi) 1929 movq %r14,48(%rdi) 1930 movq %r15,56(%rdi) 1931 leaq 64(%rdi),%rdi 1932 1933 cmpq 
%rdx,%rdi 1934 jb .L8x_reduction_loop 1935 .byte 0xf3,0xc3 1936.size bn_sqr8x_internal,.-bn_sqr8x_internal 1937.type __bn_post4x_internal,@function 1938.align 32 1939__bn_post4x_internal: 1940 movq 0(%rbp),%r12 1941 leaq (%rdi,%r9,1),%rbx 1942 movq %r9,%rcx 1943.byte 102,72,15,126,207 1944 negq %rax 1945.byte 102,72,15,126,206 1946 sarq $3+2,%rcx 1947 decq %r12 1948 xorq %r10,%r10 1949 movq 8(%rbp),%r13 1950 movq 16(%rbp),%r14 1951 movq 24(%rbp),%r15 1952 jmp .Lsqr4x_sub_entry 1953 1954.align 16 1955.Lsqr4x_sub: 1956 movq 0(%rbp),%r12 1957 movq 8(%rbp),%r13 1958 movq 16(%rbp),%r14 1959 movq 24(%rbp),%r15 1960.Lsqr4x_sub_entry: 1961 leaq 32(%rbp),%rbp 1962 notq %r12 1963 notq %r13 1964 notq %r14 1965 notq %r15 1966 andq %rax,%r12 1967 andq %rax,%r13 1968 andq %rax,%r14 1969 andq %rax,%r15 1970 1971 negq %r10 1972 adcq 0(%rbx),%r12 1973 adcq 8(%rbx),%r13 1974 adcq 16(%rbx),%r14 1975 adcq 24(%rbx),%r15 1976 movq %r12,0(%rdi) 1977 leaq 32(%rbx),%rbx 1978 movq %r13,8(%rdi) 1979 sbbq %r10,%r10 1980 movq %r14,16(%rdi) 1981 movq %r15,24(%rdi) 1982 leaq 32(%rdi),%rdi 1983 1984 incq %rcx 1985 jnz .Lsqr4x_sub 1986 1987 movq %r9,%r10 1988 negq %r9 1989 .byte 0xf3,0xc3 1990.size __bn_post4x_internal,.-__bn_post4x_internal 1991.globl bn_from_montgomery 1992.type bn_from_montgomery,@function 1993.align 32 1994bn_from_montgomery: 1995 testl $7,%r9d 1996 jz bn_from_mont8x 1997 xorl %eax,%eax 1998 .byte 0xf3,0xc3 1999.size bn_from_montgomery,.-bn_from_montgomery 2000 2001.type bn_from_mont8x,@function 2002.align 32 2003bn_from_mont8x: 2004.byte 0x67 2005 movq %rsp,%rax 2006 pushq %rbx 2007 pushq %rbp 2008 pushq %r12 2009 pushq %r13 2010 pushq %r14 2011 pushq %r15 2012.Lfrom_prologue: 2013 2014 shll $3,%r9d 2015 leaq (%r9,%r9,2),%r10 2016 negq %r9 2017 movq (%r8),%r8 2018 2019 2020 2021 2022 2023 2024 2025 2026 leaq -320(%rsp,%r9,2),%r11 2027 movq %rsp,%rbp 2028 subq %rdi,%r11 2029 andq $4095,%r11 2030 cmpq %r11,%r10 2031 jb .Lfrom_sp_alt 2032 subq %r11,%rbp 2033 leaq 
-320(%rbp,%r9,2),%rbp 2034 jmp .Lfrom_sp_done 2035 2036.align 32 2037.Lfrom_sp_alt: 2038 leaq 4096-320(,%r9,2),%r10 2039 leaq -320(%rbp,%r9,2),%rbp 2040 subq %r10,%r11 2041 movq $0,%r10 2042 cmovcq %r10,%r11 2043 subq %r11,%rbp 2044.Lfrom_sp_done: 2045 andq $-64,%rbp 2046 movq %rsp,%r11 2047 subq %rbp,%r11 2048 andq $-4096,%r11 2049 leaq (%r11,%rbp,1),%rsp 2050 movq (%rsp),%r10 2051 cmpq %rbp,%rsp 2052 ja .Lfrom_page_walk 2053 jmp .Lfrom_page_walk_done 2054 2055.Lfrom_page_walk: 2056 leaq -4096(%rsp),%rsp 2057 movq (%rsp),%r10 2058 cmpq %rbp,%rsp 2059 ja .Lfrom_page_walk 2060.Lfrom_page_walk_done: 2061 2062 movq %r9,%r10 2063 negq %r9 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 movq %r8,32(%rsp) 2075 movq %rax,40(%rsp) 2076.Lfrom_body: 2077 movq %r9,%r11 2078 leaq 48(%rsp),%rax 2079 pxor %xmm0,%xmm0 2080 jmp .Lmul_by_1 2081 2082.align 32 2083.Lmul_by_1: 2084 movdqu (%rsi),%xmm1 2085 movdqu 16(%rsi),%xmm2 2086 movdqu 32(%rsi),%xmm3 2087 movdqa %xmm0,(%rax,%r9,1) 2088 movdqu 48(%rsi),%xmm4 2089 movdqa %xmm0,16(%rax,%r9,1) 2090.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2091 movdqa %xmm1,(%rax) 2092 movdqa %xmm0,32(%rax,%r9,1) 2093 movdqa %xmm2,16(%rax) 2094 movdqa %xmm0,48(%rax,%r9,1) 2095 movdqa %xmm3,32(%rax) 2096 movdqa %xmm4,48(%rax) 2097 leaq 64(%rax),%rax 2098 subq $64,%r11 2099 jnz .Lmul_by_1 2100 2101.byte 102,72,15,110,207 2102.byte 102,72,15,110,209 2103.byte 0x67 2104 movq %rcx,%rbp 2105.byte 102,73,15,110,218 2106 movl OPENSSL_ia32cap_P+8(%rip),%r11d 2107 andl $0x80108,%r11d 2108 cmpl $0x80108,%r11d 2109 jne .Lfrom_mont_nox 2110 2111 leaq (%rax,%r9,1),%rdi 2112 call __bn_sqrx8x_reduction 2113 call __bn_postx4x_internal 2114 2115 pxor %xmm0,%xmm0 2116 leaq 48(%rsp),%rax 2117 movq 40(%rsp),%rsi 2118 jmp .Lfrom_mont_zero 2119 2120.align 32 2121.Lfrom_mont_nox: 2122 call __bn_sqr8x_reduction 2123 call __bn_post4x_internal 2124 2125 pxor %xmm0,%xmm0 2126 leaq 48(%rsp),%rax 2127 movq 40(%rsp),%rsi 2128 jmp .Lfrom_mont_zero 2129 2130.align 32 
2131.Lfrom_mont_zero: 2132 movdqa %xmm0,0(%rax) 2133 movdqa %xmm0,16(%rax) 2134 movdqa %xmm0,32(%rax) 2135 movdqa %xmm0,48(%rax) 2136 leaq 64(%rax),%rax 2137 subq $32,%r9 2138 jnz .Lfrom_mont_zero 2139 2140 movq $1,%rax 2141 movq -48(%rsi),%r15 2142 movq -40(%rsi),%r14 2143 movq -32(%rsi),%r13 2144 movq -24(%rsi),%r12 2145 movq -16(%rsi),%rbp 2146 movq -8(%rsi),%rbx 2147 leaq (%rsi),%rsp 2148.Lfrom_epilogue: 2149 .byte 0xf3,0xc3 2150.size bn_from_mont8x,.-bn_from_mont8x 2151.type bn_mulx4x_mont_gather5,@function 2152.align 32 2153bn_mulx4x_mont_gather5: 2154 movq %rsp,%rax 2155.Lmulx4x_enter: 2156 pushq %rbx 2157 pushq %rbp 2158 pushq %r12 2159 pushq %r13 2160 pushq %r14 2161 pushq %r15 2162.Lmulx4x_prologue: 2163 2164 shll $3,%r9d 2165 leaq (%r9,%r9,2),%r10 2166 negq %r9 2167 movq (%r8),%r8 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 leaq -320(%rsp,%r9,2),%r11 2179 movq %rsp,%rbp 2180 subq %rdi,%r11 2181 andq $4095,%r11 2182 cmpq %r11,%r10 2183 jb .Lmulx4xsp_alt 2184 subq %r11,%rbp 2185 leaq -320(%rbp,%r9,2),%rbp 2186 jmp .Lmulx4xsp_done 2187 2188.Lmulx4xsp_alt: 2189 leaq 4096-320(,%r9,2),%r10 2190 leaq -320(%rbp,%r9,2),%rbp 2191 subq %r10,%r11 2192 movq $0,%r10 2193 cmovcq %r10,%r11 2194 subq %r11,%rbp 2195.Lmulx4xsp_done: 2196 andq $-64,%rbp 2197 movq %rsp,%r11 2198 subq %rbp,%r11 2199 andq $-4096,%r11 2200 leaq (%r11,%rbp,1),%rsp 2201 movq (%rsp),%r10 2202 cmpq %rbp,%rsp 2203 ja .Lmulx4x_page_walk 2204 jmp .Lmulx4x_page_walk_done 2205 2206.Lmulx4x_page_walk: 2207 leaq -4096(%rsp),%rsp 2208 movq (%rsp),%r10 2209 cmpq %rbp,%rsp 2210 ja .Lmulx4x_page_walk 2211.Lmulx4x_page_walk_done: 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 movq %r8,32(%rsp) 2226 movq %rax,40(%rsp) 2227.Lmulx4x_body: 2228 call mulx4x_internal 2229 2230 movq 40(%rsp),%rsi 2231 movq $1,%rax 2232 2233 movq -48(%rsi),%r15 2234 movq -40(%rsi),%r14 2235 movq -32(%rsi),%r13 2236 movq -24(%rsi),%r12 2237 movq -16(%rsi),%rbp 2238 movq -8(%rsi),%rbx 2239 leaq 
(%rsi),%rsp 2240.Lmulx4x_epilogue: 2241 .byte 0xf3,0xc3 2242.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2243 2244.type mulx4x_internal,@function 2245.align 32 2246mulx4x_internal: 2247 movq %r9,8(%rsp) 2248 movq %r9,%r10 2249 negq %r9 2250 shlq $5,%r9 2251 negq %r10 2252 leaq 128(%rdx,%r9,1),%r13 2253 shrq $5+5,%r9 2254 movd 8(%rax),%xmm5 2255 subq $1,%r9 2256 leaq .Linc(%rip),%rax 2257 movq %r13,16+8(%rsp) 2258 movq %r9,24+8(%rsp) 2259 movq %rdi,56+8(%rsp) 2260 movdqa 0(%rax),%xmm0 2261 movdqa 16(%rax),%xmm1 2262 leaq 88-112(%rsp,%r10,1),%r10 2263 leaq 128(%rdx),%rdi 2264 2265 pshufd $0,%xmm5,%xmm5 2266 movdqa %xmm1,%xmm4 2267.byte 0x67 2268 movdqa %xmm1,%xmm2 2269.byte 0x67 2270 paddd %xmm0,%xmm1 2271 pcmpeqd %xmm5,%xmm0 2272 movdqa %xmm4,%xmm3 2273 paddd %xmm1,%xmm2 2274 pcmpeqd %xmm5,%xmm1 2275 movdqa %xmm0,112(%r10) 2276 movdqa %xmm4,%xmm0 2277 2278 paddd %xmm2,%xmm3 2279 pcmpeqd %xmm5,%xmm2 2280 movdqa %xmm1,128(%r10) 2281 movdqa %xmm4,%xmm1 2282 2283 paddd %xmm3,%xmm0 2284 pcmpeqd %xmm5,%xmm3 2285 movdqa %xmm2,144(%r10) 2286 movdqa %xmm4,%xmm2 2287 2288 paddd %xmm0,%xmm1 2289 pcmpeqd %xmm5,%xmm0 2290 movdqa %xmm3,160(%r10) 2291 movdqa %xmm4,%xmm3 2292 paddd %xmm1,%xmm2 2293 pcmpeqd %xmm5,%xmm1 2294 movdqa %xmm0,176(%r10) 2295 movdqa %xmm4,%xmm0 2296 2297 paddd %xmm2,%xmm3 2298 pcmpeqd %xmm5,%xmm2 2299 movdqa %xmm1,192(%r10) 2300 movdqa %xmm4,%xmm1 2301 2302 paddd %xmm3,%xmm0 2303 pcmpeqd %xmm5,%xmm3 2304 movdqa %xmm2,208(%r10) 2305 movdqa %xmm4,%xmm2 2306 2307 paddd %xmm0,%xmm1 2308 pcmpeqd %xmm5,%xmm0 2309 movdqa %xmm3,224(%r10) 2310 movdqa %xmm4,%xmm3 2311 paddd %xmm1,%xmm2 2312 pcmpeqd %xmm5,%xmm1 2313 movdqa %xmm0,240(%r10) 2314 movdqa %xmm4,%xmm0 2315 2316 paddd %xmm2,%xmm3 2317 pcmpeqd %xmm5,%xmm2 2318 movdqa %xmm1,256(%r10) 2319 movdqa %xmm4,%xmm1 2320 2321 paddd %xmm3,%xmm0 2322 pcmpeqd %xmm5,%xmm3 2323 movdqa %xmm2,272(%r10) 2324 movdqa %xmm4,%xmm2 2325 2326 paddd %xmm0,%xmm1 2327 pcmpeqd %xmm5,%xmm0 2328 movdqa %xmm3,288(%r10) 2329 movdqa 
%xmm4,%xmm3 2330.byte 0x67 2331 paddd %xmm1,%xmm2 2332 pcmpeqd %xmm5,%xmm1 2333 movdqa %xmm0,304(%r10) 2334 2335 paddd %xmm2,%xmm3 2336 pcmpeqd %xmm5,%xmm2 2337 movdqa %xmm1,320(%r10) 2338 2339 pcmpeqd %xmm5,%xmm3 2340 movdqa %xmm2,336(%r10) 2341 2342 pand 64(%rdi),%xmm0 2343 pand 80(%rdi),%xmm1 2344 pand 96(%rdi),%xmm2 2345 movdqa %xmm3,352(%r10) 2346 pand 112(%rdi),%xmm3 2347 por %xmm2,%xmm0 2348 por %xmm3,%xmm1 2349 movdqa -128(%rdi),%xmm4 2350 movdqa -112(%rdi),%xmm5 2351 movdqa -96(%rdi),%xmm2 2352 pand 112(%r10),%xmm4 2353 movdqa -80(%rdi),%xmm3 2354 pand 128(%r10),%xmm5 2355 por %xmm4,%xmm0 2356 pand 144(%r10),%xmm2 2357 por %xmm5,%xmm1 2358 pand 160(%r10),%xmm3 2359 por %xmm2,%xmm0 2360 por %xmm3,%xmm1 2361 movdqa -64(%rdi),%xmm4 2362 movdqa -48(%rdi),%xmm5 2363 movdqa -32(%rdi),%xmm2 2364 pand 176(%r10),%xmm4 2365 movdqa -16(%rdi),%xmm3 2366 pand 192(%r10),%xmm5 2367 por %xmm4,%xmm0 2368 pand 208(%r10),%xmm2 2369 por %xmm5,%xmm1 2370 pand 224(%r10),%xmm3 2371 por %xmm2,%xmm0 2372 por %xmm3,%xmm1 2373 movdqa 0(%rdi),%xmm4 2374 movdqa 16(%rdi),%xmm5 2375 movdqa 32(%rdi),%xmm2 2376 pand 240(%r10),%xmm4 2377 movdqa 48(%rdi),%xmm3 2378 pand 256(%r10),%xmm5 2379 por %xmm4,%xmm0 2380 pand 272(%r10),%xmm2 2381 por %xmm5,%xmm1 2382 pand 288(%r10),%xmm3 2383 por %xmm2,%xmm0 2384 por %xmm3,%xmm1 2385 pxor %xmm1,%xmm0 2386 pshufd $0x4e,%xmm0,%xmm1 2387 por %xmm1,%xmm0 2388 leaq 256(%rdi),%rdi 2389.byte 102,72,15,126,194 2390 leaq 64+32+8(%rsp),%rbx 2391 2392 movq %rdx,%r9 2393 mulxq 0(%rsi),%r8,%rax 2394 mulxq 8(%rsi),%r11,%r12 2395 addq %rax,%r11 2396 mulxq 16(%rsi),%rax,%r13 2397 adcq %rax,%r12 2398 adcq $0,%r13 2399 mulxq 24(%rsi),%rax,%r14 2400 2401 movq %r8,%r15 2402 imulq 32+8(%rsp),%r8 2403 xorq %rbp,%rbp 2404 movq %r8,%rdx 2405 2406 movq %rdi,8+8(%rsp) 2407 2408 leaq 32(%rsi),%rsi 2409 adcxq %rax,%r13 2410 adcxq %rbp,%r14 2411 2412 mulxq 0(%rcx),%rax,%r10 2413 adcxq %rax,%r15 2414 adoxq %r11,%r10 2415 mulxq 8(%rcx),%rax,%r11 2416 adcxq %rax,%r10 2417 adoxq 
%r12,%r11 2418 mulxq 16(%rcx),%rax,%r12 2419 movq 24+8(%rsp),%rdi 2420 movq %r10,-32(%rbx) 2421 adcxq %rax,%r11 2422 adoxq %r13,%r12 2423 mulxq 24(%rcx),%rax,%r15 2424 movq %r9,%rdx 2425 movq %r11,-24(%rbx) 2426 adcxq %rax,%r12 2427 adoxq %rbp,%r15 2428 leaq 32(%rcx),%rcx 2429 movq %r12,-16(%rbx) 2430 jmp .Lmulx4x_1st 2431 2432.align 32 2433.Lmulx4x_1st: 2434 adcxq %rbp,%r15 2435 mulxq 0(%rsi),%r10,%rax 2436 adcxq %r14,%r10 2437 mulxq 8(%rsi),%r11,%r14 2438 adcxq %rax,%r11 2439 mulxq 16(%rsi),%r12,%rax 2440 adcxq %r14,%r12 2441 mulxq 24(%rsi),%r13,%r14 2442.byte 0x67,0x67 2443 movq %r8,%rdx 2444 adcxq %rax,%r13 2445 adcxq %rbp,%r14 2446 leaq 32(%rsi),%rsi 2447 leaq 32(%rbx),%rbx 2448 2449 adoxq %r15,%r10 2450 mulxq 0(%rcx),%rax,%r15 2451 adcxq %rax,%r10 2452 adoxq %r15,%r11 2453 mulxq 8(%rcx),%rax,%r15 2454 adcxq %rax,%r11 2455 adoxq %r15,%r12 2456 mulxq 16(%rcx),%rax,%r15 2457 movq %r10,-40(%rbx) 2458 adcxq %rax,%r12 2459 movq %r11,-32(%rbx) 2460 adoxq %r15,%r13 2461 mulxq 24(%rcx),%rax,%r15 2462 movq %r9,%rdx 2463 movq %r12,-24(%rbx) 2464 adcxq %rax,%r13 2465 adoxq %rbp,%r15 2466 leaq 32(%rcx),%rcx 2467 movq %r13,-16(%rbx) 2468 2469 decq %rdi 2470 jnz .Lmulx4x_1st 2471 2472 movq 8(%rsp),%rax 2473 adcq %rbp,%r15 2474 leaq (%rsi,%rax,1),%rsi 2475 addq %r15,%r14 2476 movq 8+8(%rsp),%rdi 2477 adcq %rbp,%rbp 2478 movq %r14,-8(%rbx) 2479 jmp .Lmulx4x_outer 2480 2481.align 32 2482.Lmulx4x_outer: 2483 leaq 16-256(%rbx),%r10 2484 pxor %xmm4,%xmm4 2485.byte 0x67,0x67 2486 pxor %xmm5,%xmm5 2487 movdqa -128(%rdi),%xmm0 2488 movdqa -112(%rdi),%xmm1 2489 movdqa -96(%rdi),%xmm2 2490 pand 256(%r10),%xmm0 2491 movdqa -80(%rdi),%xmm3 2492 pand 272(%r10),%xmm1 2493 por %xmm0,%xmm4 2494 pand 288(%r10),%xmm2 2495 por %xmm1,%xmm5 2496 pand 304(%r10),%xmm3 2497 por %xmm2,%xmm4 2498 por %xmm3,%xmm5 2499 movdqa -64(%rdi),%xmm0 2500 movdqa -48(%rdi),%xmm1 2501 movdqa -32(%rdi),%xmm2 2502 pand 320(%r10),%xmm0 2503 movdqa -16(%rdi),%xmm3 2504 pand 336(%r10),%xmm1 2505 por %xmm0,%xmm4 2506 
pand 352(%r10),%xmm2 2507 por %xmm1,%xmm5 2508 pand 368(%r10),%xmm3 2509 por %xmm2,%xmm4 2510 por %xmm3,%xmm5 2511 movdqa 0(%rdi),%xmm0 2512 movdqa 16(%rdi),%xmm1 2513 movdqa 32(%rdi),%xmm2 2514 pand 384(%r10),%xmm0 2515 movdqa 48(%rdi),%xmm3 2516 pand 400(%r10),%xmm1 2517 por %xmm0,%xmm4 2518 pand 416(%r10),%xmm2 2519 por %xmm1,%xmm5 2520 pand 432(%r10),%xmm3 2521 por %xmm2,%xmm4 2522 por %xmm3,%xmm5 2523 movdqa 64(%rdi),%xmm0 2524 movdqa 80(%rdi),%xmm1 2525 movdqa 96(%rdi),%xmm2 2526 pand 448(%r10),%xmm0 2527 movdqa 112(%rdi),%xmm3 2528 pand 464(%r10),%xmm1 2529 por %xmm0,%xmm4 2530 pand 480(%r10),%xmm2 2531 por %xmm1,%xmm5 2532 pand 496(%r10),%xmm3 2533 por %xmm2,%xmm4 2534 por %xmm3,%xmm5 2535 por %xmm5,%xmm4 2536 pshufd $0x4e,%xmm4,%xmm0 2537 por %xmm4,%xmm0 2538 leaq 256(%rdi),%rdi 2539.byte 102,72,15,126,194 2540 2541 movq %rbp,(%rbx) 2542 leaq 32(%rbx,%rax,1),%rbx 2543 mulxq 0(%rsi),%r8,%r11 2544 xorq %rbp,%rbp 2545 movq %rdx,%r9 2546 mulxq 8(%rsi),%r14,%r12 2547 adoxq -32(%rbx),%r8 2548 adcxq %r14,%r11 2549 mulxq 16(%rsi),%r15,%r13 2550 adoxq -24(%rbx),%r11 2551 adcxq %r15,%r12 2552 mulxq 24(%rsi),%rdx,%r14 2553 adoxq -16(%rbx),%r12 2554 adcxq %rdx,%r13 2555 leaq (%rcx,%rax,1),%rcx 2556 leaq 32(%rsi),%rsi 2557 adoxq -8(%rbx),%r13 2558 adcxq %rbp,%r14 2559 adoxq %rbp,%r14 2560 2561 movq %r8,%r15 2562 imulq 32+8(%rsp),%r8 2563 2564 movq %r8,%rdx 2565 xorq %rbp,%rbp 2566 movq %rdi,8+8(%rsp) 2567 2568 mulxq 0(%rcx),%rax,%r10 2569 adcxq %rax,%r15 2570 adoxq %r11,%r10 2571 mulxq 8(%rcx),%rax,%r11 2572 adcxq %rax,%r10 2573 adoxq %r12,%r11 2574 mulxq 16(%rcx),%rax,%r12 2575 adcxq %rax,%r11 2576 adoxq %r13,%r12 2577 mulxq 24(%rcx),%rax,%r15 2578 movq %r9,%rdx 2579 movq 24+8(%rsp),%rdi 2580 movq %r10,-32(%rbx) 2581 adcxq %rax,%r12 2582 movq %r11,-24(%rbx) 2583 adoxq %rbp,%r15 2584 movq %r12,-16(%rbx) 2585 leaq 32(%rcx),%rcx 2586 jmp .Lmulx4x_inner 2587 2588.align 32 2589.Lmulx4x_inner: 2590 mulxq 0(%rsi),%r10,%rax 2591 adcxq %rbp,%r15 2592 adoxq %r14,%r10 2593 mulxq 
8(%rsi),%r11,%r14 2594 adcxq 0(%rbx),%r10 2595 adoxq %rax,%r11 2596 mulxq 16(%rsi),%r12,%rax 2597 adcxq 8(%rbx),%r11 2598 adoxq %r14,%r12 2599 mulxq 24(%rsi),%r13,%r14 2600 movq %r8,%rdx 2601 adcxq 16(%rbx),%r12 2602 adoxq %rax,%r13 2603 adcxq 24(%rbx),%r13 2604 adoxq %rbp,%r14 2605 leaq 32(%rsi),%rsi 2606 leaq 32(%rbx),%rbx 2607 adcxq %rbp,%r14 2608 2609 adoxq %r15,%r10 2610 mulxq 0(%rcx),%rax,%r15 2611 adcxq %rax,%r10 2612 adoxq %r15,%r11 2613 mulxq 8(%rcx),%rax,%r15 2614 adcxq %rax,%r11 2615 adoxq %r15,%r12 2616 mulxq 16(%rcx),%rax,%r15 2617 movq %r10,-40(%rbx) 2618 adcxq %rax,%r12 2619 adoxq %r15,%r13 2620 movq %r11,-32(%rbx) 2621 mulxq 24(%rcx),%rax,%r15 2622 movq %r9,%rdx 2623 leaq 32(%rcx),%rcx 2624 movq %r12,-24(%rbx) 2625 adcxq %rax,%r13 2626 adoxq %rbp,%r15 2627 movq %r13,-16(%rbx) 2628 2629 decq %rdi 2630 jnz .Lmulx4x_inner 2631 2632 movq 0+8(%rsp),%rax 2633 adcq %rbp,%r15 2634 subq 0(%rbx),%rdi 2635 movq 8+8(%rsp),%rdi 2636 movq 16+8(%rsp),%r10 2637 adcq %r15,%r14 2638 leaq (%rsi,%rax,1),%rsi 2639 adcq %rbp,%rbp 2640 movq %r14,-8(%rbx) 2641 2642 cmpq %r10,%rdi 2643 jb .Lmulx4x_outer 2644 2645 movq -8(%rcx),%r10 2646 movq %rbp,%r8 2647 movq (%rcx,%rax,1),%r12 2648 leaq (%rcx,%rax,1),%rbp 2649 movq %rax,%rcx 2650 leaq (%rbx,%rax,1),%rdi 2651 xorl %eax,%eax 2652 xorq %r15,%r15 2653 subq %r14,%r10 2654 adcq %r15,%r15 2655 orq %r15,%r8 2656 sarq $3+2,%rcx 2657 subq %r8,%rax 2658 movq 56+8(%rsp),%rdx 2659 decq %r12 2660 movq 8(%rbp),%r13 2661 xorq %r8,%r8 2662 movq 16(%rbp),%r14 2663 movq 24(%rbp),%r15 2664 jmp .Lsqrx4x_sub_entry 2665.size mulx4x_internal,.-mulx4x_internal 2666.type bn_powerx5,@function 2667.align 32 2668bn_powerx5: 2669 movq %rsp,%rax 2670.Lpowerx5_enter: 2671 pushq %rbx 2672 pushq %rbp 2673 pushq %r12 2674 pushq %r13 2675 pushq %r14 2676 pushq %r15 2677.Lpowerx5_prologue: 2678 2679 shll $3,%r9d 2680 leaq (%r9,%r9,2),%r10 2681 negq %r9 2682 movq (%r8),%r8 2683 2684 2685 2686 2687 2688 2689 2690 2691 leaq -320(%rsp,%r9,2),%r11 2692 movq 
%rsp,%rbp 2693 subq %rdi,%r11 2694 andq $4095,%r11 2695 cmpq %r11,%r10 2696 jb .Lpwrx_sp_alt 2697 subq %r11,%rbp 2698 leaq -320(%rbp,%r9,2),%rbp 2699 jmp .Lpwrx_sp_done 2700 2701.align 32 2702.Lpwrx_sp_alt: 2703 leaq 4096-320(,%r9,2),%r10 2704 leaq -320(%rbp,%r9,2),%rbp 2705 subq %r10,%r11 2706 movq $0,%r10 2707 cmovcq %r10,%r11 2708 subq %r11,%rbp 2709.Lpwrx_sp_done: 2710 andq $-64,%rbp 2711 movq %rsp,%r11 2712 subq %rbp,%r11 2713 andq $-4096,%r11 2714 leaq (%r11,%rbp,1),%rsp 2715 movq (%rsp),%r10 2716 cmpq %rbp,%rsp 2717 ja .Lpwrx_page_walk 2718 jmp .Lpwrx_page_walk_done 2719 2720.Lpwrx_page_walk: 2721 leaq -4096(%rsp),%rsp 2722 movq (%rsp),%r10 2723 cmpq %rbp,%rsp 2724 ja .Lpwrx_page_walk 2725.Lpwrx_page_walk_done: 2726 2727 movq %r9,%r10 2728 negq %r9 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 pxor %xmm0,%xmm0 2742.byte 102,72,15,110,207 2743.byte 102,72,15,110,209 2744.byte 102,73,15,110,218 2745.byte 102,72,15,110,226 2746 movq %r8,32(%rsp) 2747 movq %rax,40(%rsp) 2748.Lpowerx5_body: 2749 2750 call __bn_sqrx8x_internal 2751 call __bn_postx4x_internal 2752 call __bn_sqrx8x_internal 2753 call __bn_postx4x_internal 2754 call __bn_sqrx8x_internal 2755 call __bn_postx4x_internal 2756 call __bn_sqrx8x_internal 2757 call __bn_postx4x_internal 2758 call __bn_sqrx8x_internal 2759 call __bn_postx4x_internal 2760 2761 movq %r10,%r9 2762 movq %rsi,%rdi 2763.byte 102,72,15,126,209 2764.byte 102,72,15,126,226 2765 movq 40(%rsp),%rax 2766 2767 call mulx4x_internal 2768 2769 movq 40(%rsp),%rsi 2770 movq $1,%rax 2771 2772 movq -48(%rsi),%r15 2773 movq -40(%rsi),%r14 2774 movq -32(%rsi),%r13 2775 movq -24(%rsi),%r12 2776 movq -16(%rsi),%rbp 2777 movq -8(%rsi),%rbx 2778 leaq (%rsi),%rsp 2779.Lpowerx5_epilogue: 2780 .byte 0xf3,0xc3 2781.size bn_powerx5,.-bn_powerx5 2782 2783.globl bn_sqrx8x_internal 2784.hidden bn_sqrx8x_internal 2785.type bn_sqrx8x_internal,@function 2786.align 32 2787bn_sqrx8x_internal: 2788__bn_sqrx8x_internal: 2789 2790 2791 2792 2793 
2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 leaq 48+8(%rsp),%rdi 2830 leaq (%rsi,%r9,1),%rbp 2831 movq %r9,0+8(%rsp) 2832 movq %rbp,8+8(%rsp) 2833 jmp .Lsqr8x_zero_start 2834 2835.align 32 2836.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2837.Lsqrx8x_zero: 2838.byte 0x3e 2839 movdqa %xmm0,0(%rdi) 2840 movdqa %xmm0,16(%rdi) 2841 movdqa %xmm0,32(%rdi) 2842 movdqa %xmm0,48(%rdi) 2843.Lsqr8x_zero_start: 2844 movdqa %xmm0,64(%rdi) 2845 movdqa %xmm0,80(%rdi) 2846 movdqa %xmm0,96(%rdi) 2847 movdqa %xmm0,112(%rdi) 2848 leaq 128(%rdi),%rdi 2849 subq $64,%r9 2850 jnz .Lsqrx8x_zero 2851 2852 movq 0(%rsi),%rdx 2853 2854 xorq %r10,%r10 2855 xorq %r11,%r11 2856 xorq %r12,%r12 2857 xorq %r13,%r13 2858 xorq %r14,%r14 2859 xorq %r15,%r15 2860 leaq 48+8(%rsp),%rdi 2861 xorq %rbp,%rbp 2862 jmp .Lsqrx8x_outer_loop 2863 2864.align 32 2865.Lsqrx8x_outer_loop: 2866 mulxq 8(%rsi),%r8,%rax 2867 adcxq %r9,%r8 2868 adoxq %rax,%r10 2869 mulxq 16(%rsi),%r9,%rax 2870 adcxq %r10,%r9 2871 adoxq %rax,%r11 2872.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2873 adcxq %r11,%r10 2874 adoxq %rax,%r12 2875.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2876 adcxq %r12,%r11 2877 adoxq %rax,%r13 2878 mulxq 40(%rsi),%r12,%rax 2879 adcxq %r13,%r12 2880 adoxq %rax,%r14 2881 mulxq 48(%rsi),%r13,%rax 2882 adcxq %r14,%r13 2883 adoxq %r15,%rax 2884 mulxq 56(%rsi),%r14,%r15 2885 movq 8(%rsi),%rdx 2886 adcxq %rax,%r14 2887 adoxq %rbp,%r15 2888 adcq 64(%rdi),%r15 2889 movq %r8,8(%rdi) 2890 movq %r9,16(%rdi) 2891 sbbq %rcx,%rcx 2892 xorq %rbp,%rbp 2893 2894 2895 mulxq 16(%rsi),%r8,%rbx 2896 mulxq 24(%rsi),%r9,%rax 2897 adcxq %r10,%r8 2898 adoxq %rbx,%r9 2899 mulxq 32(%rsi),%r10,%rbx 2900 adcxq %r11,%r9 2901 adoxq %rax,%r10 2902.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2903 adcxq %r12,%r10 2904 adoxq %rbx,%r11 2905.byte 
0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2906 adcxq %r13,%r11 2907 adoxq %r14,%r12 2908.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2909 movq 16(%rsi),%rdx 2910 adcxq %rax,%r12 2911 adoxq %rbx,%r13 2912 adcxq %r15,%r13 2913 adoxq %rbp,%r14 2914 adcxq %rbp,%r14 2915 2916 movq %r8,24(%rdi) 2917 movq %r9,32(%rdi) 2918 2919 mulxq 24(%rsi),%r8,%rbx 2920 mulxq 32(%rsi),%r9,%rax 2921 adcxq %r10,%r8 2922 adoxq %rbx,%r9 2923 mulxq 40(%rsi),%r10,%rbx 2924 adcxq %r11,%r9 2925 adoxq %rax,%r10 2926.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2927 adcxq %r12,%r10 2928 adoxq %r13,%r11 2929.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2930.byte 0x3e 2931 movq 24(%rsi),%rdx 2932 adcxq %rbx,%r11 2933 adoxq %rax,%r12 2934 adcxq %r14,%r12 2935 movq %r8,40(%rdi) 2936 movq %r9,48(%rdi) 2937 mulxq 32(%rsi),%r8,%rax 2938 adoxq %rbp,%r13 2939 adcxq %rbp,%r13 2940 2941 mulxq 40(%rsi),%r9,%rbx 2942 adcxq %r10,%r8 2943 adoxq %rax,%r9 2944 mulxq 48(%rsi),%r10,%rax 2945 adcxq %r11,%r9 2946 adoxq %r12,%r10 2947 mulxq 56(%rsi),%r11,%r12 2948 movq 32(%rsi),%rdx 2949 movq 40(%rsi),%r14 2950 adcxq %rbx,%r10 2951 adoxq %rax,%r11 2952 movq 48(%rsi),%r15 2953 adcxq %r13,%r11 2954 adoxq %rbp,%r12 2955 adcxq %rbp,%r12 2956 2957 movq %r8,56(%rdi) 2958 movq %r9,64(%rdi) 2959 2960 mulxq %r14,%r9,%rax 2961 movq 56(%rsi),%r8 2962 adcxq %r10,%r9 2963 mulxq %r15,%r10,%rbx 2964 adoxq %rax,%r10 2965 adcxq %r11,%r10 2966 mulxq %r8,%r11,%rax 2967 movq %r14,%rdx 2968 adoxq %rbx,%r11 2969 adcxq %r12,%r11 2970 2971 adcxq %rbp,%rax 2972 2973 mulxq %r15,%r14,%rbx 2974 mulxq %r8,%r12,%r13 2975 movq %r15,%rdx 2976 leaq 64(%rsi),%rsi 2977 adcxq %r14,%r11 2978 adoxq %rbx,%r12 2979 adcxq %rax,%r12 2980 adoxq %rbp,%r13 2981 2982.byte 0x67,0x67 2983 mulxq %r8,%r8,%r14 2984 adcxq %r8,%r13 2985 adcxq %rbp,%r14 2986 2987 cmpq 8+8(%rsp),%rsi 2988 je .Lsqrx8x_outer_break 2989 2990 negq %rcx 2991 movq $-8,%rcx 2992 movq %rbp,%r15 2993 movq 64(%rdi),%r8 2994 adcxq 72(%rdi),%r9 2995 adcxq 80(%rdi),%r10 2996 adcxq 
88(%rdi),%r11 2997 adcq 96(%rdi),%r12 2998 adcq 104(%rdi),%r13 2999 adcq 112(%rdi),%r14 3000 adcq 120(%rdi),%r15 3001 leaq (%rsi),%rbp 3002 leaq 128(%rdi),%rdi 3003 sbbq %rax,%rax 3004 3005 movq -64(%rsi),%rdx 3006 movq %rax,16+8(%rsp) 3007 movq %rdi,24+8(%rsp) 3008 3009 3010 xorl %eax,%eax 3011 jmp .Lsqrx8x_loop 3012 3013.align 32 3014.Lsqrx8x_loop: 3015 movq %r8,%rbx 3016 mulxq 0(%rbp),%rax,%r8 3017 adcxq %rax,%rbx 3018 adoxq %r9,%r8 3019 3020 mulxq 8(%rbp),%rax,%r9 3021 adcxq %rax,%r8 3022 adoxq %r10,%r9 3023 3024 mulxq 16(%rbp),%rax,%r10 3025 adcxq %rax,%r9 3026 adoxq %r11,%r10 3027 3028 mulxq 24(%rbp),%rax,%r11 3029 adcxq %rax,%r10 3030 adoxq %r12,%r11 3031 3032.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3033 adcxq %rax,%r11 3034 adoxq %r13,%r12 3035 3036 mulxq 40(%rbp),%rax,%r13 3037 adcxq %rax,%r12 3038 adoxq %r14,%r13 3039 3040 mulxq 48(%rbp),%rax,%r14 3041 movq %rbx,(%rdi,%rcx,8) 3042 movl $0,%ebx 3043 adcxq %rax,%r13 3044 adoxq %r15,%r14 3045 3046.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3047 movq 8(%rsi,%rcx,8),%rdx 3048 adcxq %rax,%r14 3049 adoxq %rbx,%r15 3050 adcxq %rbx,%r15 3051 3052.byte 0x67 3053 incq %rcx 3054 jnz .Lsqrx8x_loop 3055 3056 leaq 64(%rbp),%rbp 3057 movq $-8,%rcx 3058 cmpq 8+8(%rsp),%rbp 3059 je .Lsqrx8x_break 3060 3061 subq 16+8(%rsp),%rbx 3062.byte 0x66 3063 movq -64(%rsi),%rdx 3064 adcxq 0(%rdi),%r8 3065 adcxq 8(%rdi),%r9 3066 adcq 16(%rdi),%r10 3067 adcq 24(%rdi),%r11 3068 adcq 32(%rdi),%r12 3069 adcq 40(%rdi),%r13 3070 adcq 48(%rdi),%r14 3071 adcq 56(%rdi),%r15 3072 leaq 64(%rdi),%rdi 3073.byte 0x67 3074 sbbq %rax,%rax 3075 xorl %ebx,%ebx 3076 movq %rax,16+8(%rsp) 3077 jmp .Lsqrx8x_loop 3078 3079.align 32 3080.Lsqrx8x_break: 3081 xorq %rbp,%rbp 3082 subq 16+8(%rsp),%rbx 3083 adcxq %rbp,%r8 3084 movq 24+8(%rsp),%rcx 3085 adcxq %rbp,%r9 3086 movq 0(%rsi),%rdx 3087 adcq $0,%r10 3088 movq %r8,0(%rdi) 3089 adcq $0,%r11 3090 adcq $0,%r12 3091 adcq $0,%r13 3092 adcq $0,%r14 3093 adcq $0,%r15 3094 cmpq %rcx,%rdi 3095 je 
.Lsqrx8x_outer_loop 3096 3097 movq %r9,8(%rdi) 3098 movq 8(%rcx),%r9 3099 movq %r10,16(%rdi) 3100 movq 16(%rcx),%r10 3101 movq %r11,24(%rdi) 3102 movq 24(%rcx),%r11 3103 movq %r12,32(%rdi) 3104 movq 32(%rcx),%r12 3105 movq %r13,40(%rdi) 3106 movq 40(%rcx),%r13 3107 movq %r14,48(%rdi) 3108 movq 48(%rcx),%r14 3109 movq %r15,56(%rdi) 3110 movq 56(%rcx),%r15 3111 movq %rcx,%rdi 3112 jmp .Lsqrx8x_outer_loop 3113 3114.align 32 3115.Lsqrx8x_outer_break: 3116 movq %r9,72(%rdi) 3117.byte 102,72,15,126,217 3118 movq %r10,80(%rdi) 3119 movq %r11,88(%rdi) 3120 movq %r12,96(%rdi) 3121 movq %r13,104(%rdi) 3122 movq %r14,112(%rdi) 3123 leaq 48+8(%rsp),%rdi 3124 movq (%rsi,%rcx,1),%rdx 3125 3126 movq 8(%rdi),%r11 3127 xorq %r10,%r10 3128 movq 0+8(%rsp),%r9 3129 adoxq %r11,%r11 3130 movq 16(%rdi),%r12 3131 movq 24(%rdi),%r13 3132 3133 3134.align 32 3135.Lsqrx4x_shift_n_add: 3136 mulxq %rdx,%rax,%rbx 3137 adoxq %r12,%r12 3138 adcxq %r10,%rax 3139.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3140.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3141 adoxq %r13,%r13 3142 adcxq %r11,%rbx 3143 movq 40(%rdi),%r11 3144 movq %rax,0(%rdi) 3145 movq %rbx,8(%rdi) 3146 3147 mulxq %rdx,%rax,%rbx 3148 adoxq %r10,%r10 3149 adcxq %r12,%rax 3150 movq 16(%rsi,%rcx,1),%rdx 3151 movq 48(%rdi),%r12 3152 adoxq %r11,%r11 3153 adcxq %r13,%rbx 3154 movq 56(%rdi),%r13 3155 movq %rax,16(%rdi) 3156 movq %rbx,24(%rdi) 3157 3158 mulxq %rdx,%rax,%rbx 3159 adoxq %r12,%r12 3160 adcxq %r10,%rax 3161 movq 24(%rsi,%rcx,1),%rdx 3162 leaq 32(%rcx),%rcx 3163 movq 64(%rdi),%r10 3164 adoxq %r13,%r13 3165 adcxq %r11,%rbx 3166 movq 72(%rdi),%r11 3167 movq %rax,32(%rdi) 3168 movq %rbx,40(%rdi) 3169 3170 mulxq %rdx,%rax,%rbx 3171 adoxq %r10,%r10 3172 adcxq %r12,%rax 3173 jrcxz .Lsqrx4x_shift_n_add_break 3174.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3175 adoxq %r11,%r11 3176 adcxq %r13,%rbx 3177 movq 80(%rdi),%r12 3178 movq 88(%rdi),%r13 3179 movq %rax,48(%rdi) 3180 movq %rbx,56(%rdi) 3181 leaq 64(%rdi),%rdi 3182 nop 3183 jmp 
.Lsqrx4x_shift_n_add 3184 3185.align 32 3186.Lsqrx4x_shift_n_add_break: 3187 adcxq %r13,%rbx 3188 movq %rax,48(%rdi) 3189 movq %rbx,56(%rdi) 3190 leaq 64(%rdi),%rdi 3191.byte 102,72,15,126,213 3192__bn_sqrx8x_reduction: 3193 xorl %eax,%eax 3194 movq 32+8(%rsp),%rbx 3195 movq 48+8(%rsp),%rdx 3196 leaq -64(%rbp,%r9,1),%rcx 3197 3198 movq %rcx,0+8(%rsp) 3199 movq %rdi,8+8(%rsp) 3200 3201 leaq 48+8(%rsp),%rdi 3202 jmp .Lsqrx8x_reduction_loop 3203 3204.align 32 3205.Lsqrx8x_reduction_loop: 3206 movq 8(%rdi),%r9 3207 movq 16(%rdi),%r10 3208 movq 24(%rdi),%r11 3209 movq 32(%rdi),%r12 3210 movq %rdx,%r8 3211 imulq %rbx,%rdx 3212 movq 40(%rdi),%r13 3213 movq 48(%rdi),%r14 3214 movq 56(%rdi),%r15 3215 movq %rax,24+8(%rsp) 3216 3217 leaq 64(%rdi),%rdi 3218 xorq %rsi,%rsi 3219 movq $-8,%rcx 3220 jmp .Lsqrx8x_reduce 3221 3222.align 32 3223.Lsqrx8x_reduce: 3224 movq %r8,%rbx 3225 mulxq 0(%rbp),%rax,%r8 3226 adcxq %rbx,%rax 3227 adoxq %r9,%r8 3228 3229 mulxq 8(%rbp),%rbx,%r9 3230 adcxq %rbx,%r8 3231 adoxq %r10,%r9 3232 3233 mulxq 16(%rbp),%rbx,%r10 3234 adcxq %rbx,%r9 3235 adoxq %r11,%r10 3236 3237 mulxq 24(%rbp),%rbx,%r11 3238 adcxq %rbx,%r10 3239 adoxq %r12,%r11 3240 3241.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3242 movq %rdx,%rax 3243 movq %r8,%rdx 3244 adcxq %rbx,%r11 3245 adoxq %r13,%r12 3246 3247 mulxq 32+8(%rsp),%rbx,%rdx 3248 movq %rax,%rdx 3249 movq %rax,64+48+8(%rsp,%rcx,8) 3250 3251 mulxq 40(%rbp),%rax,%r13 3252 adcxq %rax,%r12 3253 adoxq %r14,%r13 3254 3255 mulxq 48(%rbp),%rax,%r14 3256 adcxq %rax,%r13 3257 adoxq %r15,%r14 3258 3259 mulxq 56(%rbp),%rax,%r15 3260 movq %rbx,%rdx 3261 adcxq %rax,%r14 3262 adoxq %rsi,%r15 3263 adcxq %rsi,%r15 3264 3265.byte 0x67,0x67,0x67 3266 incq %rcx 3267 jnz .Lsqrx8x_reduce 3268 3269 movq %rsi,%rax 3270 cmpq 0+8(%rsp),%rbp 3271 jae .Lsqrx8x_no_tail 3272 3273 movq 48+8(%rsp),%rdx 3274 addq 0(%rdi),%r8 3275 leaq 64(%rbp),%rbp 3276 movq $-8,%rcx 3277 adcxq 8(%rdi),%r9 3278 adcxq 16(%rdi),%r10 3279 adcq 24(%rdi),%r11 3280 adcq 
32(%rdi),%r12 3281 adcq 40(%rdi),%r13 3282 adcq 48(%rdi),%r14 3283 adcq 56(%rdi),%r15 3284 leaq 64(%rdi),%rdi 3285 sbbq %rax,%rax 3286 3287 xorq %rsi,%rsi 3288 movq %rax,16+8(%rsp) 3289 jmp .Lsqrx8x_tail 3290 3291.align 32 3292.Lsqrx8x_tail: 3293 movq %r8,%rbx 3294 mulxq 0(%rbp),%rax,%r8 3295 adcxq %rax,%rbx 3296 adoxq %r9,%r8 3297 3298 mulxq 8(%rbp),%rax,%r9 3299 adcxq %rax,%r8 3300 adoxq %r10,%r9 3301 3302 mulxq 16(%rbp),%rax,%r10 3303 adcxq %rax,%r9 3304 adoxq %r11,%r10 3305 3306 mulxq 24(%rbp),%rax,%r11 3307 adcxq %rax,%r10 3308 adoxq %r12,%r11 3309 3310.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3311 adcxq %rax,%r11 3312 adoxq %r13,%r12 3313 3314 mulxq 40(%rbp),%rax,%r13 3315 adcxq %rax,%r12 3316 adoxq %r14,%r13 3317 3318 mulxq 48(%rbp),%rax,%r14 3319 adcxq %rax,%r13 3320 adoxq %r15,%r14 3321 3322 mulxq 56(%rbp),%rax,%r15 3323 movq 72+48+8(%rsp,%rcx,8),%rdx 3324 adcxq %rax,%r14 3325 adoxq %rsi,%r15 3326 movq %rbx,(%rdi,%rcx,8) 3327 movq %r8,%rbx 3328 adcxq %rsi,%r15 3329 3330 incq %rcx 3331 jnz .Lsqrx8x_tail 3332 3333 cmpq 0+8(%rsp),%rbp 3334 jae .Lsqrx8x_tail_done 3335 3336 subq 16+8(%rsp),%rsi 3337 movq 48+8(%rsp),%rdx 3338 leaq 64(%rbp),%rbp 3339 adcq 0(%rdi),%r8 3340 adcq 8(%rdi),%r9 3341 adcq 16(%rdi),%r10 3342 adcq 24(%rdi),%r11 3343 adcq 32(%rdi),%r12 3344 adcq 40(%rdi),%r13 3345 adcq 48(%rdi),%r14 3346 adcq 56(%rdi),%r15 3347 leaq 64(%rdi),%rdi 3348 sbbq %rax,%rax 3349 subq $8,%rcx 3350 3351 xorq %rsi,%rsi 3352 movq %rax,16+8(%rsp) 3353 jmp .Lsqrx8x_tail 3354 3355.align 32 3356.Lsqrx8x_tail_done: 3357 xorq %rax,%rax 3358 addq 24+8(%rsp),%r8 3359 adcq $0,%r9 3360 adcq $0,%r10 3361 adcq $0,%r11 3362 adcq $0,%r12 3363 adcq $0,%r13 3364 adcq $0,%r14 3365 adcq $0,%r15 3366 adcq $0,%rax 3367 3368 subq 16+8(%rsp),%rsi 3369.Lsqrx8x_no_tail: 3370 adcq 0(%rdi),%r8 3371.byte 102,72,15,126,217 3372 adcq 8(%rdi),%r9 3373 movq 56(%rbp),%rsi 3374.byte 102,72,15,126,213 3375 adcq 16(%rdi),%r10 3376 adcq 24(%rdi),%r11 3377 adcq 32(%rdi),%r12 3378 adcq 
40(%rdi),%r13				# (continues bn_sqrx8x_internal: fold tp words into accumulators)
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax			# collect the final carry

	movq	32+8(%rsp),%rbx		# reload stack-cached value (set up outside this view)
	movq	64(%rdi,%rcx,1),%rdx

	# Store the eight accumulated 64-bit result words back to the window at %rdi.
	movq	%r8,0(%rdi)
	leaq	64(%rdi),%r8
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	64(%rdi,%rcx,1),%rdi	# advance output pointer past this 8-word group
	cmpq	8+8(%rsp),%r8		# more groups to reduce?
	jb	.Lsqrx8x_reduction_loop
	.byte	0xf3,0xc3		# rep ret (two-byte return, branch-predictor friendly)
.size	bn_sqrx8x_internal,.-bn_sqrx8x_internal

# __bn_postx4x_internal: final conditional subtraction after the x4 squaring path.
# NOTE(review): register roles are established by the caller outside this view;
# from the code below, %rbp walks a table of 64-bit words (presumably the modulus),
# %rdi the intermediate result, %rax a 0/±1 carry that negq turns into an
# all-zeros/all-ones mask, and %rcx a (negative) length in words — TODO confirm
# against the caller in the full file.
.align	32
__bn_postx4x_internal:
	movq	0(%rbp),%r12
	movq	%rcx,%r10		# preserve word count
	movq	%rcx,%r9
	negq	%rax			# 0/1 carry -> 0/-1 select mask in %rax
	sarq	$3+2,%rcx		# bytes -> number of 4-word (32-byte) groups

	.byte	102,72,15,126,202	# movq %xmm1,%rdx (destination pointer stashed in %xmm1)
	.byte	102,72,15,126,206	# movq %xmm1,%rsi
	decq	%r12			# pre-decrement first word for the borrow chain below
	movq	8(%rbp),%r13
	xorq	%r8,%r8			# clear borrow accumulator
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry

.align	16
.Lsqrx4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqrx4x_sub_entry:
	# rK = ~n[i] & mask (BMI2 andn): with mask = -1 the adc chain below adds the
	# one's complement, i.e. subtracts n; with mask = 0 it adds nothing.
	# This keeps the final reduction branch-free (constant-time).
	andnq	%rax,%r12,%r12
	leaq	32(%rbp),%rbp
	andnq	%rax,%r13,%r13
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8			# restore carry flag from borrow mask
	adcq	0(%rdi),%r12
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)		# write 4 final result words
	leaq	32(%rdi),%rdi
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8			# save borrow as 0/-1 mask for next iteration
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx			# %rcx is negative group count; step toward 0
	jnz	.Lsqrx4x_sub

	negq	%r9			# restore positive word count

	.byte	0xf3,0xc3		# rep ret
.size	__bn_postx4x_internal,.-__bn_postx4x_internal

# unsigned bn_get_bits5(const void *table, int bitoff)
# Extracts the 5 bits starting at bit offset %esi from the byte array at %rdi,
# returning them in %eax (0..31). When the 5-bit field would cross a 16-bit
# word boundary (in-word bit offset > 11), it instead reads a byte-misaligned
# 16-bit word at +1 and shifts by 8 less — a branch-free cmov selection.
.globl	bn_get_bits5
.type	bn_get_bits5,@function
.align	16
bn_get_bits5:
	leaq	0(%rdi),%r10		# aligned base
	leaq	1(%rdi),%r11		# byte-shifted base, used when field straddles a word
	movl	%esi,%ecx
	shrl	$4,%esi			# %esi = 16-bit word index
	andl	$15,%ecx		# %ecx = bit offset within the word
	leal	-8(%rcx),%eax		# alternative shift if we take the +1 base
	cmpl	$11,%ecx
	cmovaq	%r11,%r10		# offset > 11: field straddles -> use base+1
	cmoval	%eax,%ecx		#              and shift by (offset - 8)
	movzwl	(%r10,%rsi,2),%eax
	shrl	%cl,%eax
	andl	$31,%eax		# keep exactly 5 bits
	.byte	0xf3,0xc3		# rep ret
.size	bn_get_bits5,.-bn_get_bits5

# void bn_scatter5(const BN_ULONG *in, size_t num, void *table, size_t idx)
# Stores %esi 64-bit words from %rdi into the table at %rdx, at slot %rcx,
# with a 256-byte stride between consecutive words (interleaved layout so a
# later gather touches every cache line uniformly).
.globl	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
	cmpl	$0,%esi
	jz	.Lscatter_epilogue	# zero-length input: nothing to do
	leaq	(%rdx,%rcx,8),%rdx	# &table[idx]
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx		# next word of this entry, 256 bytes on
	subl	$1,%esi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_scatter5,.-bn_scatter5

# void bn_gather5(BN_ULONG *out, size_t num, const void *table, size_t idx)
# Constant-time gather: reads ALL 16 interleaved table entries and masks out
# every one except entry %ecx, so no data-dependent addresses or branches leak
# the index through the cache (defense against cache-timing attacks).
.globl	bn_gather5
.type	bn_gather5,@function
.align	32
bn_gather5:
.LSEH_begin_bn_gather5:

	.byte	0x4c,0x8d,0x14,0x24		# lea (%rsp),%r10  — save caller %rsp
	.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	# sub $0x108,%rsp — 264-byte mask frame
	leaq	.Linc(%rip),%rax
	andq	$-16,%rsp		# 16-byte align for movdqa stores

	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0		# {0,0,1,1}
	movdqa	16(%rax),%xmm1		# {2,2,2,2} increment
	leaq	128(%rdx),%r11		# table, biased by 128 for short disp8 offsets
	leaq	128(%rsp),%rax		# mask area, same bias

	# Build sixteen 16-byte masks: mask[i] = (i == idx) ? all-ones : 0,
	# via repeated paddd/pcmpeqd against the broadcast index in %xmm5.
	pshufd	$0,%xmm5,%xmm5		# broadcast idx to all 4 dwords
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		# mask for entries 0/1
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)		# mask for entries 14/15
	jmp	.Lgather

# Per output word: AND all 16 table lanes with their masks, OR everything
# together, fold high qword onto low (pshufd $0x4e), store one 64-bit word.
.align	32
.Lgather:
	pxor	%xmm4,%xmm4		# OR-accumulator A
	pxor	%xmm5,%xmm5		# OR-accumulator B
	movdqa	-128(%r11),%xmm0	# entries 0..3
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0		# entries 4..7
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0		# entries 8..11
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0		# entries 12..15
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4		# merge both accumulators
	leaq	256(%r11),%r11		# next word of every entry (256-byte stride)
	pshufd	$0x4e,%xmm4,%xmm0	# swap qwords
	por	%xmm4,%xmm0		# selected 64-bit word now in low qword
	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	.Lgather

	leaq	(%r10),%rsp		# restore caller %rsp saved at entry
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_bn_gather5:
.size	bn_gather5,.-bn_gather5

# Increment constants used to build the 16 comparison masks above.
.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2
# ASCII: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0