# $FreeBSD: releng/11.0/secure/lib/libcrypto/amd64/x86_64-mont5.S 326358 2017-11-29 05:59:12Z delphij $
# Do not modify. This file is auto-generated from x86_64-mont5.pl.
#
# Montgomery multiplication with a constant-time ("scatter-gather") table
# lookup of the multiplicand, AT&T syntax, System V AMD64 ABI.
# NOTE(review): argument roles below follow the standard OpenSSL
# bn_mul_mont signature (rdi=rp, rsi=ap, rdx=bp table, rcx=np, r8=&n0,
# r9=num, 8(%rsp)=power/index) — inferred from usage; confirm against
# x86_64-mont5.pl.
.text



.globl	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
	# num (r9d) not divisible by 8 -> generic 1x path below;
	# otherwise dispatch to the 4x-unrolled path (which may further
	# dispatch to the MULX/AD*X path based on OPENSSL_ia32cap_P).
	movl	%r9d,%r9d		# zero-extend num; also clears high bits
	movq	%rsp,%rax		# keep original rsp for the epilogue
	testl	$7,%r9d
	jnz	.Lmul_enter
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movd	8(%rsp),%xmm5		# 7th argument: gather index ("power")
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	# Carve an aligned scratch area of 8*num(+const) bytes below rsp.
	negq	%r9
	movq	%rsp,%r11
	leaq	-280(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10		# minimum 1KB alignment of scratch

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11		# probe the new stack top
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.Lmul_page_walk:
	# Touch every 4K page of the newly claimed stack region so the
	# OS guard page is never skipped over.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	leaq	.Linc(%rip),%r10	# increment table used to build masks
	movq	%rax,8(%rsp,%r9,8)	# save original rsp above the tp vector
.Lmul_body:

	# Build 16 XMM compare masks (one per pair of table columns) from
	# the requested index in xmm5, then AND/OR-select one 64-bit limb
	# of bp out of the 2^5-entry table without a data-dependent load —
	# this is the cache-timing-resistant gather.
	leaq	128(%rdx),%r12		# r12 = &bp[i] gather base (biased +128)
	movdqa	0(%r10),%xmm0
	movdqa	16(%r10),%xmm1
	leaq	24-112(%rsp,%r9,8),%r10
	andq	$-16,%r10

	pshufd	$0,%xmm5,%xmm5		# broadcast index to all 4 dwords
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		# mask = (counter == index) ? ~0 : 0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)		# spill masks to the stack for reuse
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	# Gather bp[0]: AND every table column with its mask and OR the
	# results together — all 32 columns are read regardless of index.
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1	# fold high qword onto low qword
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12		# advance gather base to next limb row
.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = gathered b[0]

	movq	(%r8),%r8		# r8 = n0 (Montgomery constant)
	movq	(%rsi),%rax		# rax = ap[0]

	xorq	%r14,%r14		# r14 = outer index i
	xorq	%r15,%r15		# r15 = inner index j

	# --- first iteration: tp = ap * b[0] + m*np, classic word-by-word
	# Montgomery multiply with carry chain in r10/r11/r13 ---
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*b[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	imulq	%r10,%rbp		# m = (tp[0]*n0) mod 2^64
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# low limb discarded (becomes zero)
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j]*b[0]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.L1st

	# flush the last two limbs plus final carry of the first pass
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# top carry word of tp

	leaq	1(%r14),%r14
	jmp	.Louter
.align	16
.Louter:
	# Constant-time gather of b[i] for this outer iteration, reusing
	# the masks spilled on the stack (rdx -> mask area).
	leaq	24+128(%rsp,%r9,8),%rdx
	andq	$-16,%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12

	movq	(%rsi),%rax		# ap[0]
.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = b[i]

	# --- outer iteration i: tp = (tp + ap*b[i] + m*np) / 2^64 ---
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10		# tp[0]

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = (tp[0]+ap[0]*b[i])*n0 mod 2^64
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j]*b[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	addq	%r11,%r10		# += tp[j]
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r9,8),%r10	# previous top carry word
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# new top carry word

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	# --- final conditional subtraction: rp = tp - np if tp >= np ---
	xorq	%r14,%r14
	movq	(%rsp),%rax		# tp[0]
	leaq	(%rsp),%rsi		# rsi = tp
	movq	%r9,%r15
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax	# tp[j] - np[j] - borrow
	movq	%rax,(%rdi,%r14,8)	# rp[j] = difference (provisional)
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			# include top carry; rax = 0 or -1
	xorq	%r14,%r14
	# Branch-free select: rsi = borrow ? tp : rp, via and/or masking,
	# so the copy below is independent of the comparison result.
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	movq	%r9,%r15
	orq	%rcx,%rsi
.align	16
.Lcopy:
	movq	(%rsi,%r14,8),%rax
	movq	%r14,(%rsp,%r14,8)	# wipe tp as we go
	movq	%rax,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# saved original rsp
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5

# 4x-unrolled wrapper: allocates/probes the scratch frame, then calls
# mul4x_internal.  May tail-dispatch to the MULX/AD*X path
# (.Lmulx4x_enter, defined elsewhere in this file) when the CPU-cap bits
# 0x80108 are all set.
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.byte	0x67
	movq	%rsp,%rax
.Lmul4x_enter:
	andl	$0x80108,%r11d		# BMI2+ADX feature mask (from caller)
	cmpl	$0x80108,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d			# r9 = num*8 bytes
	leaq	(%r9,%r9,2),%r10	# r10 = num*24
	negq	%r9

	# Pick a scratch frame whose 4K page offset avoids aliasing the
	# output buffer (cache-bank/page considerations per perlasm source).
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp		# 64-byte align the frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	# touch each page so the guard page is not skipped
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9

	movq	%rax,40(%rsp)		# stash original rsp
.Lmul4x_body:

	call	mul4x_internal

	movq	40(%rsp),%rsi
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

# Core of the 4x-unrolled Montgomery multiply.  Same gather technique as
# above; tp lives at 64+8(%rsp), saved rp at 56+8(%rsp), end-of-table
# sentinel at 16+8(%rsp).  Falls through into the shared .Lsqr4x_sub
# tail (defined with __bn_post4x_internal below) for the final
# conditional subtraction.
.type	mul4x_internal,@function
.align	32
mul4x_internal:
	shlq	$5,%r9			# r9 = num*32 (byte size, 4x groups)
	movd	8(%rax),%xmm5		# gather index from original stack
	leaq	.Linc(%rip),%rax
	leaq	128(%rdx,%r9,1),%r13	# r13 = end of bp table (sentinel)
	shrq	$5,%r9			# restore r9 = num*8? (inverse of shl)
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	88-112(%rsp,%r9,1),%r10	# mask spill area
	leaq	128(%rdx),%r12		# gather base

	# build the 16 equality masks, as in bn_mul_mont_gather5 above
	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
.byte	0x67,0x67
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	# constant-time gather of b[0]
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = b[0]

	movq	%r13,16+8(%rsp)		# save table-end sentinel
	movq	%rdi,56+8(%rsp)		# save rp

	movq	(%r8),%r8		# n0
	movq	(%rsi),%rax		# ap[0]
	leaq	(%rsi,%r9,1),%rsi	# rsi = &ap[num]; index via negative r9
	negq	%r9

	# first pass: tp = ap*b[0] + m*np, 4 limbs per loop iteration
	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		# m
	leaq	64+8(%rsp),%r14		# r14 = tp write pointer
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13
	jmp	.L1st4x

.align	32
.L1st4x:
	# four interleaved mul-by-b[0] / mul-by-m carry chains per pass
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.L1st4x

	# epilogue of the first pass: last 2 limbs + carry
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	leaq	(%rcx,%r9,1),%rcx	# rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi			# rdi = top carry
	movq	%r13,-8(%r14)

	jmp	.Louter4x

.align	32
.Louter4x:
	# gather b[i] (masks at 16+128(%r14)), then one reduction pass
	leaq	16+128(%r14),%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = b[i]

	movq	(%r14,%r9,1),%r10	# tp[0]
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m
	movq	%rdx,%r11
	movq	%rdi,(%r14)		# store previous top carry

	leaq	(%r14,%r9,1),%r14	# rewind tp pointer

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x

.align	32
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	addq	(%r14),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.Linner4x

	# epilogue of the inner pass
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	%rbp,%rax		# swap m into rax for the last mul
	movq	-8(%rcx),%rbp
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	movq	%rdi,-16(%r14)
	leaq	(%rcx,%r9,1),%rcx	# rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%r14),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%r14)

	cmpq	16+8(%rsp),%r12		# processed whole bp table?
	jb	.Louter4x
	# Set up the borrow mask and registers expected by the shared
	# .Lsqr4x_sub_entry tail (conditional subtraction of np).
	xorq	%rax,%rax
	subq	%r13,%rbp
	adcq	%r15,%r15
	orq	%r15,%rdi
	subq	%rdi,%rax		# rax = 0 or -1 select mask
	leaq	(%r14,%r9,1),%rbx	# rbx = tp
	movq	(%rcx),%r12
	leaq	(%rcx),%rbp		# rbp = np
	movq	%r9,%rcx
	sarq	$3+2,%rcx		# rcx = -num/4 loop counter
	movq	56+8(%rsp),%rdi		# rdi = rp
	decq	%r12			# prime borrow for first sbb-like adc
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry
.size	mul4x_internal,.-mul4x_internal
1038.globl bn_power5 1039.type bn_power5,@function 1040.align 32 1041bn_power5: 1042 movq %rsp,%rax 1043 movl OPENSSL_ia32cap_P+8(%rip),%r11d 1044 andl $0x80108,%r11d 1045 cmpl $0x80108,%r11d 1046 je .Lpowerx5_enter 1047 pushq %rbx 1048 pushq %rbp 1049 pushq %r12 1050 pushq %r13 1051 pushq %r14 1052 pushq %r15 1053.Lpower5_prologue: 1054 1055 shll $3,%r9d 1056 leal (%r9,%r9,2),%r10d 1057 negq %r9 1058 movq (%r8),%r8 1059 1060 1061 1062 1063 1064 1065 1066 1067 leaq -320(%rsp,%r9,2),%r11 1068 movq %rsp,%rbp 1069 subq %rdi,%r11 1070 andq $4095,%r11 1071 cmpq %r11,%r10 1072 jb .Lpwr_sp_alt 1073 subq %r11,%rbp 1074 leaq -320(%rbp,%r9,2),%rbp 1075 jmp .Lpwr_sp_done 1076 1077.align 32 1078.Lpwr_sp_alt: 1079 leaq 4096-320(,%r9,2),%r10 1080 leaq -320(%rbp,%r9,2),%rbp 1081 subq %r10,%r11 1082 movq $0,%r10 1083 cmovcq %r10,%r11 1084 subq %r11,%rbp 1085.Lpwr_sp_done: 1086 andq $-64,%rbp 1087 movq %rsp,%r11 1088 subq %rbp,%r11 1089 andq $-4096,%r11 1090 leaq (%r11,%rbp,1),%rsp 1091 movq (%rsp),%r10 1092 cmpq %rbp,%rsp 1093 ja .Lpwr_page_walk 1094 jmp .Lpwr_page_walk_done 1095 1096.Lpwr_page_walk: 1097 leaq -4096(%rsp),%rsp 1098 movq (%rsp),%r10 1099 cmpq %rbp,%rsp 1100 ja .Lpwr_page_walk 1101.Lpwr_page_walk_done: 1102 1103 movq %r9,%r10 1104 negq %r9 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 movq %r8,32(%rsp) 1116 movq %rax,40(%rsp) 1117.Lpower5_body: 1118.byte 102,72,15,110,207 1119.byte 102,72,15,110,209 1120.byte 102,73,15,110,218 1121.byte 102,72,15,110,226 1122 1123 call __bn_sqr8x_internal 1124 call __bn_post4x_internal 1125 call __bn_sqr8x_internal 1126 call __bn_post4x_internal 1127 call __bn_sqr8x_internal 1128 call __bn_post4x_internal 1129 call __bn_sqr8x_internal 1130 call __bn_post4x_internal 1131 call __bn_sqr8x_internal 1132 call __bn_post4x_internal 1133 1134.byte 102,72,15,126,209 1135.byte 102,72,15,126,226 1136 movq %rsi,%rdi 1137 movq 40(%rsp),%rax 1138 leaq 32(%rsp),%r8 1139 1140 call mul4x_internal 1141 1142 movq 40(%rsp),%rsi 1143 movq 
$1,%rax 1144 movq -48(%rsi),%r15 1145 movq -40(%rsi),%r14 1146 movq -32(%rsi),%r13 1147 movq -24(%rsi),%r12 1148 movq -16(%rsi),%rbp 1149 movq -8(%rsi),%rbx 1150 leaq (%rsi),%rsp 1151.Lpower5_epilogue: 1152 .byte 0xf3,0xc3 1153.size bn_power5,.-bn_power5 1154 1155.globl bn_sqr8x_internal 1156.hidden bn_sqr8x_internal 1157.type bn_sqr8x_internal,@function 1158.align 32 1159bn_sqr8x_internal: 1160__bn_sqr8x_internal: 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 leaq 32(%r10),%rbp 1235 leaq (%rsi,%r9,1),%rsi 1236 1237 movq %r9,%rcx 1238 1239 1240 movq -32(%rsi,%rbp,1),%r14 1241 leaq 48+8(%rsp,%r9,2),%rdi 1242 movq -24(%rsi,%rbp,1),%rax 1243 leaq -32(%rdi,%rbp,1),%rdi 1244 movq -16(%rsi,%rbp,1),%rbx 1245 movq %rax,%r15 1246 1247 mulq %r14 1248 movq %rax,%r10 1249 movq %rbx,%rax 1250 movq %rdx,%r11 1251 movq %r10,-24(%rdi,%rbp,1) 1252 1253 mulq %r14 1254 addq %rax,%r11 1255 movq %rbx,%rax 1256 adcq $0,%rdx 1257 movq %r11,-16(%rdi,%rbp,1) 1258 movq %rdx,%r10 1259 1260 1261 movq -8(%rsi,%rbp,1),%rbx 1262 mulq %r15 1263 movq %rax,%r12 1264 movq %rbx,%rax 1265 movq %rdx,%r13 1266 1267 leaq (%rbp),%rcx 1268 mulq %r14 1269 addq %rax,%r10 1270 movq %rbx,%rax 1271 movq %rdx,%r11 1272 adcq $0,%r11 1273 addq %r12,%r10 1274 adcq $0,%r11 1275 movq %r10,-8(%rdi,%rcx,1) 1276 jmp .Lsqr4x_1st 1277 1278.align 32 1279.Lsqr4x_1st: 1280 movq (%rsi,%rcx,1),%rbx 1281 mulq %r15 1282 addq %rax,%r13 1283 movq %rbx,%rax 1284 movq %rdx,%r12 1285 adcq $0,%r12 1286 1287 mulq %r14 1288 addq %rax,%r11 1289 movq %rbx,%rax 1290 movq 8(%rsi,%rcx,1),%rbx 1291 movq %rdx,%r10 1292 adcq $0,%r10 1293 addq %r13,%r11 1294 adcq $0,%r10 1295 1296 1297 mulq %r15 1298 addq %rax,%r12 1299 movq 
%rbx,%rax 1300 movq %r11,(%rdi,%rcx,1) 1301 movq %rdx,%r13 1302 adcq $0,%r13 1303 1304 mulq %r14 1305 addq %rax,%r10 1306 movq %rbx,%rax 1307 movq 16(%rsi,%rcx,1),%rbx 1308 movq %rdx,%r11 1309 adcq $0,%r11 1310 addq %r12,%r10 1311 adcq $0,%r11 1312 1313 mulq %r15 1314 addq %rax,%r13 1315 movq %rbx,%rax 1316 movq %r10,8(%rdi,%rcx,1) 1317 movq %rdx,%r12 1318 adcq $0,%r12 1319 1320 mulq %r14 1321 addq %rax,%r11 1322 movq %rbx,%rax 1323 movq 24(%rsi,%rcx,1),%rbx 1324 movq %rdx,%r10 1325 adcq $0,%r10 1326 addq %r13,%r11 1327 adcq $0,%r10 1328 1329 1330 mulq %r15 1331 addq %rax,%r12 1332 movq %rbx,%rax 1333 movq %r11,16(%rdi,%rcx,1) 1334 movq %rdx,%r13 1335 adcq $0,%r13 1336 leaq 32(%rcx),%rcx 1337 1338 mulq %r14 1339 addq %rax,%r10 1340 movq %rbx,%rax 1341 movq %rdx,%r11 1342 adcq $0,%r11 1343 addq %r12,%r10 1344 adcq $0,%r11 1345 movq %r10,-8(%rdi,%rcx,1) 1346 1347 cmpq $0,%rcx 1348 jne .Lsqr4x_1st 1349 1350 mulq %r15 1351 addq %rax,%r13 1352 leaq 16(%rbp),%rbp 1353 adcq $0,%rdx 1354 addq %r11,%r13 1355 adcq $0,%rdx 1356 1357 movq %r13,(%rdi) 1358 movq %rdx,%r12 1359 movq %rdx,8(%rdi) 1360 jmp .Lsqr4x_outer 1361 1362.align 32 1363.Lsqr4x_outer: 1364 movq -32(%rsi,%rbp,1),%r14 1365 leaq 48+8(%rsp,%r9,2),%rdi 1366 movq -24(%rsi,%rbp,1),%rax 1367 leaq -32(%rdi,%rbp,1),%rdi 1368 movq -16(%rsi,%rbp,1),%rbx 1369 movq %rax,%r15 1370 1371 mulq %r14 1372 movq -24(%rdi,%rbp,1),%r10 1373 addq %rax,%r10 1374 movq %rbx,%rax 1375 adcq $0,%rdx 1376 movq %r10,-24(%rdi,%rbp,1) 1377 movq %rdx,%r11 1378 1379 mulq %r14 1380 addq %rax,%r11 1381 movq %rbx,%rax 1382 adcq $0,%rdx 1383 addq -16(%rdi,%rbp,1),%r11 1384 movq %rdx,%r10 1385 adcq $0,%r10 1386 movq %r11,-16(%rdi,%rbp,1) 1387 1388 xorq %r12,%r12 1389 1390 movq -8(%rsi,%rbp,1),%rbx 1391 mulq %r15 1392 addq %rax,%r12 1393 movq %rbx,%rax 1394 adcq $0,%rdx 1395 addq -8(%rdi,%rbp,1),%r12 1396 movq %rdx,%r13 1397 adcq $0,%r13 1398 1399 mulq %r14 1400 addq %rax,%r10 1401 movq %rbx,%rax 1402 adcq $0,%rdx 1403 addq %r12,%r10 1404 movq 
%rdx,%r11 1405 adcq $0,%r11 1406 movq %r10,-8(%rdi,%rbp,1) 1407 1408 leaq (%rbp),%rcx 1409 jmp .Lsqr4x_inner 1410 1411.align 32 1412.Lsqr4x_inner: 1413 movq (%rsi,%rcx,1),%rbx 1414 mulq %r15 1415 addq %rax,%r13 1416 movq %rbx,%rax 1417 movq %rdx,%r12 1418 adcq $0,%r12 1419 addq (%rdi,%rcx,1),%r13 1420 adcq $0,%r12 1421 1422.byte 0x67 1423 mulq %r14 1424 addq %rax,%r11 1425 movq %rbx,%rax 1426 movq 8(%rsi,%rcx,1),%rbx 1427 movq %rdx,%r10 1428 adcq $0,%r10 1429 addq %r13,%r11 1430 adcq $0,%r10 1431 1432 mulq %r15 1433 addq %rax,%r12 1434 movq %r11,(%rdi,%rcx,1) 1435 movq %rbx,%rax 1436 movq %rdx,%r13 1437 adcq $0,%r13 1438 addq 8(%rdi,%rcx,1),%r12 1439 leaq 16(%rcx),%rcx 1440 adcq $0,%r13 1441 1442 mulq %r14 1443 addq %rax,%r10 1444 movq %rbx,%rax 1445 adcq $0,%rdx 1446 addq %r12,%r10 1447 movq %rdx,%r11 1448 adcq $0,%r11 1449 movq %r10,-8(%rdi,%rcx,1) 1450 1451 cmpq $0,%rcx 1452 jne .Lsqr4x_inner 1453 1454.byte 0x67 1455 mulq %r15 1456 addq %rax,%r13 1457 adcq $0,%rdx 1458 addq %r11,%r13 1459 adcq $0,%rdx 1460 1461 movq %r13,(%rdi) 1462 movq %rdx,%r12 1463 movq %rdx,8(%rdi) 1464 1465 addq $16,%rbp 1466 jnz .Lsqr4x_outer 1467 1468 1469 movq -32(%rsi),%r14 1470 leaq 48+8(%rsp,%r9,2),%rdi 1471 movq -24(%rsi),%rax 1472 leaq -32(%rdi,%rbp,1),%rdi 1473 movq -16(%rsi),%rbx 1474 movq %rax,%r15 1475 1476 mulq %r14 1477 addq %rax,%r10 1478 movq %rbx,%rax 1479 movq %rdx,%r11 1480 adcq $0,%r11 1481 1482 mulq %r14 1483 addq %rax,%r11 1484 movq %rbx,%rax 1485 movq %r10,-24(%rdi) 1486 movq %rdx,%r10 1487 adcq $0,%r10 1488 addq %r13,%r11 1489 movq -8(%rsi),%rbx 1490 adcq $0,%r10 1491 1492 mulq %r15 1493 addq %rax,%r12 1494 movq %rbx,%rax 1495 movq %r11,-16(%rdi) 1496 movq %rdx,%r13 1497 adcq $0,%r13 1498 1499 mulq %r14 1500 addq %rax,%r10 1501 movq %rbx,%rax 1502 movq %rdx,%r11 1503 adcq $0,%r11 1504 addq %r12,%r10 1505 adcq $0,%r11 1506 movq %r10,-8(%rdi) 1507 1508 mulq %r15 1509 addq %rax,%r13 1510 movq -16(%rsi),%rax 1511 adcq $0,%rdx 1512 addq %r11,%r13 1513 adcq $0,%rdx 1514 
1515 movq %r13,(%rdi) 1516 movq %rdx,%r12 1517 movq %rdx,8(%rdi) 1518 1519 mulq %rbx 1520 addq $16,%rbp 1521 xorq %r14,%r14 1522 subq %r9,%rbp 1523 xorq %r15,%r15 1524 1525 addq %r12,%rax 1526 adcq $0,%rdx 1527 movq %rax,8(%rdi) 1528 movq %rdx,16(%rdi) 1529 movq %r15,24(%rdi) 1530 1531 movq -16(%rsi,%rbp,1),%rax 1532 leaq 48+8(%rsp),%rdi 1533 xorq %r10,%r10 1534 movq 8(%rdi),%r11 1535 1536 leaq (%r14,%r10,2),%r12 1537 shrq $63,%r10 1538 leaq (%rcx,%r11,2),%r13 1539 shrq $63,%r11 1540 orq %r10,%r13 1541 movq 16(%rdi),%r10 1542 movq %r11,%r14 1543 mulq %rax 1544 negq %r15 1545 movq 24(%rdi),%r11 1546 adcq %rax,%r12 1547 movq -8(%rsi,%rbp,1),%rax 1548 movq %r12,(%rdi) 1549 adcq %rdx,%r13 1550 1551 leaq (%r14,%r10,2),%rbx 1552 movq %r13,8(%rdi) 1553 sbbq %r15,%r15 1554 shrq $63,%r10 1555 leaq (%rcx,%r11,2),%r8 1556 shrq $63,%r11 1557 orq %r10,%r8 1558 movq 32(%rdi),%r10 1559 movq %r11,%r14 1560 mulq %rax 1561 negq %r15 1562 movq 40(%rdi),%r11 1563 adcq %rax,%rbx 1564 movq 0(%rsi,%rbp,1),%rax 1565 movq %rbx,16(%rdi) 1566 adcq %rdx,%r8 1567 leaq 16(%rbp),%rbp 1568 movq %r8,24(%rdi) 1569 sbbq %r15,%r15 1570 leaq 64(%rdi),%rdi 1571 jmp .Lsqr4x_shift_n_add 1572 1573.align 32 1574.Lsqr4x_shift_n_add: 1575 leaq (%r14,%r10,2),%r12 1576 shrq $63,%r10 1577 leaq (%rcx,%r11,2),%r13 1578 shrq $63,%r11 1579 orq %r10,%r13 1580 movq -16(%rdi),%r10 1581 movq %r11,%r14 1582 mulq %rax 1583 negq %r15 1584 movq -8(%rdi),%r11 1585 adcq %rax,%r12 1586 movq -8(%rsi,%rbp,1),%rax 1587 movq %r12,-32(%rdi) 1588 adcq %rdx,%r13 1589 1590 leaq (%r14,%r10,2),%rbx 1591 movq %r13,-24(%rdi) 1592 sbbq %r15,%r15 1593 shrq $63,%r10 1594 leaq (%rcx,%r11,2),%r8 1595 shrq $63,%r11 1596 orq %r10,%r8 1597 movq 0(%rdi),%r10 1598 movq %r11,%r14 1599 mulq %rax 1600 negq %r15 1601 movq 8(%rdi),%r11 1602 adcq %rax,%rbx 1603 movq 0(%rsi,%rbp,1),%rax 1604 movq %rbx,-16(%rdi) 1605 adcq %rdx,%r8 1606 1607 leaq (%r14,%r10,2),%r12 1608 movq %r8,-8(%rdi) 1609 sbbq %r15,%r15 1610 shrq $63,%r10 1611 leaq (%rcx,%r11,2),%r13 
1612 shrq $63,%r11 1613 orq %r10,%r13 1614 movq 16(%rdi),%r10 1615 movq %r11,%r14 1616 mulq %rax 1617 negq %r15 1618 movq 24(%rdi),%r11 1619 adcq %rax,%r12 1620 movq 8(%rsi,%rbp,1),%rax 1621 movq %r12,0(%rdi) 1622 adcq %rdx,%r13 1623 1624 leaq (%r14,%r10,2),%rbx 1625 movq %r13,8(%rdi) 1626 sbbq %r15,%r15 1627 shrq $63,%r10 1628 leaq (%rcx,%r11,2),%r8 1629 shrq $63,%r11 1630 orq %r10,%r8 1631 movq 32(%rdi),%r10 1632 movq %r11,%r14 1633 mulq %rax 1634 negq %r15 1635 movq 40(%rdi),%r11 1636 adcq %rax,%rbx 1637 movq 16(%rsi,%rbp,1),%rax 1638 movq %rbx,16(%rdi) 1639 adcq %rdx,%r8 1640 movq %r8,24(%rdi) 1641 sbbq %r15,%r15 1642 leaq 64(%rdi),%rdi 1643 addq $32,%rbp 1644 jnz .Lsqr4x_shift_n_add 1645 1646 leaq (%r14,%r10,2),%r12 1647.byte 0x67 1648 shrq $63,%r10 1649 leaq (%rcx,%r11,2),%r13 1650 shrq $63,%r11 1651 orq %r10,%r13 1652 movq -16(%rdi),%r10 1653 movq %r11,%r14 1654 mulq %rax 1655 negq %r15 1656 movq -8(%rdi),%r11 1657 adcq %rax,%r12 1658 movq -8(%rsi),%rax 1659 movq %r12,-32(%rdi) 1660 adcq %rdx,%r13 1661 1662 leaq (%r14,%r10,2),%rbx 1663 movq %r13,-24(%rdi) 1664 sbbq %r15,%r15 1665 shrq $63,%r10 1666 leaq (%rcx,%r11,2),%r8 1667 shrq $63,%r11 1668 orq %r10,%r8 1669 mulq %rax 1670 negq %r15 1671 adcq %rax,%rbx 1672 adcq %rdx,%r8 1673 movq %rbx,-16(%rdi) 1674 movq %r8,-8(%rdi) 1675.byte 102,72,15,126,213 1676__bn_sqr8x_reduction: 1677 xorq %rax,%rax 1678 leaq (%r9,%rbp,1),%rcx 1679 leaq 48+8(%rsp,%r9,2),%rdx 1680 movq %rcx,0+8(%rsp) 1681 leaq 48+8(%rsp,%r9,1),%rdi 1682 movq %rdx,8+8(%rsp) 1683 negq %r9 1684 jmp .L8x_reduction_loop 1685 1686.align 32 1687.L8x_reduction_loop: 1688 leaq (%rdi,%r9,1),%rdi 1689.byte 0x66 1690 movq 0(%rdi),%rbx 1691 movq 8(%rdi),%r9 1692 movq 16(%rdi),%r10 1693 movq 24(%rdi),%r11 1694 movq 32(%rdi),%r12 1695 movq 40(%rdi),%r13 1696 movq 48(%rdi),%r14 1697 movq 56(%rdi),%r15 1698 movq %rax,(%rdx) 1699 leaq 64(%rdi),%rdi 1700 1701.byte 0x67 1702 movq %rbx,%r8 1703 imulq 32+8(%rsp),%rbx 1704 movq 0(%rbp),%rax 1705 movl $8,%ecx 1706 jmp 
.L8x_reduce 1707 1708.align 32 1709.L8x_reduce: 1710 mulq %rbx 1711 movq 8(%rbp),%rax 1712 negq %r8 1713 movq %rdx,%r8 1714 adcq $0,%r8 1715 1716 mulq %rbx 1717 addq %rax,%r9 1718 movq 16(%rbp),%rax 1719 adcq $0,%rdx 1720 addq %r9,%r8 1721 movq %rbx,48-8+8(%rsp,%rcx,8) 1722 movq %rdx,%r9 1723 adcq $0,%r9 1724 1725 mulq %rbx 1726 addq %rax,%r10 1727 movq 24(%rbp),%rax 1728 adcq $0,%rdx 1729 addq %r10,%r9 1730 movq 32+8(%rsp),%rsi 1731 movq %rdx,%r10 1732 adcq $0,%r10 1733 1734 mulq %rbx 1735 addq %rax,%r11 1736 movq 32(%rbp),%rax 1737 adcq $0,%rdx 1738 imulq %r8,%rsi 1739 addq %r11,%r10 1740 movq %rdx,%r11 1741 adcq $0,%r11 1742 1743 mulq %rbx 1744 addq %rax,%r12 1745 movq 40(%rbp),%rax 1746 adcq $0,%rdx 1747 addq %r12,%r11 1748 movq %rdx,%r12 1749 adcq $0,%r12 1750 1751 mulq %rbx 1752 addq %rax,%r13 1753 movq 48(%rbp),%rax 1754 adcq $0,%rdx 1755 addq %r13,%r12 1756 movq %rdx,%r13 1757 adcq $0,%r13 1758 1759 mulq %rbx 1760 addq %rax,%r14 1761 movq 56(%rbp),%rax 1762 adcq $0,%rdx 1763 addq %r14,%r13 1764 movq %rdx,%r14 1765 adcq $0,%r14 1766 1767 mulq %rbx 1768 movq %rsi,%rbx 1769 addq %rax,%r15 1770 movq 0(%rbp),%rax 1771 adcq $0,%rdx 1772 addq %r15,%r14 1773 movq %rdx,%r15 1774 adcq $0,%r15 1775 1776 decl %ecx 1777 jnz .L8x_reduce 1778 1779 leaq 64(%rbp),%rbp 1780 xorq %rax,%rax 1781 movq 8+8(%rsp),%rdx 1782 cmpq 0+8(%rsp),%rbp 1783 jae .L8x_no_tail 1784 1785.byte 0x66 1786 addq 0(%rdi),%r8 1787 adcq 8(%rdi),%r9 1788 adcq 16(%rdi),%r10 1789 adcq 24(%rdi),%r11 1790 adcq 32(%rdi),%r12 1791 adcq 40(%rdi),%r13 1792 adcq 48(%rdi),%r14 1793 adcq 56(%rdi),%r15 1794 sbbq %rsi,%rsi 1795 1796 movq 48+56+8(%rsp),%rbx 1797 movl $8,%ecx 1798 movq 0(%rbp),%rax 1799 jmp .L8x_tail 1800 1801.align 32 1802.L8x_tail: 1803 mulq %rbx 1804 addq %rax,%r8 1805 movq 8(%rbp),%rax 1806 movq %r8,(%rdi) 1807 movq %rdx,%r8 1808 adcq $0,%r8 1809 1810 mulq %rbx 1811 addq %rax,%r9 1812 movq 16(%rbp),%rax 1813 adcq $0,%rdx 1814 addq %r9,%r8 1815 leaq 8(%rdi),%rdi 1816 movq %rdx,%r9 1817 adcq $0,%r9 

        # Continuation of bn_sqr8x_internal's .L8x_tail pass: m * n[3..7]
        # with the same add/adc carry discipline as .L8x_reduce.
        mulq    %rbx
        addq    %rax,%r10
        movq    24(%rbp),%rax
        adcq    $0,%rdx
        addq    %r10,%r9
        movq    %rdx,%r10
        adcq    $0,%r10

        mulq    %rbx
        addq    %rax,%r11
        movq    32(%rbp),%rax
        adcq    $0,%rdx
        addq    %r11,%r10
        movq    %rdx,%r11
        adcq    $0,%r11

        mulq    %rbx
        addq    %rax,%r12
        movq    40(%rbp),%rax
        adcq    $0,%rdx
        addq    %r12,%r11
        movq    %rdx,%r12
        adcq    $0,%r12

        mulq    %rbx
        addq    %rax,%r13
        movq    48(%rbp),%rax
        adcq    $0,%rdx
        addq    %r13,%r12
        movq    %rdx,%r13
        adcq    $0,%r13

        mulq    %rbx
        addq    %rax,%r14
        movq    56(%rbp),%rax
        adcq    $0,%rdx
        addq    %r14,%r13
        movq    %rdx,%r14
        adcq    $0,%r14

        mulq    %rbx
        movq    48-16+8(%rsp,%rcx,8),%rbx # next stashed multiplier m
        addq    %rax,%r15
        adcq    $0,%rdx
        addq    %r15,%r14
        movq    0(%rbp),%rax
        movq    %rdx,%r15
        adcq    $0,%r15

        decl    %ecx
        jnz     .L8x_tail

        leaq    64(%rbp),%rbp
        movq    8+8(%rsp),%rdx
        cmpq    0+8(%rsp),%rbp          # past last modulus word?
        jae     .L8x_tail_done

        movq    48+56+8(%rsp),%rbx
        negq    %rsi                    # restore saved carry into CF
        movq    0(%rbp),%rax
        adcq    0(%rdi),%r8
        adcq    8(%rdi),%r9
        adcq    16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        sbbq    %rsi,%rsi               # re-save carry

        movl    $8,%ecx
        jmp     .L8x_tail

.align  32
.L8x_tail_done:
        # Fold in the final top-word contribution and saved carries.
        xorq    %rax,%rax
        addq    (%rdx),%r8
        adcq    $0,%r9
        adcq    $0,%r10
        adcq    $0,%r11
        adcq    $0,%r12
        adcq    $0,%r13
        adcq    $0,%r14
        adcq    $0,%r15
        adcq    $0,%rax

        negq    %rsi
.L8x_no_tail:
        adcq    0(%rdi),%r8
        adcq    8(%rdi),%r9
        adcq    16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        adcq    $0,%rax                 # %rax = top carry of this window
        movq    -8(%rbp),%rcx
        xorq    %rsi,%rsi

.byte   102,72,15,126,213               # movq %xmm5,%rbp (fixed SSE2 encoding)

        movq    %r8,0(%rdi)             # store reduced window
        movq    %r9,8(%rdi)
.byte   102,73,15,126,217               # movq %xmm3,%r9 (fixed SSE2 encoding)
        movq    %r10,16(%rdi)
        movq    %r11,24(%rdi)
        movq    %r12,32(%rdi)
        movq    %r13,40(%rdi)
        movq    %r14,48(%rdi)
        movq    %r15,56(%rdi)
        leaq    64(%rdi),%rdi

        cmpq    %rdx,%rdi
        jb      .L8x_reduction_loop     # more windows to reduce
        .byte   0xf3,0xc3               # rep ret
.size   bn_sqr8x_internal,.-bn_sqr8x_internal

#
# __bn_post4x_internal: final conditional subtraction after squaring.
# Computes rp[] = tp[] - np[] if borrow/carry state (%rax mask) says so,
# in constant time via NOT/AND masking; 4 limbs per iteration.
# In (from visible code): %rbp = np, %r9 = byte length, %rax = 0/-1 mask,
#                         %xmm1 holds rp (moved to %rdi/%rsi below).
#
.type   __bn_post4x_internal,@function
.align  32
__bn_post4x_internal:
        movq    0(%rbp),%r12
        leaq    (%rdi,%r9,1),%rbx       # %rbx = end of tp
        movq    %r9,%rcx
.byte   102,72,15,126,207               # movq %xmm1,%rdi (fixed SSE2 encoding)
        negq    %rax
.byte   102,72,15,126,206               # movq %xmm1,%rsi (fixed SSE2 encoding)
        sarq    $3+2,%rcx               # %rcx = -(number of 4-limb groups)
        decq    %r12                    # so that mask+carry path below is uniform
        xorq    %r10,%r10
        movq    8(%rbp),%r13
        movq    16(%rbp),%r14
        movq    24(%rbp),%r15
        jmp     .Lsqr4x_sub_entry

.align  16
.Lsqr4x_sub:
        movq    0(%rbp),%r12
        movq    8(%rbp),%r13
        movq    16(%rbp),%r14
        movq    24(%rbp),%r15
.Lsqr4x_sub_entry:
        leaq    32(%rbp),%rbp
        notq    %r12                    # ~n AND mask: subtract n only when mask=-1
        notq    %r13
        notq    %r14
        notq    %r15
        andq    %rax,%r12
        andq    %rax,%r13
        andq    %rax,%r14
        andq    %rax,%r15

        negq    %r10                    # restore running borrow into CF
        adcq    0(%rbx),%r12            # tp + (~n & mask) + carry  ==  tp - (n & mask)
        adcq    8(%rbx),%r13
        adcq    16(%rbx),%r14
        adcq    24(%rbx),%r15
        movq    %r12,0(%rdi)
        leaq    32(%rbx),%rbx
        movq    %r13,8(%rdi)
        sbbq    %r10,%r10               # save borrow
        movq    %r14,16(%rdi)
        movq    %r15,24(%rdi)
        leaq    32(%rdi),%rdi

        incq    %rcx
        jnz     .Lsqr4x_sub

        movq    %r9,%r10
        negq    %r9                     # restore negated length for caller
        .byte   0xf3,0xc3               # rep ret
.size   __bn_post4x_internal,.-__bn_post4x_internal

#
# bn_from_montgomery: public entry; only handles num%8==0 via bn_from_mont8x,
# otherwise returns 0 (caller falls back elsewhere).
#
.globl  bn_from_montgomery
.type   bn_from_montgomery,@function
.align  32
bn_from_montgomery:
        testl   $7,%r9d
        jz      bn_from_mont8x          # tail-jump: num is a multiple of 8
        xorl    %eax,%eax               # return 0 = not handled
        .byte   0xf3,0xc3               # rep ret
.size   bn_from_montgomery,.-bn_from_montgomery

#
# bn_from_mont8x: convert from Montgomery representation (one reduction of
# tp padded with zeros). Standard SysV args: rdi=rp, rsi=ap, rdx=bp,
# rcx=np, r8=n0, r9=num.
#
.type   bn_from_mont8x,@function
.align  32
bn_from_mont8x:
.byte   0x67                            # addr-size prefix as alignment filler
        movq    %rsp,%rax               # keep original %rsp for epilogue
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
.Lfrom_prologue:

        shll    $3,%r9d                 # num in bytes
        leaq    (%r9,%r9,2),%r10        # %r10 = 3*num
        negq    %r9
        movq    (%r8),%r8               # n0 value

        # Frame: 2*num + 256(+32) bytes, placed to avoid aliasing rp
        # modulo 4096 (cache/TLB-friendly, see .Lfrom_sp_alt below).
        leaq    -320(%rsp,%r9,2),%r11
        # (continuation of bn_from_mont8x stack-frame setup)
        movq    %rsp,%rbp
        subq    %rdi,%r11
        andq    $4095,%r11              # distance to rp mod page size
        cmpq    %r11,%r10
        jb      .Lfrom_sp_alt
        subq    %r11,%rbp               # shift frame to dodge rp's page offset
        leaq    -320(%rbp,%r9,2),%rbp
        jmp     .Lfrom_sp_done

.align  32
.Lfrom_sp_alt:
        leaq    4096-320(,%r9,2),%r10
        leaq    -320(%rbp,%r9,2),%rbp
        subq    %r10,%r11
        movq    $0,%r10
        cmovcq  %r10,%r11
        subq    %r11,%rbp
.Lfrom_sp_done:
        andq    $-64,%rbp               # 64-byte align the frame
        movq    %rsp,%r11
        subq    %rbp,%r11
        andq    $-4096,%r11
        leaq    (%r11,%rbp,1),%rsp
        movq    (%rsp),%r10             # probe
        cmpq    %rbp,%rsp
        ja      .Lfrom_page_walk
        jmp     .Lfrom_page_walk_done

.Lfrom_page_walk:
        # Touch every page while moving %rsp down (guard-page friendly).
        leaq    -4096(%rsp),%rsp
        movq    (%rsp),%r10
        cmpq    %rbp,%rsp
        ja      .Lfrom_page_walk
.Lfrom_page_walk_done:

        movq    %r9,%r10                # %r10 = -num (bytes)
        negq    %r9

        movq    %r8,32(%rsp)            # save n0
        movq    %rax,40(%rsp)           # save original %rsp
.Lfrom_body:
        movq    %r9,%r11
        leaq    48(%rsp),%rax           # tp
        pxor    %xmm0,%xmm0
        jmp     .Lmul_by_1

.align  32
.Lmul_by_1:
        # tp[0..num-1] = ap[], tp[num..2num-1] = 0 — i.e. multiply by 1
        # before running one Montgomery reduction.
        movdqu  (%rsi),%xmm1
        movdqu  16(%rsi),%xmm2
        movdqu  32(%rsi),%xmm3
        movdqa  %xmm0,(%rax,%r9,1)      # zero upper half
        movdqu  48(%rsi),%xmm4
        movdqa  %xmm0,16(%rax,%r9,1)
.byte   0x48,0x8d,0xb6,0x40,0x00,0x00,0x00      # leaq 64(%rsi),%rsi (fixed encoding)
        movdqa  %xmm1,(%rax)
        movdqa  %xmm0,32(%rax,%r9,1)
        movdqa  %xmm2,16(%rax)
        movdqa  %xmm0,48(%rax,%r9,1)
        movdqa  %xmm3,32(%rax)
        movdqa  %xmm4,48(%rax)
        leaq    64(%rax),%rax
        subq    $64,%r11
        jnz     .Lmul_by_1

.byte   102,72,15,110,207               # movq %rdi,%xmm1 (save rp)
.byte   102,72,15,110,209               # movq %rcx,%xmm2 (save np)
.byte   0x67
        movq    %rcx,%rbp
.byte   102,73,15,110,218               # movq %r10,%xmm3 (save -num)
        movl    OPENSSL_ia32cap_P+8(%rip),%r11d
        andl    $0x80108,%r11d          # BMI2+ADX+MULX capability bits
        cmpl    $0x80108,%r11d
        jne     .Lfrom_mont_nox

        leaq    (%rax,%r9,1),%rdi
        call    __bn_sqrx8x_reduction   # MULX/ADX reduction path
        call    __bn_postx4x_internal

        pxor    %xmm0,%xmm0
        leaq    48(%rsp),%rax
        movq    40(%rsp),%rsi
        jmp     .Lfrom_mont_zero

.align  32
.Lfrom_mont_nox:
        call    __bn_sqr8x_reduction    # classic mul/adc reduction path
        call    __bn_post4x_internal

        pxor    %xmm0,%xmm0
        leaq    48(%rsp),%rax
        movq    40(%rsp),%rsi
        jmp     .Lfrom_mont_zero

.align  32
.Lfrom_mont_zero:
        # Wipe the temporary frame before returning (secret data).
        movdqa  %xmm0,0(%rax)
        movdqa  %xmm0,16(%rax)
        movdqa  %xmm0,32(%rax)
        movdqa  %xmm0,48(%rax)
        leaq    64(%rax),%rax
        subq    $32,%r9
        jnz     .Lfrom_mont_zero

        movq    $1,%rax                 # return 1 = handled
        movq    -48(%rsi),%r15
        movq    -40(%rsi),%r14
        movq    -32(%rsi),%r13
        movq    -24(%rsi),%r12
        movq    -16(%rsi),%rbp
        movq    -8(%rsi),%rbx
        leaq    (%rsi),%rsp             # restore caller's stack
.Lfrom_epilogue:
        .byte   0xf3,0xc3               # rep ret
.size   bn_from_mont8x,.-bn_from_mont8x

#
# bn_mulx4x_mont_gather5: MULX/ADX Montgomery multiply with cache-safe
# gather of the 5-bit-indexed power. Same prologue/frame strategy as
# bn_from_mont8x above; the work is done by mulx4x_internal.
#
.type   bn_mulx4x_mont_gather5,@function
.align  32
bn_mulx4x_mont_gather5:
        movq    %rsp,%rax
.Lmulx4x_enter:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
.Lmulx4x_prologue:

        shll    $3,%r9d                 # num in bytes
        leaq    (%r9,%r9,2),%r10        # 3*num
        negq    %r9
        movq    (%r8),%r8               # n0 value

        leaq    -320(%rsp,%r9,2),%r11   # frame candidate: 2*num+256(+32)
        movq    %rsp,%rbp
        subq    %rdi,%r11
        andq    $4095,%r11
        cmpq    %r11,%r10
        jb      .Lmulx4xsp_alt
        subq    %r11,%rbp               # avoid aliasing rp mod 4096
        leaq    -320(%rbp,%r9,2),%rbp
        jmp     .Lmulx4xsp_done

.Lmulx4xsp_alt:
        leaq    4096-320(,%r9,2),%r10
        leaq    -320(%rbp,%r9,2),%rbp
        subq    %r10,%r11
        movq    $0,%r10
        cmovcq  %r10,%r11
        subq    %r11,%rbp
.Lmulx4xsp_done:
        andq    $-64,%rbp
        movq    %rsp,%r11
        subq    %rbp,%r11
        andq    $-4096,%r11
        leaq    (%r11,%rbp,1),%rsp
        movq    (%rsp),%r10
        cmpq    %rbp,%rsp
        ja      .Lmulx4x_page_walk
        jmp     .Lmulx4x_page_walk_done

.Lmulx4x_page_walk:
        leaq    -4096(%rsp),%rsp        # touch each page on the way down
        movq    (%rsp),%r10
        cmpq    %rbp,%rsp
        ja      .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

        movq    %r8,32(%rsp)            # save n0
        movq    %rax,40(%rsp)           # save original %rsp
.Lmulx4x_body:
        call    mulx4x_internal

        movq    40(%rsp),%rsi
        movq    $1,%rax                 # return 1

        movq    -48(%rsi),%r15
        # (epilogue of bn_mulx4x_mont_gather5: restore callee-saved regs)
        movq    -40(%rsi),%r14
        movq    -32(%rsi),%r13
        movq    -24(%rsi),%r12
        movq    -16(%rsi),%rbp
        movq    -8(%rsi),%rbx
        leaq    (%rsi),%rsp
.Lmulx4x_epilogue:
        .byte   0xf3,0xc3               # rep ret
.size   bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

#
# mulx4x_internal: MULX/ADX Montgomery multiplication core, 4 limbs at a
# time. On entry (from visible callers): rsi=ap, rcx=np, r9=num(bytes),
# rdx=bp table, rax points at stack args (power index at 8(%rax)),
# rdi=rp. Constant-time: the bp power is gathered by ANDing all 32 table
# entries with equality masks, never by indexed load.
#
.type   mulx4x_internal,@function
.align  32
mulx4x_internal:
        movq    %r9,8(%rsp)             # save num
        movq    %r9,%r10
        negq    %r9
        shlq    $5,%r9
        negq    %r10
        leaq    128(%rdx,%r9,1),%r13    # end of bp table
        shrq    $5+5,%r9
        movd    8(%rax),%xmm5           # xmm5 = requested power (0..31)
        subq    $1,%r9
        leaq    .Linc(%rip),%rax
        movq    %r13,16+8(%rsp)
        movq    %r9,24+8(%rsp)          # outer-loop trip count
        movq    %rdi,56+8(%rsp)         # save rp
        movdqa  0(%rax),%xmm0           # {0,1}
        movdqa  16(%rax),%xmm1          # {2,2}
        leaq    88-112(%rsp,%r10,1),%r10 # mask scratch area
        leaq    128(%rdx),%rdi          # %rdi = middle of bp table

        # Build 16 16-byte equality masks: mask[i] = (i == power) ? -1 : 0.
        pshufd  $0,%xmm5,%xmm5
        movdqa  %xmm1,%xmm4
.byte   0x67
        movdqa  %xmm1,%xmm2
.byte   0x67
        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm4,%xmm3
        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,112(%r10)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,128(%r10)
        movdqa  %xmm4,%xmm1

        paddd   %xmm3,%xmm0
        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,144(%r10)
        movdqa  %xmm4,%xmm2

        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm3,160(%r10)
        movdqa  %xmm4,%xmm3
        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,176(%r10)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,192(%r10)
        movdqa  %xmm4,%xmm1

        paddd   %xmm3,%xmm0
        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,208(%r10)
        movdqa  %xmm4,%xmm2

        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm3,224(%r10)
        movdqa  %xmm4,%xmm3
        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,240(%r10)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,256(%r10)
        movdqa  %xmm4,%xmm1

        paddd   %xmm3,%xmm0
        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,272(%r10)
        movdqa  %xmm4,%xmm2

        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm3,288(%r10)
        movdqa  %xmm4,%xmm3
.byte   0x67
        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,304(%r10)

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,320(%r10)

        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,336(%r10)

        # Constant-time gather of bp[power]: AND every table entry with
        # its mask and OR everything together.
        pand    64(%rdi),%xmm0
        pand    80(%rdi),%xmm1
        pand    96(%rdi),%xmm2
        movdqa  %xmm3,352(%r10)
        pand    112(%rdi),%xmm3
        por     %xmm2,%xmm0
        por     %xmm3,%xmm1
        movdqa  -128(%rdi),%xmm4
        movdqa  -112(%rdi),%xmm5
        movdqa  -96(%rdi),%xmm2
        pand    112(%r10),%xmm4
        movdqa  -80(%rdi),%xmm3
        pand    128(%r10),%xmm5
        por     %xmm4,%xmm0
        pand    144(%r10),%xmm2
        por     %xmm5,%xmm1
        pand    160(%r10),%xmm3
        por     %xmm2,%xmm0
        por     %xmm3,%xmm1
        movdqa  -64(%rdi),%xmm4
        movdqa  -48(%rdi),%xmm5
        movdqa  -32(%rdi),%xmm2
        pand    176(%r10),%xmm4
        movdqa  -16(%rdi),%xmm3
        pand    192(%r10),%xmm5
        por     %xmm4,%xmm0
        pand    208(%r10),%xmm2
        por     %xmm5,%xmm1
        pand    224(%r10),%xmm3
        por     %xmm2,%xmm0
        por     %xmm3,%xmm1
        movdqa  0(%rdi),%xmm4
        movdqa  16(%rdi),%xmm5
        movdqa  32(%rdi),%xmm2
        pand    240(%r10),%xmm4
        movdqa  48(%rdi),%xmm3
        pand    256(%r10),%xmm5
        por     %xmm4,%xmm0
        pand    272(%r10),%xmm2
        por     %xmm5,%xmm1
        pand    288(%r10),%xmm3
        por     %xmm2,%xmm0
        por     %xmm3,%xmm1
        pxor    %xmm1,%xmm0
        pshufd  $0x4e,%xmm0,%xmm1       # fold high qword into low
        por     %xmm1,%xmm0
        leaq    256(%rdi),%rdi
.byte   102,72,15,126,194               # movq %xmm0,%rdx — gathered b[0]

        leaq    64+32+8(%rsp),%rbx      # %rbx = tp cursor

        # First 4 limbs of a[] * b[0] using MULX (flags untouched by mulx).
        movq    %rdx,%r9
        mulxq   0(%rsi),%r8,%rax
        mulxq   8(%rsi),%r11,%r12
        addq    %rax,%r11
        mulxq   16(%rsi),%rax,%r13
        adcq    %rax,%r12
        adcq    $0,%r13
        mulxq   24(%rsi),%rax,%r14

        movq    %r8,%r15
        imulq   32+8(%rsp),%r8          # m = n0 * t[0]
        xorq    %rbp,%rbp               # %rbp = 0, also clears CF/OF
        movq    %r8,%rdx

        movq    %rdi,8+8(%rsp)          # save bp cursor

        leaq    32(%rsi),%rsi
        adcxq   %rax,%r13
        adcxq   %rbp,%r14

        # Montgomery step for the first 4 limbs: add m * n[0..3] using the
        # dual ADCX/ADOX carry chains (CF and OF run independently).
        mulxq   0(%rcx),%rax,%r10
        adcxq   %rax,%r15               # kills t[0]
        adoxq   %r11,%r10
        mulxq   8(%rcx),%rax,%r11
        adcxq   %rax,%r10
        adoxq   %r12,%r11
        mulxq   16(%rcx),%rax,%r12
        movq    24+8(%rsp),%rdi         # inner-loop counter
        movq    %r10,-32(%rbx)
        adcxq   %rax,%r11
        adoxq   %r13,%r12
        mulxq   24(%rcx),%rax,%r15
        movq    %r9,%rdx                # back to multiplier b[0]
        movq    %r11,-24(%rbx)
        adcxq   %rax,%r12
        adoxq   %rbp,%r15
        leaq    32(%rcx),%rcx
        movq    %r12,-16(%rbx)
        jmp     .Lmulx4x_1st

.align  32
.Lmulx4x_1st:
        # First outer iteration: t[] = a[] * b[0] + m * n[], 4 limbs/pass.
        adcxq   %rbp,%r15
        mulxq   0(%rsi),%r10,%rax
        adcxq   %r14,%r10
        mulxq   8(%rsi),%r11,%r14
        adcxq   %rax,%r11
        mulxq   16(%rsi),%r12,%rax
        adcxq   %r14,%r12
        mulxq   24(%rsi),%r13,%r14
.byte   0x67,0x67                       # prefixes as alignment filler
        movq    %r8,%rdx                # switch to m
        adcxq   %rax,%r13
        adcxq   %rbp,%r14
        leaq    32(%rsi),%rsi
        leaq    32(%rbx),%rbx

        adoxq   %r15,%r10
        mulxq   0(%rcx),%rax,%r15
        adcxq   %rax,%r10
        adoxq   %r15,%r11
        mulxq   8(%rcx),%rax,%r15
        adcxq   %rax,%r11
        adoxq   %r15,%r12
        mulxq   16(%rcx),%rax,%r15
        movq    %r10,-40(%rbx)
        adcxq   %rax,%r12
        movq    %r11,-32(%rbx)
        adoxq   %r15,%r13
        mulxq   24(%rcx),%rax,%r15
        movq    %r9,%rdx
        movq    %r12,-24(%rbx)
        adcxq   %rax,%r13
        adoxq   %rbp,%r15
        leaq    32(%rcx),%rcx
        movq    %r13,-16(%rbx)

        decq    %rdi
        jnz     .Lmulx4x_1st

        movq    8(%rsp),%rax            # num (bytes)
        adcq    %rbp,%r15
        leaq    (%rsi,%rax,1),%rsi      # rewind ap
        addq    %r15,%r14
        movq    8+8(%rsp),%rdi          # bp cursor
        adcq    %rbp,%rbp               # %rbp = top carry
        movq    %r14,-8(%rbx)
        jmp     .Lmulx4x_outer

.align  32
.Lmulx4x_outer:
        # Gather next b[i] in constant time (masks were stored relative to
        # %rbx earlier; offsets shift as %rbx advances).
        leaq    16-256(%rbx),%r10
        pxor    %xmm4,%xmm4
.byte   0x67,0x67
        pxor    %xmm5,%xmm5
        movdqa  -128(%rdi),%xmm0
        movdqa  -112(%rdi),%xmm1
        movdqa  -96(%rdi),%xmm2
        pand    256(%r10),%xmm0
        movdqa  -80(%rdi),%xmm3
        pand    272(%r10),%xmm1
        por     %xmm0,%xmm4
        pand    288(%r10),%xmm2
        por     %xmm1,%xmm5
        pand    304(%r10),%xmm3
        por     %xmm2,%xmm4
        por     %xmm3,%xmm5
        movdqa  -64(%rdi),%xmm0
        movdqa  -48(%rdi),%xmm1
        movdqa  -32(%rdi),%xmm2
        pand    320(%r10),%xmm0
        movdqa  -16(%rdi),%xmm3
        pand    336(%r10),%xmm1
        por     %xmm0,%xmm4
        pand    352(%r10),%xmm2
        por     %xmm1,%xmm5
        pand    368(%r10),%xmm3
        por     %xmm2,%xmm4
        por     %xmm3,%xmm5
        movdqa  0(%rdi),%xmm0
        movdqa  16(%rdi),%xmm1
        movdqa  32(%rdi),%xmm2
        pand    384(%r10),%xmm0
        movdqa  48(%rdi),%xmm3
        pand    400(%r10),%xmm1
        por     %xmm0,%xmm4
        pand    416(%r10),%xmm2
        por     %xmm1,%xmm5
        pand    432(%r10),%xmm3
        por     %xmm2,%xmm4
        por     %xmm3,%xmm5
        movdqa  64(%rdi),%xmm0
        movdqa  80(%rdi),%xmm1
        movdqa  96(%rdi),%xmm2
        pand    448(%r10),%xmm0
        movdqa  112(%rdi),%xmm3
        pand    464(%r10),%xmm1
        por     %xmm0,%xmm4
        pand    480(%r10),%xmm2
        por     %xmm1,%xmm5
        pand    496(%r10),%xmm3
        por     %xmm2,%xmm4
        por     %xmm3,%xmm5
        por     %xmm5,%xmm4
        pshufd  $0x4e,%xmm4,%xmm0
        por     %xmm4,%xmm0
        leaq    256(%rdi),%rdi
.byte   102,72,15,126,194               # movq %xmm0,%rdx — gathered b[i]

        # t[] += a[] * b[i]; first 4 limbs with accumulation from tp.
        movq    %rbp,(%rbx)             # store previous top carry
        leaq    32(%rbx,%rax,1),%rbx    # rewind tp
        mulxq   0(%rsi),%r8,%r11
        xorq    %rbp,%rbp               # %rbp = 0, clears CF/OF
        movq    %rdx,%r9
        mulxq   8(%rsi),%r14,%r12
        adoxq   -32(%rbx),%r8
        adcxq   %r14,%r11
        mulxq   16(%rsi),%r15,%r13
        adoxq   -24(%rbx),%r11
        adcxq   %r15,%r12
        mulxq   24(%rsi),%rdx,%r14
        adoxq   -16(%rbx),%r12
        adcxq   %rdx,%r13
        leaq    (%rcx,%rax,1),%rcx      # rewind np
        leaq    32(%rsi),%rsi
        adoxq   -8(%rbx),%r13
        adcxq   %rbp,%r14
        adoxq   %rbp,%r14

        movq    %r8,%r15
        imulq   32+8(%rsp),%r8          # m = n0 * t[0]

        movq    %r8,%rdx
        xorq    %rbp,%rbp               # reset both carry chains
        movq    %rdi,8+8(%rsp)

        mulxq   0(%rcx),%rax,%r10
        adcxq   %rax,%r15               # kills t[0]
        adoxq   %r11,%r10
        mulxq   8(%rcx),%rax,%r11
        adcxq   %rax,%r10
        adoxq   %r12,%r11
        mulxq   16(%rcx),%rax,%r12
        adcxq   %rax,%r11
        adoxq   %r13,%r12
        mulxq   24(%rcx),%rax,%r15
        movq    %r9,%rdx
        movq    24+8(%rsp),%rdi         # inner-loop counter
        movq    %r10,-32(%rbx)
        adcxq   %rax,%r12
        movq    %r11,-24(%rbx)
        adoxq   %rbp,%r15
        movq    %r12,-16(%rbx)
        leaq    32(%rcx),%rcx
        jmp     .Lmulx4x_inner

.align  32
.Lmulx4x_inner:
        # Steady state: t[] += a[]*b[i] + m*n[], 4 limbs per pass, dual
        # ADCX/ADOX chains. Statement order is carry-critical.
        mulxq   0(%rsi),%r10,%rax
        adcxq   %rbp,%r15
        adoxq   %r14,%r10
        mulxq   8(%rsi),%r11,%r14
        adcxq   0(%rbx),%r10
        adoxq   %rax,%r11
        mulxq   16(%rsi),%r12,%rax
        adcxq   8(%rbx),%r11
        adoxq   %r14,%r12
        mulxq   24(%rsi),%r13,%r14
        movq    %r8,%rdx
        adcxq   16(%rbx),%r12
        adoxq   %rax,%r13
        adcxq   24(%rbx),%r13
        adoxq   %rbp,%r14
        leaq    32(%rsi),%rsi
        leaq    32(%rbx),%rbx
        adcxq   %rbp,%r14

        adoxq   %r15,%r10
        mulxq   0(%rcx),%rax,%r15
        adcxq   %rax,%r10
        adoxq   %r15,%r11
        mulxq   8(%rcx),%rax,%r15
        adcxq   %rax,%r11
        adoxq   %r15,%r12
        mulxq   16(%rcx),%rax,%r15
        movq    %r10,-40(%rbx)
        adcxq   %rax,%r12
        adoxq   %r15,%r13
        movq    %r11,-32(%rbx)
        mulxq   24(%rcx),%rax,%r15
        movq    %r9,%rdx
        leaq    32(%rcx),%rcx
        movq    %r12,-24(%rbx)
        adcxq   %rax,%r13
        adoxq   %rbp,%r15
        movq    %r13,-16(%rbx)

        decq    %rdi
        jnz     .Lmulx4x_inner

        movq    0+8(%rsp),%rax          # num
        adcq    %rbp,%r15
        subq    0(%rbx),%rdi            # %rdi is 0 here; sets CF from tp top word
        movq    8+8(%rsp),%rdi          # bp cursor
        movq    16+8(%rsp),%r10         # end of bp table
        adcq    %r15,%r14
        leaq    (%rsi,%rax,1),%rsi      # rewind ap
        adcq    %rbp,%rbp               # top carry
        movq    %r14,-8(%rbx)

        cmpq    %r10,%rdi
        jb      .Lmulx4x_outer          # more b[] words

        # Done multiplying: set up the conditional subtraction
        # (falls through into .Lsqrx4x_sub_entry in __bn_postx4x_internal).
        movq    -8(%rcx),%r10           # top modulus word
        movq    %rbp,%r8
        movq    (%rcx,%rax,1),%r12
        leaq    (%rcx,%rax,1),%rbp      # rewind np
        movq    %rax,%rcx
        leaq    (%rbx,%rax,1),%rdi      # rewind tp
        xorl    %eax,%eax
        xorq    %r15,%r15
        subq    %r14,%r10               # does top word need subtracting?
        adcq    %r15,%r15
        orq     %r15,%r8
        sarq    $3+2,%rcx               # %rcx = -(4-limb groups)
        subq    %r8,%rax                # %rax = 0 or -1 subtraction mask
        movq    56+8(%rsp),%rdx         # rp
        decq    %r12                    # so the masked path is uniform
        movq    8(%rbp),%r13
        xorq    %r8,%r8
        movq    16(%rbp),%r14
        movq    24(%rbp),%r15
        jmp     .Lsqrx4x_sub_entry      # tail into __bn_postx4x_internal
.size   mulx4x_internal,.-mulx4x_internal

#
# bn_powerx5: rp = ap^(2^5) * bp mod np using the MULX/ADX kernels.
# Five square+reduce rounds followed by one gather-multiply.
#
.type   bn_powerx5,@function
.align  32
bn_powerx5:
        movq    %rsp,%rax               # keep original %rsp
.Lpowerx5_enter:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
.Lpowerx5_prologue:

        # (bn_powerx5 continued: frame setup identical in strategy to
        # bn_from_mont8x / bn_mulx4x_mont_gather5 above)
        shll    $3,%r9d                 # num in bytes
        leaq    (%r9,%r9,2),%r10        # 3*num
        negq    %r9
        movq    (%r8),%r8               # n0 value

        leaq    -320(%rsp,%r9,2),%r11   # 2*num+256(+32) frame candidate
        movq    %rsp,%rbp
        subq    %rdi,%r11
        andq    $4095,%r11
        cmpq    %r11,%r10
        jb      .Lpwrx_sp_alt
        subq    %r11,%rbp               # dodge rp's page offset
        leaq    -320(%rbp,%r9,2),%rbp
        jmp     .Lpwrx_sp_done

.align  32
.Lpwrx_sp_alt:
        leaq    4096-320(,%r9,2),%r10
        leaq    -320(%rbp,%r9,2),%rbp
        subq    %r10,%r11
        movq    $0,%r10
        cmovcq  %r10,%r11
        subq    %r11,%rbp
.Lpwrx_sp_done:
        andq    $-64,%rbp
        movq    %rsp,%r11
        subq    %rbp,%r11
        andq    $-4096,%r11
        leaq    (%r11,%rbp,1),%rsp
        movq    (%rsp),%r10
        cmpq    %rbp,%rsp
        ja      .Lpwrx_page_walk
        jmp     .Lpwrx_page_walk_done

.Lpwrx_page_walk:
        leaq    -4096(%rsp),%rsp        # probe each page downward
        movq    (%rsp),%r10
        cmpq    %rbp,%rsp
        ja      .Lpwrx_page_walk
.Lpwrx_page_walk_done:

        movq    %r9,%r10
        negq    %r9

        # Park pointers in xmm regs so the squaring rounds can't clobber
        # them: xmm1=rp, xmm2=np, xmm3=-num, xmm4=bp.
        pxor    %xmm0,%xmm0
.byte   102,72,15,110,207               # movq %rdi,%xmm1
.byte   102,72,15,110,209               # movq %rcx,%xmm2
.byte   102,73,15,110,218               # movq %r10,%xmm3
.byte   102,72,15,110,226               # movq %rdx,%xmm4
        movq    %r8,32(%rsp)            # save n0
        movq    %rax,40(%rsp)           # save original %rsp
.Lpowerx5_body:

        call    __bn_sqrx8x_internal    # square + reduce, x5
        call    __bn_postx4x_internal
        call    __bn_sqrx8x_internal
        call    __bn_postx4x_internal
        call    __bn_sqrx8x_internal
        call    __bn_postx4x_internal
        call    __bn_sqrx8x_internal
        call    __bn_postx4x_internal
        call    __bn_sqrx8x_internal
        call    __bn_postx4x_internal

        movq    %r10,%r9
        movq    %rsi,%rdi
.byte   102,72,15,126,209               # movq %xmm2,%rcx — restore np
.byte   102,72,15,126,226               # movq %xmm4,%rdx — restore bp
        movq    40(%rsp),%rax

        call    mulx4x_internal         # final gather-multiply by bp[power]

        movq    40(%rsp),%rsi
        movq    $1,%rax                 # return 1

        movq    -48(%rsi),%r15
        movq    -40(%rsi),%r14
        movq    -32(%rsi),%r13
        movq    -24(%rsi),%r12
        movq    -16(%rsi),%rbp
        movq    -8(%rsi),%rbx
        leaq    (%rsi),%rsp
.Lpowerx5_epilogue:
        .byte   0xf3,0xc3               # rep ret
.size   bn_powerx5,.-bn_powerx5

.globl  bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
#
# bn_sqrx8x_internal: MULX/ADX squaring kernel. Computes the off-diagonal
# cross products a[i]*a[j] (i<j) into tp, 8 source limbs per outer pass;
# the doubling + diagonal terms and the Montgomery reduction follow in
# .Lsqrx4x_shift_n_add / __bn_sqrx8x_reduction further down.
# In (from visible code): %rsi = ap, %r9 = num bytes, frame at 48+8(%rsp).
#
.type   bn_sqrx8x_internal,@function
.align  32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:

        leaq    48+8(%rsp),%rdi         # tp
        leaq    (%rsi,%r9,1),%rbp       # end of ap
        movq    %r9,0+8(%rsp)
        movq    %rbp,8+8(%rsp)
        jmp     .Lsqr8x_zero_start

.align  32
.byte   0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00  # long NOP filler
.Lsqrx8x_zero:
.byte   0x3e                            # DS prefix as filler
        movdqa  %xmm0,0(%rdi)           # zero the product area
        movdqa  %xmm0,16(%rdi)
        movdqa  %xmm0,32(%rdi)
        movdqa  %xmm0,48(%rdi)
.Lsqr8x_zero_start:
        movdqa  %xmm0,64(%rdi)
        movdqa  %xmm0,80(%rdi)
        movdqa  %xmm0,96(%rdi)
        movdqa  %xmm0,112(%rdi)
        leaq    128(%rdi),%rdi
        subq    $64,%r9
        jnz     .Lsqrx8x_zero

        movq    0(%rsi),%rdx            # multiplier a[0]

        xorq    %r10,%r10
        xorq    %r11,%r11
        xorq    %r12,%r12
        xorq    %r13,%r13
        xorq    %r14,%r14
        xorq    %r15,%r15
        leaq    48+8(%rsp),%rdi
        xorq    %rbp,%rbp               # constant 0
        jmp     .Lsqrx8x_outer_loop

.align  32
.Lsqrx8x_outer_loop:
        # Row a[0] * a[1..7]: triangular portion for the current 8 limbs.
        mulxq   8(%rsi),%r8,%rax
        adcxq   %r9,%r8
        adoxq   %rax,%r10
        mulxq   16(%rsi),%r9,%rax
        adcxq   %r10,%r9
        adoxq   %rax,%r11
.byte   0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00    # mulxq 24(%rsi),%r10,%rax
        adcxq   %r11,%r10
        adoxq   %rax,%r12
.byte   0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00    # mulxq 32(%rsi),%r11,%rax
        adcxq   %r12,%r11
        adoxq   %rax,%r13
        mulxq   40(%rsi),%r12,%rax
        adcxq   %r13,%r12
        adoxq   %rax,%r14
        mulxq   48(%rsi),%r13,%rax
        adcxq   %r14,%r13
        adoxq   %r15,%rax
        mulxq   56(%rsi),%r14,%r15
        movq    8(%rsi),%rdx            # next multiplier a[1]
        adcxq   %rax,%r14
        adoxq   %rbp,%r15
        adcq    64(%rdi),%r15
        movq    %r8,8(%rdi)
        movq    %r9,16(%rdi)
        sbbq    %rcx,%rcx               # remember carry-out
        xorq    %rbp,%rbp               # reset CF/OF

        # Row a[1] * a[2..7].
        mulxq   16(%rsi),%r8,%rbx
        mulxq   24(%rsi),%r9,%rax
        adcxq   %r10,%r8
        adoxq   %rbx,%r9
        mulxq   32(%rsi),%r10,%rbx
        adcxq   %r11,%r9
        adoxq   %rax,%r10
.byte   0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00    # mulxq 40(%rsi),%r11,%rax
        adcxq   %r12,%r10
        adoxq   %rbx,%r11
.byte   0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00    # mulxq 48(%rsi),%r12,%rbx
        adcxq   %r13,%r11
        adoxq   %r14,%r12
.byte   0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00    # mulxq 56(%rsi),%r13,%r14
        movq    16(%rsi),%rdx           # next multiplier a[2]
        adcxq   %rax,%r12
        adoxq   %rbx,%r13
        adcxq   %r15,%r13
        adoxq   %rbp,%r14
        adcxq   %rbp,%r14

        movq    %r8,24(%rdi)
        movq    %r9,32(%rdi)

        # Row a[2] * a[3..7].
        mulxq   24(%rsi),%r8,%rbx
        mulxq   32(%rsi),%r9,%rax
        adcxq   %r10,%r8
        adoxq   %rbx,%r9
        mulxq   40(%rsi),%r10,%rbx
        adcxq   %r11,%r9
        adoxq   %rax,%r10
.byte   0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00    # mulxq 48(%rsi),%r11,%rax
        adcxq   %r12,%r10
        adoxq   %r13,%r11
.byte   0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00    # mulxq 56(%rsi),%r12,%r13
.byte   0x3e                            # DS prefix as filler
        movq    24(%rsi),%rdx           # next multiplier a[3]
        adcxq   %rbx,%r11
        adoxq   %rax,%r12
        adcxq   %r14,%r12
        movq    %r8,40(%rdi)
        movq    %r9,48(%rdi)
        mulxq   32(%rsi),%r8,%rax
        adoxq   %rbp,%r13
        adcxq   %rbp,%r13

        # Row a[3] * a[4..7].
        mulxq   40(%rsi),%r9,%rbx
        adcxq   %r10,%r8
        adoxq   %rax,%r9
        mulxq   48(%rsi),%r10,%rax
        adcxq   %r11,%r9
        adoxq   %r12,%r10
        mulxq   56(%rsi),%r11,%r12
        movq    32(%rsi),%rdx           # next multiplier a[4]
        movq    40(%rsi),%r14
        adcxq   %rbx,%r10
        adoxq   %rax,%r11
        movq    48(%rsi),%r15
        adcxq   %r13,%r11
        adoxq   %rbp,%r12
        adcxq   %rbp,%r12

        movq    %r8,56(%rdi)
        movq    %r9,64(%rdi)

        # Rows a[4..6] * remaining limbs (operands now in registers).
        mulxq   %r14,%r9,%rax           # a[4]*a[5]
        movq    56(%rsi),%r8
        adcxq   %r10,%r9
        mulxq   %r15,%r10,%rbx          # a[4]*a[6]
        adoxq   %rax,%r10
        adcxq   %r11,%r10
        mulxq   %r8,%r11,%rax           # a[4]*a[7]
        movq    %r14,%rdx
        adoxq   %rbx,%r11
        adcxq   %r12,%r11

        adcxq   %rbp,%rax

        mulxq   %r15,%r14,%rbx          # a[5]*a[6]
        mulxq   %r8,%r12,%r13           # a[5]*a[7]
        movq    %r15,%rdx
        leaq    64(%rsi),%rsi           # advance to next 8 source limbs
        adcxq   %r14,%r11
        adoxq   %rbx,%r12
        adcxq   %rax,%r12
        adoxq   %rbp,%r13

.byte   0x67,0x67                       # prefixes as filler
        mulxq   %r8,%r8,%r14            # a[6]*a[7]
        adcxq   %r8,%r13
        adcxq   %rbp,%r14

        cmpq    8+8(%rsp),%rsi          # all source limbs consumed?
        je      .Lsqrx8x_outer_break
        # Not done: fold previously accumulated tp words into r8..r15 and
        # run the rectangular cross-product loop against the rest of ap.
        negq    %rcx                    # restore carry saved by sbb above
        movq    $-8,%rcx
        movq    %rbp,%r15
        movq    64(%rdi),%r8
        adcxq   72(%rdi),%r9
        adcxq   80(%rdi),%r10
        adcxq   88(%rdi),%r11
        adcq    96(%rdi),%r12
        adcq    104(%rdi),%r13
        adcq    112(%rdi),%r14
        adcq    120(%rdi),%r15
        leaq    (%rsi),%rbp             # %rbp walks the "other" operand
        leaq    128(%rdi),%rdi
        sbbq    %rax,%rax               # save carry

        movq    -64(%rsi),%rdx          # multiplier limb
        movq    %rax,16+8(%rsp)
        movq    %rdi,24+8(%rsp)

        xorl    %eax,%eax               # clears CF/OF
        jmp     .Lsqrx8x_loop

.align  32
.Lsqrx8x_loop:
        # One limb of the current row times 8 limbs at %rbp; dual
        # ADCX/ADOX chains, %rcx counts -8..0.
        movq    %r8,%rbx
        mulxq   0(%rbp),%rax,%r8
        adcxq   %rax,%rbx
        adoxq   %r9,%r8

        mulxq   8(%rbp),%rax,%r9
        adcxq   %rax,%r8
        adoxq   %r10,%r9

        mulxq   16(%rbp),%rax,%r10
        adcxq   %rax,%r9
        adoxq   %r11,%r10

        mulxq   24(%rbp),%rax,%r11
        adcxq   %rax,%r10
        adoxq   %r12,%r11

.byte   0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00    # mulxq 32(%rbp),%rax,%r12
        adcxq   %rax,%r11
        adoxq   %r13,%r12

        mulxq   40(%rbp),%rax,%r13
        adcxq   %rax,%r12
        adoxq   %r14,%r13

        mulxq   48(%rbp),%rax,%r14
        movq    %rbx,(%rdi,%rcx,8)      # store finished limb
        movl    $0,%ebx
        adcxq   %rax,%r13
        adoxq   %r15,%r14

.byte   0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00    # mulxq 56(%rbp),%rax,%r15
        movq    8(%rsi,%rcx,8),%rdx     # next multiplier limb
        adcxq   %rax,%r14
        adoxq   %rbx,%r15
        adcxq   %rbx,%r15

.byte   0x67
        incq    %rcx
        jnz     .Lsqrx8x_loop

        leaq    64(%rbp),%rbp           # next 8-limb column
        movq    $-8,%rcx
        cmpq    8+8(%rsp),%rbp          # end of operand?
        je      .Lsqrx8x_break

        subq    16+8(%rsp),%rbx         # reload saved carry into CF
.byte   0x66
        movq    -64(%rsi),%rdx
        adcxq   0(%rdi),%r8             # fold in stored tp words
        adcxq   8(%rdi),%r9
        adcq    16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        leaq    64(%rdi),%rdi
.byte   0x67
        sbbq    %rax,%rax               # save carry
        xorl    %ebx,%ebx               # clears CF/OF
        movq    %rax,16+8(%rsp)
        jmp     .Lsqrx8x_loop

.align  32
.Lsqrx8x_break:
        # End of one row band: propagate final carries into r8..r15 and
        # either start the next outer band or copy state forward.
        xorq    %rbp,%rbp
        subq    16+8(%rsp),%rbx         # saved carry -> CF
        adcxq   %rbp,%r8
        movq    24+8(%rsp),%rcx
        adcxq   %rbp,%r9
        movq    0(%rsi),%rdx
        adcq    $0,%r10
        movq    %r8,0(%rdi)
        adcq    $0,%r11
        adcq    $0,%r12
        adcq    $0,%r13
        adcq    $0,%r14
        adcq    $0,%r15
        cmpq    %rcx,%rdi
        je      .Lsqrx8x_outer_loop

        movq    %r9,8(%rdi)
        movq    8(%rcx),%r9
        movq    %r10,16(%rdi)
        movq    16(%rcx),%r10
        movq    %r11,24(%rdi)
        movq    24(%rcx),%r11
        movq    %r12,32(%rdi)
        movq    32(%rcx),%r12
        movq    %r13,40(%rdi)
        movq    40(%rcx),%r13
        movq    %r14,48(%rdi)
        movq    48(%rcx),%r14
        movq    %r15,56(%rdi)
        movq    56(%rcx),%r15
        movq    %rcx,%rdi
        jmp     .Lsqrx8x_outer_loop

.align  32
.Lsqrx8x_outer_break:
        # Triangle finished: store the top limbs, then double everything
        # and add the squared diagonal a[i]^2 terms.
        movq    %r9,72(%rdi)
.byte   102,72,15,126,217               # movq %xmm3,%r9 (restore -num)
        movq    %r10,80(%rdi)
        movq    %r11,88(%rdi)
        movq    %r12,96(%rdi)
        movq    %r13,104(%rdi)
        movq    %r14,112(%rdi)
        leaq    48+8(%rsp),%rdi         # rewind tp
        movq    (%rsi,%rcx,1),%rdx      # a[0]

        movq    8(%rdi),%r11
        xorq    %r10,%r10
        movq    0+8(%rsp),%r9           # num
        adoxq   %r11,%r11               # start doubling chain (OF)
        movq    16(%rdi),%r12
        movq    24(%rdi),%r13

.align  32
.Lsqrx4x_shift_n_add:
        # tp = 2*tp + a[i]^2, two limbs of square per mulx, shifts done
        # via the adox self-additions.
        mulxq   %rdx,%rax,%rbx          # a[i]^2
        adoxq   %r12,%r12
        adcxq   %r10,%rax
.byte   0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00         # movq 8(%rsi,%rcx,1),%rdx
.byte   0x4c,0x8b,0x97,0x20,0x00,0x00,0x00              # movq 32(%rdi),%r10
        adoxq   %r13,%r13
        adcxq   %r11,%rbx
        movq    40(%rdi),%r11
        movq    %rax,0(%rdi)
        movq    %rbx,8(%rdi)

        mulxq   %rdx,%rax,%rbx
        adoxq   %r10,%r10
        adcxq   %r12,%rax
        movq    16(%rsi,%rcx,1),%rdx
        movq    48(%rdi),%r12
        adoxq   %r11,%r11
        adcxq   %r13,%rbx
        movq    56(%rdi),%r13
        movq    %rax,16(%rdi)
        movq    %rbx,24(%rdi)

        mulxq   %rdx,%rax,%rbx
        adoxq   %r12,%r12
        adcxq   %r10,%rax
        movq    24(%rsi,%rcx,1),%rdx
        leaq    32(%rcx),%rcx
        movq    64(%rdi),%r10
        adoxq   %r13,%r13
        adcxq   %r11,%rbx
        movq    72(%rdi),%r11
        movq    %rax,32(%rdi)
        movq    %rbx,40(%rdi)

        mulxq   %rdx,%rax,%rbx
        adoxq   %r10,%r10
        adcxq   %r12,%rax
        jrcxz   .Lsqrx4x_shift_n_add_break      # %rcx==0: last group
.byte   0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00         # movq 0(%rsi,%rcx,1),%rdx
        adoxq   %r11,%r11
        adcxq   %r13,%rbx
        # (tail of the .Lsqrx4x_shift_n_add doubling loop)
        movq    80(%rdi),%r12
        movq    88(%rdi),%r13
        movq    %rax,48(%rdi)
        movq    %rbx,56(%rdi)
        leaq    64(%rdi),%rdi
        nop
        jmp     .Lsqrx4x_shift_n_add

.align  32
.Lsqrx4x_shift_n_add_break:
        adcxq   %r13,%rbx
        movq    %rax,48(%rdi)
        movq    %rbx,56(%rdi)
        leaq    64(%rdi),%rdi           # falls through into reduction
.byte   102,72,15,126,213               # movq %xmm5,%rbp — np pointer

#
# __bn_sqrx8x_reduction: MULX/ADX Montgomery reduction of the 2*num-limb
# square in tp. %rbx = n0, %rbp = np, %rdi = tp window; processes
# 8 limbs per .Lsqrx8x_reduce pass, then a tail pass per remaining
# modulus block.
#
__bn_sqrx8x_reduction:
        xorl    %eax,%eax               # top carry; also clears CF/OF
        movq    32+8(%rsp),%rbx         # n0
        movq    48+8(%rsp),%rdx         # tp[0]
        leaq    -64(%rbp,%r9,1),%rcx    # end of np

        movq    %rcx,0+8(%rsp)
        movq    %rdi,8+8(%rsp)

        leaq    48+8(%rsp),%rdi         # tp
        jmp     .Lsqrx8x_reduction_loop

.align  32
.Lsqrx8x_reduction_loop:
        movq    8(%rdi),%r9             # load an 8-limb tp window
        movq    16(%rdi),%r10
        movq    24(%rdi),%r11
        movq    32(%rdi),%r12
        movq    %rdx,%r8
        imulq   %rbx,%rdx               # m = n0 * tp[0]
        movq    40(%rdi),%r13
        movq    48(%rdi),%r14
        movq    56(%rdi),%r15
        movq    %rax,24+8(%rsp)         # save top carry

        leaq    64(%rdi),%rdi
        xorq    %rsi,%rsi               # constant 0; clears CF/OF
        movq    $-8,%rcx
        jmp     .Lsqrx8x_reduce

.align  32
.Lsqrx8x_reduce:
        # Add m * n[0..7]; compute the next m mid-stream via mulx by n0.
        movq    %r8,%rbx
        mulxq   0(%rbp),%rax,%r8
        adcxq   %rbx,%rax               # discard, keep carry
        adoxq   %r9,%r8

        mulxq   8(%rbp),%rbx,%r9
        adcxq   %rbx,%r8
        adoxq   %r10,%r9

        mulxq   16(%rbp),%rbx,%r10
        adcxq   %rbx,%r9
        adoxq   %r11,%r10

        mulxq   24(%rbp),%rbx,%r11
        adcxq   %rbx,%r10
        adoxq   %r12,%r11

.byte   0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00    # mulxq 32(%rbp),%rbx,%r12
        movq    %rdx,%rax
        movq    %r8,%rdx
        adcxq   %rbx,%r11
        adoxq   %r13,%r12

        mulxq   32+8(%rsp),%rbx,%rdx    # next m = n0 * new low limb
        movq    %rax,%rdx
        movq    %rax,64+48+8(%rsp,%rcx,8)  # stash m for the tail pass

        mulxq   40(%rbp),%rax,%r13
        adcxq   %rax,%r12
        adoxq   %r14,%r13

        mulxq   48(%rbp),%rax,%r14
        adcxq   %rax,%r13
        adoxq   %r15,%r14

        mulxq   56(%rbp),%rax,%r15
        movq    %rbx,%rdx               # rotate in the next m
        adcxq   %rax,%r14
        adoxq   %rsi,%r15
        adcxq   %rsi,%r15

.byte   0x67,0x67,0x67                  # prefixes as alignment filler
        incq    %rcx
        jnz     .Lsqrx8x_reduce

        movq    %rsi,%rax
        cmpq    0+8(%rsp),%rbp          # end of modulus?
        jae     .Lsqrx8x_no_tail

        # Tail: add stored tp words back in and continue the stashed-m
        # passes over the remaining modulus blocks.
        movq    48+8(%rsp),%rdx
        addq    0(%rdi),%r8
        leaq    64(%rbp),%rbp
        movq    $-8,%rcx
        adcxq   8(%rdi),%r9
        adcxq   16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        leaq    64(%rdi),%rdi
        sbbq    %rax,%rax               # save carry

        xorq    %rsi,%rsi               # clears CF/OF
        movq    %rax,16+8(%rsp)
        jmp     .Lsqrx8x_tail

.align  32
.Lsqrx8x_tail:
        movq    %r8,%rbx
        mulxq   0(%rbp),%rax,%r8
        adcxq   %rax,%rbx
        adoxq   %r9,%r8

        mulxq   8(%rbp),%rax,%r9
        adcxq   %rax,%r8
        adoxq   %r10,%r9

        mulxq   16(%rbp),%rax,%r10
        adcxq   %rax,%r9
        adoxq   %r11,%r10

        mulxq   24(%rbp),%rax,%r11
        adcxq   %rax,%r10
        adoxq   %r12,%r11

.byte   0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00    # mulxq 32(%rbp),%rax,%r12
        adcxq   %rax,%r11
        adoxq   %r13,%r12

        mulxq   40(%rbp),%rax,%r13
        adcxq   %rax,%r12
        adoxq   %r14,%r13

        mulxq   48(%rbp),%rax,%r14
        adcxq   %rax,%r13
        adoxq   %r15,%r14

        mulxq   56(%rbp),%rax,%r15
        movq    72+48+8(%rsp,%rcx,8),%rdx  # next stashed m
        adcxq   %rax,%r14
        adoxq   %rsi,%r15
        movq    %rbx,(%rdi,%rcx,8)      # store finished limb
        movq    %r8,%rbx
        adcxq   %rsi,%r15

        incq    %rcx
        jnz     .Lsqrx8x_tail

        cmpq    0+8(%rsp),%rbp          # end of modulus?
        jae     .Lsqrx8x_tail_done

        subq    16+8(%rsp),%rsi         # saved carry -> CF
        movq    48+8(%rsp),%rdx
        leaq    64(%rbp),%rbp
        adcq    0(%rdi),%r8
        adcq    8(%rdi),%r9
        adcq    16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        leaq    64(%rdi),%rdi
        sbbq    %rax,%rax
        subq    $8,%rcx

        xorq    %rsi,%rsi               # clears CF/OF
        movq    %rax,16+8(%rsp)
        jmp     .Lsqrx8x_tail

.align  32
.Lsqrx8x_tail_done:
        xorq    %rax,%rax
        addq    24+8(%rsp),%r8          # fold in previous window's top carry
        adcq    $0,%r9
        adcq    $0,%r10
        adcq    $0,%r11
        adcq    $0,%r12
        adcq    $0,%r13
        adcq    $0,%r14
        adcq    $0,%r15
        adcq    $0,%rax

        subq    16+8(%rsp),%rsi         # saved carry -> CF
.Lsqrx8x_no_tail:
        adcq    0(%rdi),%r8
.byte   102,72,15,126,217               # movq %xmm3,%r9 (restore -num)
        adcq    8(%rdi),%r9
        # (end of __bn_sqrx8x_reduction: store the reduced window and loop)
        movq    56(%rbp),%rsi           # top modulus word
.byte   102,72,15,126,213               # movq %xmm5,%rbp (restore np)
        adcq    16(%rdi),%r10
        adcq    24(%rdi),%r11
        adcq    32(%rdi),%r12
        adcq    40(%rdi),%r13
        adcq    48(%rdi),%r14
        adcq    56(%rdi),%r15
        adcq    $0,%rax                 # %rax = top carry

        movq    32+8(%rsp),%rbx         # n0
        movq    64(%rdi,%rcx,1),%rdx    # next window's tp[0]

        movq    %r8,0(%rdi)
        leaq    64(%rdi),%r8
        movq    %r9,8(%rdi)
        movq    %r10,16(%rdi)
        movq    %r11,24(%rdi)
        movq    %r12,32(%rdi)
        movq    %r13,40(%rdi)
        movq    %r14,48(%rdi)
        movq    %r15,56(%rdi)

        leaq    64(%rdi,%rcx,1),%rdi
        cmpq    8+8(%rsp),%r8
        jb      .Lsqrx8x_reduction_loop
        .byte   0xf3,0xc3               # rep ret
.size   bn_sqrx8x_internal,.-bn_sqrx8x_internal

#
# __bn_postx4x_internal: constant-time conditional subtraction for the
# MULX path, using ANDN instead of NOT+AND. rp comes from %xmm1 into
# %rdx/%rsi; %rax is the 0/-1 subtraction mask; %rbp = np; %rdi = tp.
#
.align  32
__bn_postx4x_internal:
        movq    0(%rbp),%r12
        movq    %rcx,%r10
        movq    %rcx,%r9
        negq    %rax
        sarq    $3+2,%rcx               # -(number of 4-limb groups)

.byte   102,72,15,126,202               # movq %xmm1,%rdx — rp (store ptr)
.byte   102,72,15,126,206               # movq %xmm1,%rsi — rp
        decq    %r12                    # uniform masked path
        movq    8(%rbp),%r13
        xorq    %r8,%r8
        movq    16(%rbp),%r14
        movq    24(%rbp),%r15
        jmp     .Lsqrx4x_sub_entry

.align  16
.Lsqrx4x_sub:
        movq    0(%rbp),%r12
        movq    8(%rbp),%r13
        movq    16(%rbp),%r14
        movq    24(%rbp),%r15
.Lsqrx4x_sub_entry:
        andnq   %rax,%r12,%r12          # ~n & mask (BMI1)
        leaq    32(%rbp),%rbp
        andnq   %rax,%r13,%r13
        andnq   %rax,%r14,%r14
        andnq   %rax,%r15,%r15

        negq    %r8                     # restore borrow into CF
        adcq    0(%rdi),%r12            # tp + (~n & mask) + c == tp - (n & mask)
        adcq    8(%rdi),%r13
        adcq    16(%rdi),%r14
        adcq    24(%rdi),%r15
        movq    %r12,0(%rdx)
        leaq    32(%rdi),%rdi
        movq    %r13,8(%rdx)
        sbbq    %r8,%r8                 # save borrow
        movq    %r14,16(%rdx)
        movq    %r15,24(%rdx)
        leaq    32(%rdx),%rdx

        incq    %rcx
        jnz     .Lsqrx4x_sub

        negq    %r9                     # restore negated length

        .byte   0xf3,0xc3               # rep ret
.size   __bn_postx4x_internal,.-__bn_postx4x_internal

#
# bn_get_bits5(rdi=bn array, esi=bit offset): extract a 5-bit window that
# may straddle a 16-bit boundary; reads 16 bits at a word (or byte-offset)
# granularity and shifts/masks. Branch-free via cmov.
#
.globl  bn_get_bits5
.type   bn_get_bits5,@function
.align  16
bn_get_bits5:
        leaq    0(%rdi),%r10            # aligned base
        leaq    1(%rdi),%r11            # byte-shifted base
        movl    %esi,%ecx
        shrl    $4,%esi                 # 16-bit word index
        andl    $15,%ecx                # bit-within-word
        leal    -8(%rcx),%eax
        cmpl    $11,%ecx                # window crosses the word?
        cmovaq  %r11,%r10               # then read from base+1 ...
        cmoval  %eax,%ecx               # ... with shift reduced by 8
        movzwl  (%r10,%rsi,2),%eax
        shrl    %cl,%eax
        andl    $31,%eax                # return 5 bits
        .byte   0xf3,0xc3               # rep ret
.size   bn_get_bits5,.-bn_get_bits5

#
# bn_scatter5(rdi=src, esi=num, rdx=table, rcx=power): store num qwords
# into the scatter table with a 256-byte stride (one slot per power).
#
.globl  bn_scatter5
.type   bn_scatter5,@function
.align  16
bn_scatter5:
        cmpl    $0,%esi
        jz      .Lscatter_epilogue      # num == 0: nothing to do
        leaq    (%rdx,%rcx,8),%rdx      # column for this power
.Lscatter:
        movq    (%rdi),%rax
        leaq    8(%rdi),%rdi
        movq    %rax,(%rdx)
        leaq    256(%rdx),%rdx          # stride = 32 powers * 8 bytes
        subl    $1,%esi
        jnz     .Lscatter
.Lscatter_epilogue:
        .byte   0xf3,0xc3               # rep ret
.size   bn_scatter5,.-bn_scatter5

#
# bn_gather5(rdi=dst, esi=num, rdx=table, ecx=power): cache-timing-safe
# gather — builds 16 equality masks on the stack, then ANDs every table
# row with its mask and ORs the results (no secret-dependent addressing).
#
.globl  bn_gather5
.type   bn_gather5,@function
.align  32
bn_gather5:
.LSEH_begin_bn_gather5:

.byte   0x4c,0x8d,0x14,0x24             # leaq (%rsp),%r10 (fixed encoding)
.byte   0x48,0x81,0xec,0x08,0x01,0x00,0x00      # subq $0x108,%rsp (fixed encoding)
        leaq    .Linc(%rip),%rax
        andq    $-16,%rsp               # align mask scratch

        movd    %ecx,%xmm5              # xmm5 = power
        movdqa  0(%rax),%xmm0           # {0,1}
        movdqa  16(%rax),%xmm1          # {2,2}
        leaq    128(%rdx),%r11          # middle of table
        leaq    128(%rsp),%rax          # middle of mask area

        # Generate mask[i] = (i == power) ? -1 : 0 for i = 0..15
        # (each mask covers two table qwords).
        pshufd  $0,%xmm5,%xmm5
        movdqa  %xmm1,%xmm4
        movdqa  %xmm1,%xmm2
        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm4,%xmm3

        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,-128(%rax)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,-112(%rax)
        movdqa  %xmm4,%xmm1

        paddd   %xmm3,%xmm0
        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,-96(%rax)
        movdqa  %xmm4,%xmm2
        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm3,-80(%rax)
        movdqa  %xmm4,%xmm3

        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,-64(%rax)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
        movdqa  %xmm1,-48(%rax)
        movdqa  %xmm4,%xmm1

        paddd   %xmm3,%xmm0
        pcmpeqd %xmm5,%xmm3
        movdqa  %xmm2,-32(%rax)
        movdqa  %xmm4,%xmm2
        paddd   %xmm0,%xmm1
        pcmpeqd %xmm5,%xmm0
        movdqa  %xmm3,-16(%rax)
        movdqa  %xmm4,%xmm3

        paddd   %xmm1,%xmm2
        pcmpeqd %xmm5,%xmm1
        movdqa  %xmm0,0(%rax)
        movdqa  %xmm4,%xmm0

        paddd   %xmm2,%xmm3
        pcmpeqd %xmm5,%xmm2
movdqa %xmm1,16(%rax) 3557 movdqa %xmm4,%xmm1 3558 3559 paddd %xmm3,%xmm0 3560 pcmpeqd %xmm5,%xmm3 3561 movdqa %xmm2,32(%rax) 3562 movdqa %xmm4,%xmm2 3563 paddd %xmm0,%xmm1 3564 pcmpeqd %xmm5,%xmm0 3565 movdqa %xmm3,48(%rax) 3566 movdqa %xmm4,%xmm3 3567 3568 paddd %xmm1,%xmm2 3569 pcmpeqd %xmm5,%xmm1 3570 movdqa %xmm0,64(%rax) 3571 movdqa %xmm4,%xmm0 3572 3573 paddd %xmm2,%xmm3 3574 pcmpeqd %xmm5,%xmm2 3575 movdqa %xmm1,80(%rax) 3576 movdqa %xmm4,%xmm1 3577 3578 paddd %xmm3,%xmm0 3579 pcmpeqd %xmm5,%xmm3 3580 movdqa %xmm2,96(%rax) 3581 movdqa %xmm4,%xmm2 3582 movdqa %xmm3,112(%rax) 3583 jmp .Lgather 3584 3585.align 32 3586.Lgather: 3587 pxor %xmm4,%xmm4 3588 pxor %xmm5,%xmm5 3589 movdqa -128(%r11),%xmm0 3590 movdqa -112(%r11),%xmm1 3591 movdqa -96(%r11),%xmm2 3592 pand -128(%rax),%xmm0 3593 movdqa -80(%r11),%xmm3 3594 pand -112(%rax),%xmm1 3595 por %xmm0,%xmm4 3596 pand -96(%rax),%xmm2 3597 por %xmm1,%xmm5 3598 pand -80(%rax),%xmm3 3599 por %xmm2,%xmm4 3600 por %xmm3,%xmm5 3601 movdqa -64(%r11),%xmm0 3602 movdqa -48(%r11),%xmm1 3603 movdqa -32(%r11),%xmm2 3604 pand -64(%rax),%xmm0 3605 movdqa -16(%r11),%xmm3 3606 pand -48(%rax),%xmm1 3607 por %xmm0,%xmm4 3608 pand -32(%rax),%xmm2 3609 por %xmm1,%xmm5 3610 pand -16(%rax),%xmm3 3611 por %xmm2,%xmm4 3612 por %xmm3,%xmm5 3613 movdqa 0(%r11),%xmm0 3614 movdqa 16(%r11),%xmm1 3615 movdqa 32(%r11),%xmm2 3616 pand 0(%rax),%xmm0 3617 movdqa 48(%r11),%xmm3 3618 pand 16(%rax),%xmm1 3619 por %xmm0,%xmm4 3620 pand 32(%rax),%xmm2 3621 por %xmm1,%xmm5 3622 pand 48(%rax),%xmm3 3623 por %xmm2,%xmm4 3624 por %xmm3,%xmm5 3625 movdqa 64(%r11),%xmm0 3626 movdqa 80(%r11),%xmm1 3627 movdqa 96(%r11),%xmm2 3628 pand 64(%rax),%xmm0 3629 movdqa 112(%r11),%xmm3 3630 pand 80(%rax),%xmm1 3631 por %xmm0,%xmm4 3632 pand 96(%rax),%xmm2 3633 por %xmm1,%xmm5 3634 pand 112(%rax),%xmm3 3635 por %xmm2,%xmm4 3636 por %xmm3,%xmm5 3637 por %xmm5,%xmm4 3638 leaq 256(%r11),%r11 3639 pshufd $0x4e,%xmm4,%xmm0 3640 por %xmm4,%xmm0 3641 movq %xmm0,(%rdi) 3642 
leaq 8(%rdi),%rdi 3643 subl $1,%esi 3644 jnz .Lgather 3645 3646 leaq (%r10),%rsp 3647 .byte 0xf3,0xc3 3648.LSEH_end_bn_gather5: 3649.size bn_gather5,.-bn_gather5 3650.align 64 3651.Linc: 3652.long 0,0, 1,1 3653.long 2,2, 2,2 3654.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3655