/* x86_64-mont5.S revision 306195 */
/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/x86_64-mont5.S 306195 2016-09-22 14:57:48Z jkim $ */
/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
.text



#-----------------------------------------------------------------------
# bn_mul_mont_gather5 -- Montgomery multiplication where the second
# operand is fetched from a table of pre-computed powers ("gather").
# ABI: SysV AMD64.  Register roles as used below (argument names per
# OpenSSL's bn_mul_mont convention -- confirm against the caller):
#   %rdi = rp (result), %rsi = ap, %rdx = bp (base of the power table),
#   %rcx = np (modulus), %r8 -> n0 (Montgomery constant),
#   %r9d = num (length in 64-bit words), 8(%rsp) = gather index.
# The table entry is selected with SSE2 pcmpeqd/pand/por masks so that
# every table element is touched regardless of the index -- the memory
# access pattern is data-independent (cache-timing defence).
# When num is a multiple of 8 the code dispatches to the 4x code path.
#-----------------------------------------------------------------------
.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
	movl %r9d,%r9d			# zero-extend num
	movq %rsp,%rax			# keep caller's %rsp for the epilogue
	testl $7,%r9d
	jnz .Lmul_enter			# num % 8 != 0 -> generic 1x path
	movl OPENSSL_ia32cap_P+8(%rip),%r11d	# capability word for mulx dispatch
	jmp .Lmul4x_enter

.align 16
.Lmul_enter:
	movd 8(%rsp),%xmm5		# gather index (7th argument, on stack)
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15

	negq %r9
	movq %rsp,%r11
	leaq -280(%rsp,%r9,8),%r10	# reserve num words + gather scratch
	negq %r9
	andq $-1024,%r10		# 1KB-align the new stack frame

	subq %r10,%r11
	andq $-4096,%r11
	leaq (%r10,%r11,1),%rsp
	movq (%rsp),%r11		# probe the frame page by page so the
	cmpq %r10,%rsp			# OS grows the stack (no guard-page skip)
	ja .Lmul_page_walk
	jmp .Lmul_page_walk_done

.Lmul_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja .Lmul_page_walk
.Lmul_page_walk_done:

	leaq .Linc(%rip),%r10		# {0,1,2,3} increment constants
	movq %rax,8(%rsp,%r9,8)		# save caller's %rsp in the frame
.Lmul_body:

	# Build 16 x 128-bit masks at 112..352(%r10): lane i is all-ones
	# iff i == gather index (%xmm5).  Used to gather bp[index] without
	# an index-dependent load address.
	leaq 128(%rdx),%r12
	movdqa 0(%r10),%xmm0
	movdqa 16(%r10),%xmm1
	leaq 24-112(%rsp,%r9,8),%r10
	andq $-16,%r10

	pshufd $0,%xmm5,%xmm5		# broadcast index to all dwords
	movdqa %xmm1,%xmm4
	movdqa %xmm1,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
.byte 0x67				# addr-size prefix: code-alignment padding
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,112(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,128(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,144(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,160(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,176(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,192(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,208(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,224(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,240(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,256(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,272(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,288(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,304(%r10)

	paddd %xmm2,%xmm3
.byte 0x67
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,320(%r10)

	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,336(%r10)

	# Gather bp[index]: AND every table line with its mask, OR together.
	pand 64(%r12),%xmm0

	pand 80(%r12),%xmm1
	pand 96(%r12),%xmm2
	movdqa %xmm3,352(%r10)
	pand 112(%r12),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -128(%r12),%xmm4
	movdqa -112(%r12),%xmm5
	movdqa -96(%r12),%xmm2
	pand 112(%r10),%xmm4
	movdqa -80(%r12),%xmm3
	pand 128(%r10),%xmm5
	por %xmm4,%xmm0
	pand 144(%r10),%xmm2
	por %xmm5,%xmm1
	pand 160(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -64(%r12),%xmm4
	movdqa -48(%r12),%xmm5
	movdqa -32(%r12),%xmm2
	pand 176(%r10),%xmm4
	movdqa -16(%r12),%xmm3
	pand 192(%r10),%xmm5
	por %xmm4,%xmm0
	pand 208(%r10),%xmm2
	por %xmm5,%xmm1
	pand 224(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa 0(%r12),%xmm4
	movdqa 16(%r12),%xmm5
	movdqa 32(%r12),%xmm2
	pand 240(%r10),%xmm4
	movdqa 48(%r12),%xmm3
	pand 256(%r10),%xmm5
	por %xmm4,%xmm0
	pand 272(%r10),%xmm2
	por %xmm5,%xmm1
	pand 288(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	por %xmm1,%xmm0
	pshufd $0x4e,%xmm0,%xmm1	# fold high qword onto low
	por %xmm1,%xmm0
	leaq 256(%r12),%r12		# advance to next table stripe
.byte 102,72,15,126,195			# movq %xmm0,%rbx (gathered b[0])

	movq (%r8),%r8			# n0
	movq (%rsi),%rax		# a[0]

	xorq %r14,%r14			# outer-loop counter i = 0
	xorq %r15,%r15			# inner-loop counter j = 0

	# --- first outer iteration: tp = a * b[0], reduced on the fly ---
	movq %r8,%rbp
	mulq %rbx			# a[0] * b[0]
	movq %rax,%r10
	movq (%rcx),%rax
	imulq %r10,%rbp			# m = tp[0] * n0 (mod 2^64)
	movq %rdx,%r11

	mulq %rbp			# np[0] * m
	addq %rax,%r10			# discard tp[0]: known to cancel
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .L1st_enter

.align 16
.L1st:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r13
	movq %r10,%r11
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.L1st_enter:
	mulq %rbx			# a[j] * b[0]
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 1(%r15),%r15
	movq %rdx,%r10

	mulq %rbp			# np[j] * m
	cmpq %r9,%r15
	jne .L1st


	addq %rax,%r13
	adcq $0,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r9,8)
	movq %rdx,%r13
	movq %r10,%r11

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)		# top carry word

	leaq 1(%r14),%r14
	jmp .Louter
.align 16
.Louter:
	# Gather b[i] for this iteration, same constant-time mask scheme;
	# masks were stored at 24+128(%rsp,num,8) during setup.
	leaq 24+128(%rsp,%r9,8),%rdx
	andq $-16,%rdx
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	movdqa -128(%r12),%xmm0
	movdqa -112(%r12),%xmm1
	movdqa -96(%r12),%xmm2
	movdqa -80(%r12),%xmm3
	pand -128(%rdx),%xmm0
	pand -112(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -80(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa -64(%r12),%xmm0
	movdqa -48(%r12),%xmm1
	movdqa -32(%r12),%xmm2
	movdqa -16(%r12),%xmm3
	pand -64(%rdx),%xmm0
	pand -48(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -16(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 0(%r12),%xmm0
	movdqa 16(%r12),%xmm1
	movdqa 32(%r12),%xmm2
	movdqa 48(%r12),%xmm3
	pand 0(%rdx),%xmm0
	pand 16(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 48(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 64(%r12),%xmm0
	movdqa 80(%r12),%xmm1
	movdqa 96(%r12),%xmm2
	movdqa 112(%r12),%xmm3
	pand 64(%rdx),%xmm0
	pand 80(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 112(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	por %xmm5,%xmm4
	pshufd $0x4e,%xmm4,%xmm0
	por %xmm4,%xmm0
	leaq 256(%r12),%r12

	movq (%rsi),%rax		# a[0]
.byte 102,72,15,126,195			# movq %xmm0,%rbx (gathered b[i])

	xorq %r15,%r15			# j = 0
	movq %r8,%rbp
	movq (%rsp),%r10		# tp[0]

	mulq %rbx			# a[0] * b[i]
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp			# m = tp[0] * n0
	movq %rdx,%r11

	mulq %rbp			# np[0] * m
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq 8(%rsp),%r10		# tp[1]
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .Linner_enter

.align 16
.Linner:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13			# + tp[j]
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.Linner_enter:
	mulq %rbx			# a[j] * b[i]
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r10
	movq %rdx,%r11
	adcq $0,%r11
	leaq 1(%r15),%r15

	mulq %rbp			# np[j] * m
	cmpq %r9,%r15
	jne .Linner

	addq %rax,%r13
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r9,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r9,8)
	movq %rdx,%r13

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	addq %r10,%r13			# + previous top carry
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)

	leaq 1(%r14),%r14
	cmpq %r9,%r14
	jb .Louter

	# --- final reduction: rp = tp - np if tp >= np, else tp ---
	xorq %r14,%r14
	movq (%rsp),%rax
	leaq (%rsp),%rsi
	movq %r9,%r15
	jmp .Lsub
.align 16
.Lsub:	sbbq (%rcx,%r14,8),%rax		# borrow-propagating tp - np into rp
	movq %rax,(%rdi,%r14,8)
	movq 8(%rsi,%r14,8),%rax
	leaq 1(%r14),%r14
	decq %r15
	jnz .Lsub

	sbbq $0,%rax			# fold in top-word borrow -> 0 or -1
	xorq %r14,%r14
	andq %rax,%rsi			# branch-free select: keep tp if borrow,
	notq %rax
	movq %rdi,%rcx
	andq %rax,%rcx			# else keep the subtracted copy in rp
	movq %r9,%r15
	orq %rcx,%rsi
.align 16
.Lcopy:
	movq (%rsi,%r14,8),%rax
	movq %r14,(%rsp,%r14,8)		# overwrite the temporary vector
	movq %rax,(%rdi,%r14,8)
	leaq 1(%r14),%r14
	subq $1,%r15
	jnz .Lcopy

	movq 8(%rsp,%r9,8),%rsi		# caller's %rsp saved at .Lmul_body
	movq $1,%rax			# return 1

	movq -48(%rsi),%r15		# restore callee-saved registers
	movq -40(%rsi),%r14
	movq -32(%rsi),%r13
	movq -24(%rsi),%r12
	movq -16(%rsi),%rbp
	movq -8(%rsi),%rbx
	leaq (%rsi),%rsp
.Lmul_epilogue:
	.byte 0xf3,0xc3			# rep ret
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5

#-----------------------------------------------------------------------
# bn_mul4x_mont_gather5 -- 4-way unrolled variant, used when num % 8 == 0.
# Entered either directly or via the .Lmul4x_enter tail from
# bn_mul_mont_gather5 (with %r11d = OPENSSL_ia32cap_P[2]).  If the CPU
# reports the 0x80108 feature bits it branches to the mulx/adx path
# (.Lmulx4x_enter, defined elsewhere in this file).  The real work is
# done by mul4x_internal below.
#-----------------------------------------------------------------------
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
.byte 0x67
	movq %rsp,%rax			# keep caller's %rsp
.Lmul4x_enter:
	andl $0x80108,%r11d
	cmpl $0x80108,%r11d
	je .Lmulx4x_enter
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
.Lmul4x_prologue:

.byte 0x67
	shll $3,%r9d			# num in bytes
	leaq (%r9,%r9,2),%r10		# 3*num
	negq %r9

	# Place the frame so it does not alias rp modulo 4096 (avoids
	# cache-bank/page conflicts between tp and rp).
	leaq -320(%rsp,%r9,2),%r11
	movq %rsp,%rbp
	subq %rdi,%r11
	andq $4095,%r11
	cmpq %r11,%r10
	jb .Lmul4xsp_alt
	subq %r11,%rbp
	leaq -320(%rbp,%r9,2),%rbp
	jmp .Lmul4xsp_done

.align 32
.Lmul4xsp_alt:
	leaq 4096-320(,%r9,2),%r10
	leaq -320(%rbp,%r9,2),%rbp
	subq %r10,%r11
	movq $0,%r10
	cmovcq %r10,%r11
	subq %r11,%rbp
.Lmul4xsp_done:
	andq $-64,%rbp
	movq %rsp,%r11
	subq %rbp,%r11
	andq $-4096,%r11
	leaq (%r11,%rbp,1),%rsp
	movq (%rsp),%r10		# probe new stack page by page
	cmpq %rbp,%rsp
	ja .Lmul4x_page_walk
	jmp .Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq %r9

	movq %rax,40(%rsp)		# save caller's %rsp
.Lmul4x_body:

	call mul4x_internal

	movq 40(%rsp),%rsi
	movq $1,%rax			# return 1

	movq -48(%rsi),%r15		# restore callee-saved registers
	movq -40(%rsi),%r14
	movq -32(%rsi),%r13
	movq -24(%rsi),%r12
	movq -16(%rsi),%rbp
	movq -8(%rsi),%rbx
	leaq (%rsi),%rsp
.Lmul4x_epilogue:
	.byte 0xf3,0xc3			# rep ret
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

#-----------------------------------------------------------------------
# mul4x_internal -- 4-way unrolled Montgomery multiply worker, shared by
# bn_mul4x_mont_gather5 and bn_power5.  Not C-callable: expects the
# frame set up by its callers, %r9 = num (words, negated/scaled inside),
# %rax -> caller's original stack (gather index at 8(%rax)), %rdx = bp,
# and the usual rp/ap/np/n0 in %rdi/%rsi/%rcx/%r8.  Uses the same
# constant-time mask gather as above.  Falls through at the end to
# .Lsqr4x_sub_entry (defined later in this file) for the final
# conditional subtraction.
#-----------------------------------------------------------------------
.type mul4x_internal,@function
.align 32
mul4x_internal:
	shlq $5,%r9
	movd 8(%rax),%xmm5		# gather index from caller's stack
	leaq .Linc(%rip),%rax
	leaq 128(%rdx,%r9,1),%r13	# end-of-table sentinel
	shrq $5,%r9
	movdqa 0(%rax),%xmm0
	movdqa 16(%rax),%xmm1
	leaq 88-112(%rsp,%r9,1),%r10
	leaq 128(%rdx),%r12

	# Build the 16 compare masks (lane == index), as in the 1x path.
	pshufd $0,%xmm5,%xmm5
	movdqa %xmm1,%xmm4
.byte 0x67,0x67
	movdqa %xmm1,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
.byte 0x67
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,112(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,128(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,144(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,160(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,176(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,192(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,208(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,224(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,240(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,256(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,272(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,288(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,304(%r10)

	paddd %xmm2,%xmm3
.byte 0x67
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,320(%r10)

	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,336(%r10)

	# Constant-time gather of b[0] from the table.
	pand 64(%r12),%xmm0

	pand 80(%r12),%xmm1
	pand 96(%r12),%xmm2
	movdqa %xmm3,352(%r10)
	pand 112(%r12),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -128(%r12),%xmm4
	movdqa -112(%r12),%xmm5
	movdqa -96(%r12),%xmm2
	pand 112(%r10),%xmm4
	movdqa -80(%r12),%xmm3
	pand 128(%r10),%xmm5
	por %xmm4,%xmm0
	pand 144(%r10),%xmm2
	por %xmm5,%xmm1
	pand 160(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -64(%r12),%xmm4
	movdqa -48(%r12),%xmm5
	movdqa -32(%r12),%xmm2
	pand 176(%r10),%xmm4
	movdqa -16(%r12),%xmm3
	pand 192(%r10),%xmm5
	por %xmm4,%xmm0
	pand 208(%r10),%xmm2
	por %xmm5,%xmm1
	pand 224(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa 0(%r12),%xmm4
	movdqa 16(%r12),%xmm5
	movdqa 32(%r12),%xmm2
	pand 240(%r10),%xmm4
	movdqa 48(%r12),%xmm3
	pand 256(%r10),%xmm5
	por %xmm4,%xmm0
	pand 272(%r10),%xmm2
	por %xmm5,%xmm1
	pand 288(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	por %xmm1,%xmm0
	pshufd $0x4e,%xmm0,%xmm1
	por %xmm1,%xmm0
	leaq 256(%r12),%r12
.byte 102,72,15,126,195			# movq %xmm0,%rbx (gathered b[0])

	movq %r13,16+8(%rsp)		# table end sentinel (loop bound)
	movq %rdi,56+8(%rsp)		# stash rp

	movq (%r8),%r8			# n0
	movq (%rsi),%rax		# a[0]
	leaq (%rsi,%r9,1),%rsi		# ap += num; index from -num upward
	negq %r9

	# --- first pass: tp = a * b[0] with interleaved reduction ---
	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	imulq %r10,%rbp			# m = tp[0] * n0
	leaq 64+8(%rsp),%r14
	movq %rdx,%r11

	mulq %rbp			# np[0] * m
	addq %rax,%r10
	movq 8(%rsi,%r9,1),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%r9),%r15
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdi,(%r14)
	movq %rdx,%r13
	jmp .L1st4x

.align 32
.L1st4x:
	# Four limbs per iteration: alternate a[j]*b[0] and np[j]*m.
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq 0(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdi,(%r14)
	movq %rdx,%r13

	addq $32,%r15
	jnz .L1st4x

	# Tail of the first pass (last four limbs, %r15 wrapped to 0).
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%r13

	leaq (%rcx,%r9,1),%rcx		# rewind np

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi			# %rdi = top carry
	movq %r13,-8(%r14)

	jmp .Louter4x

.align 32
.Louter4x:
	# Gather b[i] (masks live at 16+128(%r14)), constant time as before.
	leaq 16+128(%r14),%rdx
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	movdqa -128(%r12),%xmm0
	movdqa -112(%r12),%xmm1
	movdqa -96(%r12),%xmm2
	movdqa -80(%r12),%xmm3
	pand -128(%rdx),%xmm0
	pand -112(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -80(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa -64(%r12),%xmm0
	movdqa -48(%r12),%xmm1
	movdqa -32(%r12),%xmm2
	movdqa -16(%r12),%xmm3
	pand -64(%rdx),%xmm0
	pand -48(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -16(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 0(%r12),%xmm0
	movdqa 16(%r12),%xmm1
	movdqa 32(%r12),%xmm2
	movdqa 48(%r12),%xmm3
	pand 0(%rdx),%xmm0
	pand 16(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 48(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 64(%r12),%xmm0
	movdqa 80(%r12),%xmm1
	movdqa 96(%r12),%xmm2
	movdqa 112(%r12),%xmm3
	pand 64(%rdx),%xmm0
	pand 80(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 112(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	por %xmm5,%xmm4
	pshufd $0x4e,%xmm4,%xmm0
	por %xmm4,%xmm0
	leaq 256(%r12),%r12
.byte 102,72,15,126,195			# movq %xmm0,%rbx (gathered b[i])

	movq (%r14,%r9,1),%r10		# tp[0]
	movq %r8,%rbp
	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp			# m = tp[0] * n0
	movq %rdx,%r11
	movq %rdi,(%r14)		# flush previous top carry

	leaq (%r14,%r9,1),%r14		# rewind tp

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi,%r9,1),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%r9),%r15
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdx,%r13
	jmp .Linner4x

.align 32
.Linner4x:
	# Four limbs per iteration: a[j]*b[i] + np[j]*m + tp[j].
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	adcq $0,%rdx
	addq 16(%r14),%r10
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	addq -8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq 0(%rcx),%rax
	adcq $0,%rdx
	addq (%r14),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %r13,-8(%r14)
	movq %rdx,%r13

	addq $32,%r15
	jnz .Linner4x

	# Tail of the inner pass.
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	adcq $0,%rdx
	addq 16(%r14),%r10
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq %rbp,%rax			# keep m; fetch np[-1] into %rbp
	movq -8(%rcx),%rbp
	adcq $0,%rdx
	addq -8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp			# np[-1] * m (operands swapped)
	addq %rax,%rdi
	movq (%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%r13

	movq %rdi,-16(%r14)
	leaq (%rcx,%r9,1),%rcx		# rewind np

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	addq (%r14),%r13		# + previous top carry
	adcq $0,%rdi
	movq %r13,-8(%r14)

	cmpq 16+8(%rsp),%r12		# reached end-of-table sentinel?
	jb .Louter4x

	# Set up for the shared conditional-subtract tail
	# (.Lsqr4x_sub_entry, defined elsewhere in this file).
	xorq %rax,%rax
	subq %r13,%rbp
	adcq %r15,%r15
	orq %r15,%rdi
	subq %rdi,%rax			# %rax = 0 or -1 select mask
	leaq (%r14,%r9,1),%rbx		# tp
	movq (%rcx),%r12
	leaq (%rcx),%rbp		# np
	movq %r9,%rcx
	sarq $3+2,%rcx			# num / 32 iterations
	movq 56+8(%rsp),%rdi		# rp
	decq %r12
	xorq %r10,%r10
	movq 8(%rbp),%r13
	movq 16(%rbp),%r14
	movq 24(%rbp),%r15
	jmp .Lsqr4x_sub_entry
.size mul4x_internal,.-mul4x_internal
1038.globl bn_power5 1039.type bn_power5,@function 1040.align 32 1041bn_power5: 1042 movq %rsp,%rax 1043 movl OPENSSL_ia32cap_P+8(%rip),%r11d 1044 andl $0x80108,%r11d 1045 cmpl $0x80108,%r11d 1046 je .Lpowerx5_enter 1047 pushq %rbx 1048 pushq %rbp 1049 pushq %r12 1050 pushq %r13 1051 pushq %r14 1052 pushq %r15 1053.Lpower5_prologue: 1054 1055 shll $3,%r9d 1056 leal (%r9,%r9,2),%r10d 1057 negq %r9 1058 movq (%r8),%r8 1059 1060 1061 1062 1063 1064 1065 1066 1067 leaq -320(%rsp,%r9,2),%r11 1068 movq %rsp,%rbp 1069 subq %rdi,%r11 1070 andq $4095,%r11 1071 cmpq %r11,%r10 1072 jb .Lpwr_sp_alt 1073 subq %r11,%rbp 1074 leaq -320(%rbp,%r9,2),%rbp 1075 jmp .Lpwr_sp_done 1076 1077.align 32 1078.Lpwr_sp_alt: 1079 leaq 4096-320(,%r9,2),%r10 1080 leaq -320(%rbp,%r9,2),%rbp 1081 subq %r10,%r11 1082 movq $0,%r10 1083 cmovcq %r10,%r11 1084 subq %r11,%rbp 1085.Lpwr_sp_done: 1086 andq $-64,%rbp 1087 movq %rsp,%r11 1088 subq %rbp,%r11 1089 andq $-4096,%r11 1090 leaq (%r11,%rbp,1),%rsp 1091 movq (%rsp),%r10 1092 cmpq %rbp,%rsp 1093 ja .Lpwr_page_walk 1094 jmp .Lpwr_page_walk_done 1095 1096.Lpwr_page_walk: 1097 leaq -4096(%rsp),%rsp 1098 movq (%rsp),%r10 1099 cmpq %rbp,%rsp 1100 ja .Lpwr_page_walk 1101.Lpwr_page_walk_done: 1102 1103 movq %r9,%r10 1104 negq %r9 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 movq %r8,32(%rsp) 1116 movq %rax,40(%rsp) 1117.Lpower5_body: 1118.byte 102,72,15,110,207 1119.byte 102,72,15,110,209 1120.byte 102,73,15,110,218 1121.byte 102,72,15,110,226 1122 1123 call __bn_sqr8x_internal 1124 call __bn_post4x_internal 1125 call __bn_sqr8x_internal 1126 call __bn_post4x_internal 1127 call __bn_sqr8x_internal 1128 call __bn_post4x_internal 1129 call __bn_sqr8x_internal 1130 call __bn_post4x_internal 1131 call __bn_sqr8x_internal 1132 call __bn_post4x_internal 1133 1134.byte 102,72,15,126,209 1135.byte 102,72,15,126,226 1136 movq %rsi,%rdi 1137 movq 40(%rsp),%rax 1138 leaq 32(%rsp),%r8 1139 1140 call mul4x_internal 1141 1142 movq 40(%rsp),%rsi 1143 movq 
$1,%rax 1144 movq -48(%rsi),%r15 1145 movq -40(%rsi),%r14 1146 movq -32(%rsi),%r13 1147 movq -24(%rsi),%r12 1148 movq -16(%rsi),%rbp 1149 movq -8(%rsi),%rbx 1150 leaq (%rsi),%rsp 1151.Lpower5_epilogue: 1152 .byte 0xf3,0xc3 1153.size bn_power5,.-bn_power5 1154 1155.globl bn_sqr8x_internal 1156.hidden bn_sqr8x_internal 1157.type bn_sqr8x_internal,@function 1158.align 32 1159bn_sqr8x_internal: 1160__bn_sqr8x_internal: 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 leaq 32(%r10),%rbp 1235 leaq (%rsi,%r9,1),%rsi 1236 1237 movq %r9,%rcx 1238 1239 1240 movq -32(%rsi,%rbp,1),%r14 1241 leaq 48+8(%rsp,%r9,2),%rdi 1242 movq -24(%rsi,%rbp,1),%rax 1243 leaq -32(%rdi,%rbp,1),%rdi 1244 movq -16(%rsi,%rbp,1),%rbx 1245 movq %rax,%r15 1246 1247 mulq %r14 1248 movq %rax,%r10 1249 movq %rbx,%rax 1250 movq %rdx,%r11 1251 movq %r10,-24(%rdi,%rbp,1) 1252 1253 mulq %r14 1254 addq %rax,%r11 1255 movq %rbx,%rax 1256 adcq $0,%rdx 1257 movq %r11,-16(%rdi,%rbp,1) 1258 movq %rdx,%r10 1259 1260 1261 movq -8(%rsi,%rbp,1),%rbx 1262 mulq %r15 1263 movq %rax,%r12 1264 movq %rbx,%rax 1265 movq %rdx,%r13 1266 1267 leaq (%rbp),%rcx 1268 mulq %r14 1269 addq %rax,%r10 1270 movq %rbx,%rax 1271 movq %rdx,%r11 1272 adcq $0,%r11 1273 addq %r12,%r10 1274 adcq $0,%r11 1275 movq %r10,-8(%rdi,%rcx,1) 1276 jmp .Lsqr4x_1st 1277 1278.align 32 1279.Lsqr4x_1st: 1280 movq (%rsi,%rcx,1),%rbx 1281 mulq %r15 1282 addq %rax,%r13 1283 movq %rbx,%rax 1284 movq %rdx,%r12 1285 adcq $0,%r12 1286 1287 mulq %r14 1288 addq %rax,%r11 1289 movq %rbx,%rax 1290 movq 8(%rsi,%rcx,1),%rbx 1291 movq %rdx,%r10 1292 adcq $0,%r10 1293 addq %r13,%r11 1294 adcq $0,%r10 1295 1296 1297 mulq %r15 1298 addq %rax,%r12 1299 movq 
%rbx,%rax 1300 movq %r11,(%rdi,%rcx,1) 1301 movq %rdx,%r13 1302 adcq $0,%r13 1303 1304 mulq %r14 1305 addq %rax,%r10 1306 movq %rbx,%rax 1307 movq 16(%rsi,%rcx,1),%rbx 1308 movq %rdx,%r11 1309 adcq $0,%r11 1310 addq %r12,%r10 1311 adcq $0,%r11 1312 1313 mulq %r15 1314 addq %rax,%r13 1315 movq %rbx,%rax 1316 movq %r10,8(%rdi,%rcx,1) 1317 movq %rdx,%r12 1318 adcq $0,%r12 1319 1320 mulq %r14 1321 addq %rax,%r11 1322 movq %rbx,%rax 1323 movq 24(%rsi,%rcx,1),%rbx 1324 movq %rdx,%r10 1325 adcq $0,%r10 1326 addq %r13,%r11 1327 adcq $0,%r10 1328 1329 1330 mulq %r15 1331 addq %rax,%r12 1332 movq %rbx,%rax 1333 movq %r11,16(%rdi,%rcx,1) 1334 movq %rdx,%r13 1335 adcq $0,%r13 1336 leaq 32(%rcx),%rcx 1337 1338 mulq %r14 1339 addq %rax,%r10 1340 movq %rbx,%rax 1341 movq %rdx,%r11 1342 adcq $0,%r11 1343 addq %r12,%r10 1344 adcq $0,%r11 1345 movq %r10,-8(%rdi,%rcx,1) 1346 1347 cmpq $0,%rcx 1348 jne .Lsqr4x_1st 1349 1350 mulq %r15 1351 addq %rax,%r13 1352 leaq 16(%rbp),%rbp 1353 adcq $0,%rdx 1354 addq %r11,%r13 1355 adcq $0,%rdx 1356 1357 movq %r13,(%rdi) 1358 movq %rdx,%r12 1359 movq %rdx,8(%rdi) 1360 jmp .Lsqr4x_outer 1361 1362.align 32 1363.Lsqr4x_outer: 1364 movq -32(%rsi,%rbp,1),%r14 1365 leaq 48+8(%rsp,%r9,2),%rdi 1366 movq -24(%rsi,%rbp,1),%rax 1367 leaq -32(%rdi,%rbp,1),%rdi 1368 movq -16(%rsi,%rbp,1),%rbx 1369 movq %rax,%r15 1370 1371 mulq %r14 1372 movq -24(%rdi,%rbp,1),%r10 1373 addq %rax,%r10 1374 movq %rbx,%rax 1375 adcq $0,%rdx 1376 movq %r10,-24(%rdi,%rbp,1) 1377 movq %rdx,%r11 1378 1379 mulq %r14 1380 addq %rax,%r11 1381 movq %rbx,%rax 1382 adcq $0,%rdx 1383 addq -16(%rdi,%rbp,1),%r11 1384 movq %rdx,%r10 1385 adcq $0,%r10 1386 movq %r11,-16(%rdi,%rbp,1) 1387 1388 xorq %r12,%r12 1389 1390 movq -8(%rsi,%rbp,1),%rbx 1391 mulq %r15 1392 addq %rax,%r12 1393 movq %rbx,%rax 1394 adcq $0,%rdx 1395 addq -8(%rdi,%rbp,1),%r12 1396 movq %rdx,%r13 1397 adcq $0,%r13 1398 1399 mulq %r14 1400 addq %rax,%r10 1401 movq %rbx,%rax 1402 adcq $0,%rdx 1403 addq %r12,%r10 1404 movq 
%rdx,%r11 1405 adcq $0,%r11 1406 movq %r10,-8(%rdi,%rbp,1) 1407 1408 leaq (%rbp),%rcx 1409 jmp .Lsqr4x_inner 1410 1411.align 32 1412.Lsqr4x_inner: 1413 movq (%rsi,%rcx,1),%rbx 1414 mulq %r15 1415 addq %rax,%r13 1416 movq %rbx,%rax 1417 movq %rdx,%r12 1418 adcq $0,%r12 1419 addq (%rdi,%rcx,1),%r13 1420 adcq $0,%r12 1421 1422.byte 0x67 1423 mulq %r14 1424 addq %rax,%r11 1425 movq %rbx,%rax 1426 movq 8(%rsi,%rcx,1),%rbx 1427 movq %rdx,%r10 1428 adcq $0,%r10 1429 addq %r13,%r11 1430 adcq $0,%r10 1431 1432 mulq %r15 1433 addq %rax,%r12 1434 movq %r11,(%rdi,%rcx,1) 1435 movq %rbx,%rax 1436 movq %rdx,%r13 1437 adcq $0,%r13 1438 addq 8(%rdi,%rcx,1),%r12 1439 leaq 16(%rcx),%rcx 1440 adcq $0,%r13 1441 1442 mulq %r14 1443 addq %rax,%r10 1444 movq %rbx,%rax 1445 adcq $0,%rdx 1446 addq %r12,%r10 1447 movq %rdx,%r11 1448 adcq $0,%r11 1449 movq %r10,-8(%rdi,%rcx,1) 1450 1451 cmpq $0,%rcx 1452 jne .Lsqr4x_inner 1453 1454.byte 0x67 1455 mulq %r15 1456 addq %rax,%r13 1457 adcq $0,%rdx 1458 addq %r11,%r13 1459 adcq $0,%rdx 1460 1461 movq %r13,(%rdi) 1462 movq %rdx,%r12 1463 movq %rdx,8(%rdi) 1464 1465 addq $16,%rbp 1466 jnz .Lsqr4x_outer 1467 1468 1469 movq -32(%rsi),%r14 1470 leaq 48+8(%rsp,%r9,2),%rdi 1471 movq -24(%rsi),%rax 1472 leaq -32(%rdi,%rbp,1),%rdi 1473 movq -16(%rsi),%rbx 1474 movq %rax,%r15 1475 1476 mulq %r14 1477 addq %rax,%r10 1478 movq %rbx,%rax 1479 movq %rdx,%r11 1480 adcq $0,%r11 1481 1482 mulq %r14 1483 addq %rax,%r11 1484 movq %rbx,%rax 1485 movq %r10,-24(%rdi) 1486 movq %rdx,%r10 1487 adcq $0,%r10 1488 addq %r13,%r11 1489 movq -8(%rsi),%rbx 1490 adcq $0,%r10 1491 1492 mulq %r15 1493 addq %rax,%r12 1494 movq %rbx,%rax 1495 movq %r11,-16(%rdi) 1496 movq %rdx,%r13 1497 adcq $0,%r13 1498 1499 mulq %r14 1500 addq %rax,%r10 1501 movq %rbx,%rax 1502 movq %rdx,%r11 1503 adcq $0,%r11 1504 addq %r12,%r10 1505 adcq $0,%r11 1506 movq %r10,-8(%rdi) 1507 1508 mulq %r15 1509 addq %rax,%r13 1510 movq -16(%rsi),%rax 1511 adcq $0,%rdx 1512 addq %r11,%r13 1513 adcq $0,%rdx 1514 
1515 movq %r13,(%rdi) 1516 movq %rdx,%r12 1517 movq %rdx,8(%rdi) 1518 1519 mulq %rbx 1520 addq $16,%rbp 1521 xorq %r14,%r14 1522 subq %r9,%rbp 1523 xorq %r15,%r15 1524 1525 addq %r12,%rax 1526 adcq $0,%rdx 1527 movq %rax,8(%rdi) 1528 movq %rdx,16(%rdi) 1529 movq %r15,24(%rdi) 1530 1531 movq -16(%rsi,%rbp,1),%rax 1532 leaq 48+8(%rsp),%rdi 1533 xorq %r10,%r10 1534 movq 8(%rdi),%r11 1535 1536 leaq (%r14,%r10,2),%r12 1537 shrq $63,%r10 1538 leaq (%rcx,%r11,2),%r13 1539 shrq $63,%r11 1540 orq %r10,%r13 1541 movq 16(%rdi),%r10 1542 movq %r11,%r14 1543 mulq %rax 1544 negq %r15 1545 movq 24(%rdi),%r11 1546 adcq %rax,%r12 1547 movq -8(%rsi,%rbp,1),%rax 1548 movq %r12,(%rdi) 1549 adcq %rdx,%r13 1550 1551 leaq (%r14,%r10,2),%rbx 1552 movq %r13,8(%rdi) 1553 sbbq %r15,%r15 1554 shrq $63,%r10 1555 leaq (%rcx,%r11,2),%r8 1556 shrq $63,%r11 1557 orq %r10,%r8 1558 movq 32(%rdi),%r10 1559 movq %r11,%r14 1560 mulq %rax 1561 negq %r15 1562 movq 40(%rdi),%r11 1563 adcq %rax,%rbx 1564 movq 0(%rsi,%rbp,1),%rax 1565 movq %rbx,16(%rdi) 1566 adcq %rdx,%r8 1567 leaq 16(%rbp),%rbp 1568 movq %r8,24(%rdi) 1569 sbbq %r15,%r15 1570 leaq 64(%rdi),%rdi 1571 jmp .Lsqr4x_shift_n_add 1572 1573.align 32 1574.Lsqr4x_shift_n_add: 1575 leaq (%r14,%r10,2),%r12 1576 shrq $63,%r10 1577 leaq (%rcx,%r11,2),%r13 1578 shrq $63,%r11 1579 orq %r10,%r13 1580 movq -16(%rdi),%r10 1581 movq %r11,%r14 1582 mulq %rax 1583 negq %r15 1584 movq -8(%rdi),%r11 1585 adcq %rax,%r12 1586 movq -8(%rsi,%rbp,1),%rax 1587 movq %r12,-32(%rdi) 1588 adcq %rdx,%r13 1589 1590 leaq (%r14,%r10,2),%rbx 1591 movq %r13,-24(%rdi) 1592 sbbq %r15,%r15 1593 shrq $63,%r10 1594 leaq (%rcx,%r11,2),%r8 1595 shrq $63,%r11 1596 orq %r10,%r8 1597 movq 0(%rdi),%r10 1598 movq %r11,%r14 1599 mulq %rax 1600 negq %r15 1601 movq 8(%rdi),%r11 1602 adcq %rax,%rbx 1603 movq 0(%rsi,%rbp,1),%rax 1604 movq %rbx,-16(%rdi) 1605 adcq %rdx,%r8 1606 1607 leaq (%r14,%r10,2),%r12 1608 movq %r8,-8(%rdi) 1609 sbbq %r15,%r15 1610 shrq $63,%r10 1611 leaq (%rcx,%r11,2),%r13 
1612 shrq $63,%r11 1613 orq %r10,%r13 1614 movq 16(%rdi),%r10 1615 movq %r11,%r14 1616 mulq %rax 1617 negq %r15 1618 movq 24(%rdi),%r11 1619 adcq %rax,%r12 1620 movq 8(%rsi,%rbp,1),%rax 1621 movq %r12,0(%rdi) 1622 adcq %rdx,%r13 1623 1624 leaq (%r14,%r10,2),%rbx 1625 movq %r13,8(%rdi) 1626 sbbq %r15,%r15 1627 shrq $63,%r10 1628 leaq (%rcx,%r11,2),%r8 1629 shrq $63,%r11 1630 orq %r10,%r8 1631 movq 32(%rdi),%r10 1632 movq %r11,%r14 1633 mulq %rax 1634 negq %r15 1635 movq 40(%rdi),%r11 1636 adcq %rax,%rbx 1637 movq 16(%rsi,%rbp,1),%rax 1638 movq %rbx,16(%rdi) 1639 adcq %rdx,%r8 1640 movq %r8,24(%rdi) 1641 sbbq %r15,%r15 1642 leaq 64(%rdi),%rdi 1643 addq $32,%rbp 1644 jnz .Lsqr4x_shift_n_add 1645 1646 leaq (%r14,%r10,2),%r12 1647.byte 0x67 1648 shrq $63,%r10 1649 leaq (%rcx,%r11,2),%r13 1650 shrq $63,%r11 1651 orq %r10,%r13 1652 movq -16(%rdi),%r10 1653 movq %r11,%r14 1654 mulq %rax 1655 negq %r15 1656 movq -8(%rdi),%r11 1657 adcq %rax,%r12 1658 movq -8(%rsi),%rax 1659 movq %r12,-32(%rdi) 1660 adcq %rdx,%r13 1661 1662 leaq (%r14,%r10,2),%rbx 1663 movq %r13,-24(%rdi) 1664 sbbq %r15,%r15 1665 shrq $63,%r10 1666 leaq (%rcx,%r11,2),%r8 1667 shrq $63,%r11 1668 orq %r10,%r8 1669 mulq %rax 1670 negq %r15 1671 adcq %rax,%rbx 1672 adcq %rdx,%r8 1673 movq %rbx,-16(%rdi) 1674 movq %r8,-8(%rdi) 1675.byte 102,72,15,126,213 1676__bn_sqr8x_reduction: 1677 xorq %rax,%rax 1678 leaq (%r9,%rbp,1),%rcx 1679 leaq 48+8(%rsp,%r9,2),%rdx 1680 movq %rcx,0+8(%rsp) 1681 leaq 48+8(%rsp,%r9,1),%rdi 1682 movq %rdx,8+8(%rsp) 1683 negq %r9 1684 jmp .L8x_reduction_loop 1685 1686.align 32 1687.L8x_reduction_loop: 1688 leaq (%rdi,%r9,1),%rdi 1689.byte 0x66 1690 movq 0(%rdi),%rbx 1691 movq 8(%rdi),%r9 1692 movq 16(%rdi),%r10 1693 movq 24(%rdi),%r11 1694 movq 32(%rdi),%r12 1695 movq 40(%rdi),%r13 1696 movq 48(%rdi),%r14 1697 movq 56(%rdi),%r15 1698 movq %rax,(%rdx) 1699 leaq 64(%rdi),%rdi 1700 1701.byte 0x67 1702 movq %rbx,%r8 1703 imulq 32+8(%rsp),%rbx 1704 movq 0(%rbp),%rax 1705 movl $8,%ecx 1706 jmp 
.L8x_reduce 1707 1708.align 32 1709.L8x_reduce: 1710 mulq %rbx 1711 movq 8(%rbp),%rax 1712 negq %r8 1713 movq %rdx,%r8 1714 adcq $0,%r8 1715 1716 mulq %rbx 1717 addq %rax,%r9 1718 movq 16(%rbp),%rax 1719 adcq $0,%rdx 1720 addq %r9,%r8 1721 movq %rbx,48-8+8(%rsp,%rcx,8) 1722 movq %rdx,%r9 1723 adcq $0,%r9 1724 1725 mulq %rbx 1726 addq %rax,%r10 1727 movq 24(%rbp),%rax 1728 adcq $0,%rdx 1729 addq %r10,%r9 1730 movq 32+8(%rsp),%rsi 1731 movq %rdx,%r10 1732 adcq $0,%r10 1733 1734 mulq %rbx 1735 addq %rax,%r11 1736 movq 32(%rbp),%rax 1737 adcq $0,%rdx 1738 imulq %r8,%rsi 1739 addq %r11,%r10 1740 movq %rdx,%r11 1741 adcq $0,%r11 1742 1743 mulq %rbx 1744 addq %rax,%r12 1745 movq 40(%rbp),%rax 1746 adcq $0,%rdx 1747 addq %r12,%r11 1748 movq %rdx,%r12 1749 adcq $0,%r12 1750 1751 mulq %rbx 1752 addq %rax,%r13 1753 movq 48(%rbp),%rax 1754 adcq $0,%rdx 1755 addq %r13,%r12 1756 movq %rdx,%r13 1757 adcq $0,%r13 1758 1759 mulq %rbx 1760 addq %rax,%r14 1761 movq 56(%rbp),%rax 1762 adcq $0,%rdx 1763 addq %r14,%r13 1764 movq %rdx,%r14 1765 adcq $0,%r14 1766 1767 mulq %rbx 1768 movq %rsi,%rbx 1769 addq %rax,%r15 1770 movq 0(%rbp),%rax 1771 adcq $0,%rdx 1772 addq %r15,%r14 1773 movq %rdx,%r15 1774 adcq $0,%r15 1775 1776 decl %ecx 1777 jnz .L8x_reduce 1778 1779 leaq 64(%rbp),%rbp 1780 xorq %rax,%rax 1781 movq 8+8(%rsp),%rdx 1782 cmpq 0+8(%rsp),%rbp 1783 jae .L8x_no_tail 1784 1785.byte 0x66 1786 addq 0(%rdi),%r8 1787 adcq 8(%rdi),%r9 1788 adcq 16(%rdi),%r10 1789 adcq 24(%rdi),%r11 1790 adcq 32(%rdi),%r12 1791 adcq 40(%rdi),%r13 1792 adcq 48(%rdi),%r14 1793 adcq 56(%rdi),%r15 1794 sbbq %rsi,%rsi 1795 1796 movq 48+56+8(%rsp),%rbx 1797 movl $8,%ecx 1798 movq 0(%rbp),%rax 1799 jmp .L8x_tail 1800 1801.align 32 1802.L8x_tail: 1803 mulq %rbx 1804 addq %rax,%r8 1805 movq 8(%rbp),%rax 1806 movq %r8,(%rdi) 1807 movq %rdx,%r8 1808 adcq $0,%r8 1809 1810 mulq %rbx 1811 addq %rax,%r9 1812 movq 16(%rbp),%rax 1813 adcq $0,%rdx 1814 addq %r9,%r8 1815 leaq 8(%rdi),%rdi 1816 movq %rdx,%r9 1817 adcq $0,%r9 
1818 1819 mulq %rbx 1820 addq %rax,%r10 1821 movq 24(%rbp),%rax 1822 adcq $0,%rdx 1823 addq %r10,%r9 1824 movq %rdx,%r10 1825 adcq $0,%r10 1826 1827 mulq %rbx 1828 addq %rax,%r11 1829 movq 32(%rbp),%rax 1830 adcq $0,%rdx 1831 addq %r11,%r10 1832 movq %rdx,%r11 1833 adcq $0,%r11 1834 1835 mulq %rbx 1836 addq %rax,%r12 1837 movq 40(%rbp),%rax 1838 adcq $0,%rdx 1839 addq %r12,%r11 1840 movq %rdx,%r12 1841 adcq $0,%r12 1842 1843 mulq %rbx 1844 addq %rax,%r13 1845 movq 48(%rbp),%rax 1846 adcq $0,%rdx 1847 addq %r13,%r12 1848 movq %rdx,%r13 1849 adcq $0,%r13 1850 1851 mulq %rbx 1852 addq %rax,%r14 1853 movq 56(%rbp),%rax 1854 adcq $0,%rdx 1855 addq %r14,%r13 1856 movq %rdx,%r14 1857 adcq $0,%r14 1858 1859 mulq %rbx 1860 movq 48-16+8(%rsp,%rcx,8),%rbx 1861 addq %rax,%r15 1862 adcq $0,%rdx 1863 addq %r15,%r14 1864 movq 0(%rbp),%rax 1865 movq %rdx,%r15 1866 adcq $0,%r15 1867 1868 decl %ecx 1869 jnz .L8x_tail 1870 1871 leaq 64(%rbp),%rbp 1872 movq 8+8(%rsp),%rdx 1873 cmpq 0+8(%rsp),%rbp 1874 jae .L8x_tail_done 1875 1876 movq 48+56+8(%rsp),%rbx 1877 negq %rsi 1878 movq 0(%rbp),%rax 1879 adcq 0(%rdi),%r8 1880 adcq 8(%rdi),%r9 1881 adcq 16(%rdi),%r10 1882 adcq 24(%rdi),%r11 1883 adcq 32(%rdi),%r12 1884 adcq 40(%rdi),%r13 1885 adcq 48(%rdi),%r14 1886 adcq 56(%rdi),%r15 1887 sbbq %rsi,%rsi 1888 1889 movl $8,%ecx 1890 jmp .L8x_tail 1891 1892.align 32 1893.L8x_tail_done: 1894 addq (%rdx),%r8 1895 adcq $0,%r9 1896 adcq $0,%r10 1897 adcq $0,%r11 1898 adcq $0,%r12 1899 adcq $0,%r13 1900 adcq $0,%r14 1901 adcq $0,%r15 1902 1903 1904 xorq %rax,%rax 1905 1906 negq %rsi 1907.L8x_no_tail: 1908 adcq 0(%rdi),%r8 1909 adcq 8(%rdi),%r9 1910 adcq 16(%rdi),%r10 1911 adcq 24(%rdi),%r11 1912 adcq 32(%rdi),%r12 1913 adcq 40(%rdi),%r13 1914 adcq 48(%rdi),%r14 1915 adcq 56(%rdi),%r15 1916 adcq $0,%rax 1917 movq -8(%rbp),%rcx 1918 xorq %rsi,%rsi 1919 1920.byte 102,72,15,126,213 1921 1922 movq %r8,0(%rdi) 1923 movq %r9,8(%rdi) 1924.byte 102,73,15,126,217 1925 movq %r10,16(%rdi) 1926 movq %r11,24(%rdi) 
/*
 * NOTE(review): auto-generated perlasm (x86_64-mont5.pl) — do not hand-edit
 * logic; regenerate from the .pl source.  This extraction has the original
 * listing's line numbers fused into the text and some statements split across
 * line breaks; content below is preserved byte-for-byte, comments only added.
 *
 * Span contains:
 *  - tail of bn_sqr8x_internal: stores the 8 reduced words to (%rdi) and
 *    loops back to .L8x_reduction_loop until %rdi reaches %rdx;
 *  - __bn_post4x_internal: walks the modulus at (%rbp) 4 words at a time,
 *    NOTs each word and ANDs with the mask in %rax, then adds with carry into
 *    the result at (%rbx), storing to (%rdi) — i.e. a masked add of the
 *    negated modulus (presumably the constant-time final subtraction — the
 *    mask %rax selects whether the subtraction takes effect);
 *  - bn_from_montgomery: returns 0 (%eax) unless the word count %r9d is a
 *    multiple of 8, in which case it tail-jumps to bn_from_mont8x;
 *  - bn_from_mont8x (first half): saves callee-saved registers, computes the
 *    frame size from %r9, and begins carving the stack (continues below).
 */
movq %r12,32(%rdi) 1928 movq %r13,40(%rdi) 1929 movq %r14,48(%rdi) 1930 movq %r15,56(%rdi) 1931 leaq 64(%rdi),%rdi 1932 1933 cmpq %rdx,%rdi 1934 jb .L8x_reduction_loop 1935 .byte 0xf3,0xc3 1936.size bn_sqr8x_internal,.-bn_sqr8x_internal 1937.type __bn_post4x_internal,@function 1938.align 32 1939__bn_post4x_internal: 1940 movq 0(%rbp),%r12 1941 leaq (%rdi,%r9,1),%rbx 1942 movq %r9,%rcx 1943.byte 102,72,15,126,207 1944 negq %rax 1945.byte 102,72,15,126,206 1946 sarq $3+2,%rcx 1947 decq %r12 1948 xorq %r10,%r10 1949 movq 8(%rbp),%r13 1950 movq 16(%rbp),%r14 1951 movq 24(%rbp),%r15 1952 jmp .Lsqr4x_sub_entry 1953 1954.align 16 1955.Lsqr4x_sub: 1956 movq 0(%rbp),%r12 1957 movq 8(%rbp),%r13 1958 movq 16(%rbp),%r14 1959 movq 24(%rbp),%r15 1960.Lsqr4x_sub_entry: 1961 leaq 32(%rbp),%rbp 1962 notq %r12 1963 notq %r13 1964 notq %r14 1965 notq %r15 1966 andq %rax,%r12 1967 andq %rax,%r13 1968 andq %rax,%r14 1969 andq %rax,%r15 1970 1971 negq %r10 1972 adcq 0(%rbx),%r12 1973 adcq 8(%rbx),%r13 1974 adcq 16(%rbx),%r14 1975 adcq 24(%rbx),%r15 1976 movq %r12,0(%rdi) 1977 leaq 32(%rbx),%rbx 1978 movq %r13,8(%rdi) 1979 sbbq %r10,%r10 1980 movq %r14,16(%rdi) 1981 movq %r15,24(%rdi) 1982 leaq 32(%rdi),%rdi 1983 1984 incq %rcx 1985 jnz .Lsqr4x_sub 1986 1987 movq %r9,%r10 1988 negq %r9 1989 .byte 0xf3,0xc3 1990.size __bn_post4x_internal,.-__bn_post4x_internal 1991.globl bn_from_montgomery 1992.type bn_from_montgomery,@function 1993.align 32 1994bn_from_montgomery: 1995 testl $7,%r9d 1996 jz bn_from_mont8x 1997 xorl %eax,%eax 1998 .byte 0xf3,0xc3 1999.size bn_from_montgomery,.-bn_from_montgomery 2000 2001.type bn_from_mont8x,@function 2002.align 32 2003bn_from_mont8x: 2004.byte 0x67 2005 movq %rsp,%rax 2006 pushq %rbx 2007 pushq %rbp 2008 pushq %r12 2009 pushq %r13 2010 pushq %r14 2011 pushq %r15 2012.Lfrom_prologue: 2013 2014 shll $3,%r9d 2015 leaq (%r9,%r9,2),%r10 2016 negq %r9 2017 movq (%r8),%r8 2018 2019 2020 2021 2022 2023 2024 2025 2026 leaq -320(%rsp,%r9,2),%r11 2027 movq
%rsp,%rbp 2028 subq %rdi,%r11 2029 andq $4095,%r11 2030 cmpq %r11,%r10 2031 jb .Lfrom_sp_alt 2032 subq %r11,%rbp 2033 leaq -320(%rbp,%r9,2),%rbp 2034 jmp .Lfrom_sp_done 2035 2036.align 32 2037.Lfrom_sp_alt: 2038 leaq 4096-320(,%r9,2),%r10 2039 leaq -320(%rbp,%r9,2),%rbp 2040 subq %r10,%r11 2041 movq $0,%r10 2042 cmovcq %r10,%r11 2043 subq %r11,%rbp 2044.Lfrom_sp_done: 2045 andq $-64,%rbp 2046 movq %rsp,%r11 2047 subq %rbp,%r11 2048 andq $-4096,%r11 2049 leaq (%r11,%rbp,1),%rsp 2050 movq (%rsp),%r10 2051 cmpq %rbp,%rsp 2052 ja .Lfrom_page_walk 2053 jmp .Lfrom_page_walk_done 2054 2055.Lfrom_page_walk: 2056 leaq -4096(%rsp),%rsp 2057 movq (%rsp),%r10 2058 cmpq %rbp,%rsp 2059 ja .Lfrom_page_walk 2060.Lfrom_page_walk_done: 2061 2062 movq %r9,%r10 2063 negq %r9 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 movq %r8,32(%rsp) 2075 movq %rax,40(%rsp) 2076.Lfrom_body: 2077 movq %r9,%r11 2078 leaq 48(%rsp),%rax 2079 pxor %xmm0,%xmm0 2080 jmp .Lmul_by_1 2081 2082.align 32 2083.Lmul_by_1: 2084 movdqu (%rsi),%xmm1 2085 movdqu 16(%rsi),%xmm2 2086 movdqu 32(%rsi),%xmm3 2087 movdqa %xmm0,(%rax,%r9,1) 2088 movdqu 48(%rsi),%xmm4 2089 movdqa %xmm0,16(%rax,%r9,1) 2090.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2091 movdqa %xmm1,(%rax) 2092 movdqa %xmm0,32(%rax,%r9,1) 2093 movdqa %xmm2,16(%rax) 2094 movdqa %xmm0,48(%rax,%r9,1) 2095 movdqa %xmm3,32(%rax) 2096 movdqa %xmm4,48(%rax) 2097 leaq 64(%rax),%rax 2098 subq $64,%r11 2099 jnz .Lmul_by_1 2100 2101.byte 102,72,15,110,207 2102.byte 102,72,15,110,209 2103.byte 0x67 2104 movq %rcx,%rbp 2105.byte 102,73,15,110,218 2106 movl OPENSSL_ia32cap_P+8(%rip),%r11d 2107 andl $0x80108,%r11d 2108 cmpl $0x80108,%r11d 2109 jne .Lfrom_mont_nox 2110 2111 leaq (%rax,%r9,1),%rdi 2112 call __bn_sqrx8x_reduction 2113 call __bn_postx4x_internal 2114 2115 pxor %xmm0,%xmm0 2116 leaq 48(%rsp),%rax 2117 movq 40(%rsp),%rsi 2118 jmp .Lfrom_mont_zero 2119 2120.align 32 2121.Lfrom_mont_nox: 2122 call __bn_sqr8x_reduction 2123 call __bn_post4x_internal 2124
/*
 * NOTE(review): auto-generated perlasm — content preserved byte-for-byte
 * (the fused decimal numbers are listing-line artifacts of the extraction).
 *
 * Span contains:
 *  - bn_from_mont8x (tail): .Lfrom_mont_zero wipes the scratch area with
 *    %xmm0 in 64-byte strides, restores callee-saved registers from the
 *    saved stack pointer and returns 1 in %rax;
 *  - bn_mulx4x_mont_gather5: MULX/ADX entry point — saves registers, sizes
 *    and probes the new stack frame page by page (.Lmulx4x_page_walk, so
 *    guard pages are touched in order), stashes n0 (%r8) and the saved %rsp
 *    at 32/40(%rsp), then calls mulx4x_internal and restores/returns 1;
 *  - mulx4x_internal (first part): builds the 5-bit-index comparison masks
 *    (.Linc ladder of paddd/pcmpeqd stored at 112..(%r10)) used for the
 *    constant-time table gather — presumably so the selected power is read
 *    without a secret-dependent address (continues in the next span).
 */
2125 pxor %xmm0,%xmm0 2126 leaq 48(%rsp),%rax 2127 movq 40(%rsp),%rsi 2128 jmp .Lfrom_mont_zero 2129 2130.align 32 2131.Lfrom_mont_zero: 2132 movdqa %xmm0,0(%rax) 2133 movdqa %xmm0,16(%rax) 2134 movdqa %xmm0,32(%rax) 2135 movdqa %xmm0,48(%rax) 2136 leaq 64(%rax),%rax 2137 subq $32,%r9 2138 jnz .Lfrom_mont_zero 2139 2140 movq $1,%rax 2141 movq -48(%rsi),%r15 2142 movq -40(%rsi),%r14 2143 movq -32(%rsi),%r13 2144 movq -24(%rsi),%r12 2145 movq -16(%rsi),%rbp 2146 movq -8(%rsi),%rbx 2147 leaq (%rsi),%rsp 2148.Lfrom_epilogue: 2149 .byte 0xf3,0xc3 2150.size bn_from_mont8x,.-bn_from_mont8x 2151.type bn_mulx4x_mont_gather5,@function 2152.align 32 2153bn_mulx4x_mont_gather5: 2154 movq %rsp,%rax 2155.Lmulx4x_enter: 2156 pushq %rbx 2157 pushq %rbp 2158 pushq %r12 2159 pushq %r13 2160 pushq %r14 2161 pushq %r15 2162.Lmulx4x_prologue: 2163 2164 shll $3,%r9d 2165 leaq (%r9,%r9,2),%r10 2166 negq %r9 2167 movq (%r8),%r8 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 leaq -320(%rsp,%r9,2),%r11 2179 movq %rsp,%rbp 2180 subq %rdi,%r11 2181 andq $4095,%r11 2182 cmpq %r11,%r10 2183 jb .Lmulx4xsp_alt 2184 subq %r11,%rbp 2185 leaq -320(%rbp,%r9,2),%rbp 2186 jmp .Lmulx4xsp_done 2187 2188.Lmulx4xsp_alt: 2189 leaq 4096-320(,%r9,2),%r10 2190 leaq -320(%rbp,%r9,2),%rbp 2191 subq %r10,%r11 2192 movq $0,%r10 2193 cmovcq %r10,%r11 2194 subq %r11,%rbp 2195.Lmulx4xsp_done: 2196 andq $-64,%rbp 2197 movq %rsp,%r11 2198 subq %rbp,%r11 2199 andq $-4096,%r11 2200 leaq (%r11,%rbp,1),%rsp 2201 movq (%rsp),%r10 2202 cmpq %rbp,%rsp 2203 ja .Lmulx4x_page_walk 2204 jmp .Lmulx4x_page_walk_done 2205 2206.Lmulx4x_page_walk: 2207 leaq -4096(%rsp),%rsp 2208 movq (%rsp),%r10 2209 cmpq %rbp,%rsp 2210 ja .Lmulx4x_page_walk 2211.Lmulx4x_page_walk_done: 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 movq %r8,32(%rsp) 2226 movq %rax,40(%rsp) 2227.Lmulx4x_body: 2228 call mulx4x_internal 2229 2230 movq 40(%rsp),%rsi 2231 movq $1,%rax 2232 2233 movq -48(%rsi),%r15 2234 movq -40(%rsi),%r14
2235 movq -32(%rsi),%r13 2236 movq -24(%rsi),%r12 2237 movq -16(%rsi),%rbp 2238 movq -8(%rsi),%rbx 2239 leaq (%rsi),%rsp 2240.Lmulx4x_epilogue: 2241 .byte 0xf3,0xc3 2242.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2243 2244.type mulx4x_internal,@function 2245.align 32 2246mulx4x_internal: 2247 movq %r9,8(%rsp) 2248 movq %r9,%r10 2249 negq %r9 2250 shlq $5,%r9 2251 negq %r10 2252 leaq 128(%rdx,%r9,1),%r13 2253 shrq $5+5,%r9 2254 movd 8(%rax),%xmm5 2255 subq $1,%r9 2256 leaq .Linc(%rip),%rax 2257 movq %r13,16+8(%rsp) 2258 movq %r9,24+8(%rsp) 2259 movq %rdi,56+8(%rsp) 2260 movdqa 0(%rax),%xmm0 2261 movdqa 16(%rax),%xmm1 2262 leaq 88-112(%rsp,%r10,1),%r10 2263 leaq 128(%rdx),%rdi 2264 2265 pshufd $0,%xmm5,%xmm5 2266 movdqa %xmm1,%xmm4 2267.byte 0x67 2268 movdqa %xmm1,%xmm2 2269.byte 0x67 2270 paddd %xmm0,%xmm1 2271 pcmpeqd %xmm5,%xmm0 2272 movdqa %xmm4,%xmm3 2273 paddd %xmm1,%xmm2 2274 pcmpeqd %xmm5,%xmm1 2275 movdqa %xmm0,112(%r10) 2276 movdqa %xmm4,%xmm0 2277 2278 paddd %xmm2,%xmm3 2279 pcmpeqd %xmm5,%xmm2 2280 movdqa %xmm1,128(%r10) 2281 movdqa %xmm4,%xmm1 2282 2283 paddd %xmm3,%xmm0 2284 pcmpeqd %xmm5,%xmm3 2285 movdqa %xmm2,144(%r10) 2286 movdqa %xmm4,%xmm2 2287 2288 paddd %xmm0,%xmm1 2289 pcmpeqd %xmm5,%xmm0 2290 movdqa %xmm3,160(%r10) 2291 movdqa %xmm4,%xmm3 2292 paddd %xmm1,%xmm2 2293 pcmpeqd %xmm5,%xmm1 2294 movdqa %xmm0,176(%r10) 2295 movdqa %xmm4,%xmm0 2296 2297 paddd %xmm2,%xmm3 2298 pcmpeqd %xmm5,%xmm2 2299 movdqa %xmm1,192(%r10) 2300 movdqa %xmm4,%xmm1 2301 2302 paddd %xmm3,%xmm0 2303 pcmpeqd %xmm5,%xmm3 2304 movdqa %xmm2,208(%r10) 2305 movdqa %xmm4,%xmm2 2306 2307 paddd %xmm0,%xmm1 2308 pcmpeqd %xmm5,%xmm0 2309 movdqa %xmm3,224(%r10) 2310 movdqa %xmm4,%xmm3 2311 paddd %xmm1,%xmm2 2312 pcmpeqd %xmm5,%xmm1 2313 movdqa %xmm0,240(%r10) 2314 movdqa %xmm4,%xmm0 2315 2316 paddd %xmm2,%xmm3 2317 pcmpeqd %xmm5,%xmm2 2318 movdqa %xmm1,256(%r10) 2319 movdqa %xmm4,%xmm1 2320 2321 paddd %xmm3,%xmm0 2322 pcmpeqd %xmm5,%xmm3 2323 movdqa %xmm2,272(%r10) 2324
/*
 * NOTE(review): auto-generated perlasm — content preserved byte-for-byte;
 * several statements in this extraction are split across line breaks and
 * listing numbers are fused into tokens, so only comments are added here.
 *
 * Span contains:
 *  - mulx4x_internal (remainder): finishes the mask ladder, gathers the
 *    selected table entry by pand/por across all 256 bytes of each table row
 *    (every entry is touched, so the memory-access pattern is independent of
 *    the secret index), then runs the MULX/ADCX/ADOX Montgomery
 *    multiply-and-reduce: .Lmulx4x_1st (first outer iteration),
 *    .Lmulx4x_outer (re-gather next b[i] window) and .Lmulx4x_inner
 *    (interleaved a[i]*b and n*m carry chains using the two independent
 *    CF/OF chains of adcx/adox), ending with mask computation and a jump
 *    into .Lsqrx4x_sub_entry for the final masked subtraction;
 *  - bn_powerx5: frame/page-walk prologue like the other entry points, then
 *    five pairs of __bn_sqrx8x_internal + __bn_postx4x_internal (x^32 by
 *    repeated squaring, presumably for the 5-bit window — confirm against
 *    the .pl source) followed by one mulx4x_internal;
 *  - bn_sqrx8x_internal (start): zeroes the 48+8(%rsp) scratch tableau and
 *    begins the 8-way MULX squaring outer loop (.Lsqrx8x_outer_loop).
 */
movdqa %xmm4,%xmm2 2325 2326 paddd %xmm0,%xmm1 2327 pcmpeqd %xmm5,%xmm0 2328 movdqa %xmm3,288(%r10) 2329 movdqa %xmm4,%xmm3 2330.byte 0x67 2331 paddd %xmm1,%xmm2 2332 pcmpeqd %xmm5,%xmm1 2333 movdqa %xmm0,304(%r10) 2334 2335 paddd %xmm2,%xmm3 2336 pcmpeqd %xmm5,%xmm2 2337 movdqa %xmm1,320(%r10) 2338 2339 pcmpeqd %xmm5,%xmm3 2340 movdqa %xmm2,336(%r10) 2341 2342 pand 64(%rdi),%xmm0 2343 pand 80(%rdi),%xmm1 2344 pand 96(%rdi),%xmm2 2345 movdqa %xmm3,352(%r10) 2346 pand 112(%rdi),%xmm3 2347 por %xmm2,%xmm0 2348 por %xmm3,%xmm1 2349 movdqa -128(%rdi),%xmm4 2350 movdqa -112(%rdi),%xmm5 2351 movdqa -96(%rdi),%xmm2 2352 pand 112(%r10),%xmm4 2353 movdqa -80(%rdi),%xmm3 2354 pand 128(%r10),%xmm5 2355 por %xmm4,%xmm0 2356 pand 144(%r10),%xmm2 2357 por %xmm5,%xmm1 2358 pand 160(%r10),%xmm3 2359 por %xmm2,%xmm0 2360 por %xmm3,%xmm1 2361 movdqa -64(%rdi),%xmm4 2362 movdqa -48(%rdi),%xmm5 2363 movdqa -32(%rdi),%xmm2 2364 pand 176(%r10),%xmm4 2365 movdqa -16(%rdi),%xmm3 2366 pand 192(%r10),%xmm5 2367 por %xmm4,%xmm0 2368 pand 208(%r10),%xmm2 2369 por %xmm5,%xmm1 2370 pand 224(%r10),%xmm3 2371 por %xmm2,%xmm0 2372 por %xmm3,%xmm1 2373 movdqa 0(%rdi),%xmm4 2374 movdqa 16(%rdi),%xmm5 2375 movdqa 32(%rdi),%xmm2 2376 pand 240(%r10),%xmm4 2377 movdqa 48(%rdi),%xmm3 2378 pand 256(%r10),%xmm5 2379 por %xmm4,%xmm0 2380 pand 272(%r10),%xmm2 2381 por %xmm5,%xmm1 2382 pand 288(%r10),%xmm3 2383 por %xmm2,%xmm0 2384 por %xmm3,%xmm1 2385 pxor %xmm1,%xmm0 2386 pshufd $0x4e,%xmm0,%xmm1 2387 por %xmm1,%xmm0 2388 leaq 256(%rdi),%rdi 2389.byte 102,72,15,126,194 2390 leaq 64+32+8(%rsp),%rbx 2391 2392 movq %rdx,%r9 2393 mulxq 0(%rsi),%r8,%rax 2394 mulxq 8(%rsi),%r11,%r12 2395 addq %rax,%r11 2396 mulxq 16(%rsi),%rax,%r13 2397 adcq %rax,%r12 2398 adcq $0,%r13 2399 mulxq 24(%rsi),%rax,%r14 2400 2401 movq %r8,%r15 2402 imulq 32+8(%rsp),%r8 2403 xorq %rbp,%rbp 2404 movq %r8,%rdx 2405 2406 movq %rdi,8+8(%rsp) 2407 2408 leaq 32(%rsi),%rsi 2409 adcxq %rax,%r13 2410 adcxq %rbp,%r14 2411 2412 mulxq
0(%rcx),%rax,%r10 2413 adcxq %rax,%r15 2414 adoxq %r11,%r10 2415 mulxq 8(%rcx),%rax,%r11 2416 adcxq %rax,%r10 2417 adoxq %r12,%r11 2418 mulxq 16(%rcx),%rax,%r12 2419 movq 24+8(%rsp),%rdi 2420 movq %r10,-32(%rbx) 2421 adcxq %rax,%r11 2422 adoxq %r13,%r12 2423 mulxq 24(%rcx),%rax,%r15 2424 movq %r9,%rdx 2425 movq %r11,-24(%rbx) 2426 adcxq %rax,%r12 2427 adoxq %rbp,%r15 2428 leaq 32(%rcx),%rcx 2429 movq %r12,-16(%rbx) 2430 jmp .Lmulx4x_1st 2431 2432.align 32 2433.Lmulx4x_1st: 2434 adcxq %rbp,%r15 2435 mulxq 0(%rsi),%r10,%rax 2436 adcxq %r14,%r10 2437 mulxq 8(%rsi),%r11,%r14 2438 adcxq %rax,%r11 2439 mulxq 16(%rsi),%r12,%rax 2440 adcxq %r14,%r12 2441 mulxq 24(%rsi),%r13,%r14 2442.byte 0x67,0x67 2443 movq %r8,%rdx 2444 adcxq %rax,%r13 2445 adcxq %rbp,%r14 2446 leaq 32(%rsi),%rsi 2447 leaq 32(%rbx),%rbx 2448 2449 adoxq %r15,%r10 2450 mulxq 0(%rcx),%rax,%r15 2451 adcxq %rax,%r10 2452 adoxq %r15,%r11 2453 mulxq 8(%rcx),%rax,%r15 2454 adcxq %rax,%r11 2455 adoxq %r15,%r12 2456 mulxq 16(%rcx),%rax,%r15 2457 movq %r10,-40(%rbx) 2458 adcxq %rax,%r12 2459 movq %r11,-32(%rbx) 2460 adoxq %r15,%r13 2461 mulxq 24(%rcx),%rax,%r15 2462 movq %r9,%rdx 2463 movq %r12,-24(%rbx) 2464 adcxq %rax,%r13 2465 adoxq %rbp,%r15 2466 leaq 32(%rcx),%rcx 2467 movq %r13,-16(%rbx) 2468 2469 decq %rdi 2470 jnz .Lmulx4x_1st 2471 2472 movq 8(%rsp),%rax 2473 adcq %rbp,%r15 2474 leaq (%rsi,%rax,1),%rsi 2475 addq %r15,%r14 2476 movq 8+8(%rsp),%rdi 2477 adcq %rbp,%rbp 2478 movq %r14,-8(%rbx) 2479 jmp .Lmulx4x_outer 2480 2481.align 32 2482.Lmulx4x_outer: 2483 leaq 16-256(%rbx),%r10 2484 pxor %xmm4,%xmm4 2485.byte 0x67,0x67 2486 pxor %xmm5,%xmm5 2487 movdqa -128(%rdi),%xmm0 2488 movdqa -112(%rdi),%xmm1 2489 movdqa -96(%rdi),%xmm2 2490 pand 256(%r10),%xmm0 2491 movdqa -80(%rdi),%xmm3 2492 pand 272(%r10),%xmm1 2493 por %xmm0,%xmm4 2494 pand 288(%r10),%xmm2 2495 por %xmm1,%xmm5 2496 pand 304(%r10),%xmm3 2497 por %xmm2,%xmm4 2498 por %xmm3,%xmm5 2499 movdqa -64(%rdi),%xmm0 2500 movdqa -48(%rdi),%xmm1 2501 movdqa
-32(%rdi),%xmm2 2502 pand 320(%r10),%xmm0 2503 movdqa -16(%rdi),%xmm3 2504 pand 336(%r10),%xmm1 2505 por %xmm0,%xmm4 2506 pand 352(%r10),%xmm2 2507 por %xmm1,%xmm5 2508 pand 368(%r10),%xmm3 2509 por %xmm2,%xmm4 2510 por %xmm3,%xmm5 2511 movdqa 0(%rdi),%xmm0 2512 movdqa 16(%rdi),%xmm1 2513 movdqa 32(%rdi),%xmm2 2514 pand 384(%r10),%xmm0 2515 movdqa 48(%rdi),%xmm3 2516 pand 400(%r10),%xmm1 2517 por %xmm0,%xmm4 2518 pand 416(%r10),%xmm2 2519 por %xmm1,%xmm5 2520 pand 432(%r10),%xmm3 2521 por %xmm2,%xmm4 2522 por %xmm3,%xmm5 2523 movdqa 64(%rdi),%xmm0 2524 movdqa 80(%rdi),%xmm1 2525 movdqa 96(%rdi),%xmm2 2526 pand 448(%r10),%xmm0 2527 movdqa 112(%rdi),%xmm3 2528 pand 464(%r10),%xmm1 2529 por %xmm0,%xmm4 2530 pand 480(%r10),%xmm2 2531 por %xmm1,%xmm5 2532 pand 496(%r10),%xmm3 2533 por %xmm2,%xmm4 2534 por %xmm3,%xmm5 2535 por %xmm5,%xmm4 2536 pshufd $0x4e,%xmm4,%xmm0 2537 por %xmm4,%xmm0 2538 leaq 256(%rdi),%rdi 2539.byte 102,72,15,126,194 2540 2541 movq %rbp,(%rbx) 2542 leaq 32(%rbx,%rax,1),%rbx 2543 mulxq 0(%rsi),%r8,%r11 2544 xorq %rbp,%rbp 2545 movq %rdx,%r9 2546 mulxq 8(%rsi),%r14,%r12 2547 adoxq -32(%rbx),%r8 2548 adcxq %r14,%r11 2549 mulxq 16(%rsi),%r15,%r13 2550 adoxq -24(%rbx),%r11 2551 adcxq %r15,%r12 2552 mulxq 24(%rsi),%rdx,%r14 2553 adoxq -16(%rbx),%r12 2554 adcxq %rdx,%r13 2555 leaq (%rcx,%rax,1),%rcx 2556 leaq 32(%rsi),%rsi 2557 adoxq -8(%rbx),%r13 2558 adcxq %rbp,%r14 2559 adoxq %rbp,%r14 2560 2561 movq %r8,%r15 2562 imulq 32+8(%rsp),%r8 2563 2564 movq %r8,%rdx 2565 xorq %rbp,%rbp 2566 movq %rdi,8+8(%rsp) 2567 2568 mulxq 0(%rcx),%rax,%r10 2569 adcxq %rax,%r15 2570 adoxq %r11,%r10 2571 mulxq 8(%rcx),%rax,%r11 2572 adcxq %rax,%r10 2573 adoxq %r12,%r11 2574 mulxq 16(%rcx),%rax,%r12 2575 adcxq %rax,%r11 2576 adoxq %r13,%r12 2577 mulxq 24(%rcx),%rax,%r15 2578 movq %r9,%rdx 2579 movq 24+8(%rsp),%rdi 2580 movq %r10,-32(%rbx) 2581 adcxq %rax,%r12 2582 movq %r11,-24(%rbx) 2583 adoxq %rbp,%r15 2584 movq %r12,-16(%rbx) 2585 leaq 32(%rcx),%rcx 2586 jmp
.Lmulx4x_inner 2587 2588.align 32 2589.Lmulx4x_inner: 2590 mulxq 0(%rsi),%r10,%rax 2591 adcxq %rbp,%r15 2592 adoxq %r14,%r10 2593 mulxq 8(%rsi),%r11,%r14 2594 adcxq 0(%rbx),%r10 2595 adoxq %rax,%r11 2596 mulxq 16(%rsi),%r12,%rax 2597 adcxq 8(%rbx),%r11 2598 adoxq %r14,%r12 2599 mulxq 24(%rsi),%r13,%r14 2600 movq %r8,%rdx 2601 adcxq 16(%rbx),%r12 2602 adoxq %rax,%r13 2603 adcxq 24(%rbx),%r13 2604 adoxq %rbp,%r14 2605 leaq 32(%rsi),%rsi 2606 leaq 32(%rbx),%rbx 2607 adcxq %rbp,%r14 2608 2609 adoxq %r15,%r10 2610 mulxq 0(%rcx),%rax,%r15 2611 adcxq %rax,%r10 2612 adoxq %r15,%r11 2613 mulxq 8(%rcx),%rax,%r15 2614 adcxq %rax,%r11 2615 adoxq %r15,%r12 2616 mulxq 16(%rcx),%rax,%r15 2617 movq %r10,-40(%rbx) 2618 adcxq %rax,%r12 2619 adoxq %r15,%r13 2620 movq %r11,-32(%rbx) 2621 mulxq 24(%rcx),%rax,%r15 2622 movq %r9,%rdx 2623 leaq 32(%rcx),%rcx 2624 movq %r12,-24(%rbx) 2625 adcxq %rax,%r13 2626 adoxq %rbp,%r15 2627 movq %r13,-16(%rbx) 2628 2629 decq %rdi 2630 jnz .Lmulx4x_inner 2631 2632 movq 0+8(%rsp),%rax 2633 adcq %rbp,%r15 2634 subq 0(%rbx),%rdi 2635 movq 8+8(%rsp),%rdi 2636 movq 16+8(%rsp),%r10 2637 adcq %r15,%r14 2638 leaq (%rsi,%rax,1),%rsi 2639 adcq %rbp,%rbp 2640 movq %r14,-8(%rbx) 2641 2642 cmpq %r10,%rdi 2643 jb .Lmulx4x_outer 2644 2645 movq -8(%rcx),%r10 2646 movq %rbp,%r8 2647 movq (%rcx,%rax,1),%r12 2648 leaq (%rcx,%rax,1),%rbp 2649 movq %rax,%rcx 2650 leaq (%rbx,%rax,1),%rdi 2651 xorl %eax,%eax 2652 xorq %r15,%r15 2653 subq %r14,%r10 2654 adcq %r15,%r15 2655 orq %r15,%r8 2656 sarq $3+2,%rcx 2657 subq %r8,%rax 2658 movq 56+8(%rsp),%rdx 2659 decq %r12 2660 movq 8(%rbp),%r13 2661 xorq %r8,%r8 2662 movq 16(%rbp),%r14 2663 movq 24(%rbp),%r15 2664 jmp .Lsqrx4x_sub_entry 2665.size mulx4x_internal,.-mulx4x_internal 2666.type bn_powerx5,@function 2667.align 32 2668bn_powerx5: 2669 movq %rsp,%rax 2670.Lpowerx5_enter: 2671 pushq %rbx 2672 pushq %rbp 2673 pushq %r12 2674 pushq %r13 2675 pushq %r14 2676 pushq %r15 2677.Lpowerx5_prologue: 2678 2679 shll $3,%r9d 2680 leaq
(%r9,%r9,2),%r10 2681 negq %r9 2682 movq (%r8),%r8 2683 2684 2685 2686 2687 2688 2689 2690 2691 leaq -320(%rsp,%r9,2),%r11 2692 movq %rsp,%rbp 2693 subq %rdi,%r11 2694 andq $4095,%r11 2695 cmpq %r11,%r10 2696 jb .Lpwrx_sp_alt 2697 subq %r11,%rbp 2698 leaq -320(%rbp,%r9,2),%rbp 2699 jmp .Lpwrx_sp_done 2700 2701.align 32 2702.Lpwrx_sp_alt: 2703 leaq 4096-320(,%r9,2),%r10 2704 leaq -320(%rbp,%r9,2),%rbp 2705 subq %r10,%r11 2706 movq $0,%r10 2707 cmovcq %r10,%r11 2708 subq %r11,%rbp 2709.Lpwrx_sp_done: 2710 andq $-64,%rbp 2711 movq %rsp,%r11 2712 subq %rbp,%r11 2713 andq $-4096,%r11 2714 leaq (%r11,%rbp,1),%rsp 2715 movq (%rsp),%r10 2716 cmpq %rbp,%rsp 2717 ja .Lpwrx_page_walk 2718 jmp .Lpwrx_page_walk_done 2719 2720.Lpwrx_page_walk: 2721 leaq -4096(%rsp),%rsp 2722 movq (%rsp),%r10 2723 cmpq %rbp,%rsp 2724 ja .Lpwrx_page_walk 2725.Lpwrx_page_walk_done: 2726 2727 movq %r9,%r10 2728 negq %r9 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 pxor %xmm0,%xmm0 2742.byte 102,72,15,110,207 2743.byte 102,72,15,110,209 2744.byte 102,73,15,110,218 2745.byte 102,72,15,110,226 2746 movq %r8,32(%rsp) 2747 movq %rax,40(%rsp) 2748.Lpowerx5_body: 2749 2750 call __bn_sqrx8x_internal 2751 call __bn_postx4x_internal 2752 call __bn_sqrx8x_internal 2753 call __bn_postx4x_internal 2754 call __bn_sqrx8x_internal 2755 call __bn_postx4x_internal 2756 call __bn_sqrx8x_internal 2757 call __bn_postx4x_internal 2758 call __bn_sqrx8x_internal 2759 call __bn_postx4x_internal 2760 2761 movq %r10,%r9 2762 movq %rsi,%rdi 2763.byte 102,72,15,126,209 2764.byte 102,72,15,126,226 2765 movq 40(%rsp),%rax 2766 2767 call mulx4x_internal 2768 2769 movq 40(%rsp),%rsi 2770 movq $1,%rax 2771 2772 movq -48(%rsi),%r15 2773 movq -40(%rsi),%r14 2774 movq -32(%rsi),%r13 2775 movq -24(%rsi),%r12 2776 movq -16(%rsi),%rbp 2777 movq -8(%rsi),%rbx 2778 leaq (%rsi),%rsp 2779.Lpowerx5_epilogue: 2780 .byte 0xf3,0xc3 2781.size bn_powerx5,.-bn_powerx5 2782 2783.globl bn_sqrx8x_internal 2784.hidden
bn_sqrx8x_internal 2785.type bn_sqrx8x_internal,@function 2786.align 32 2787bn_sqrx8x_internal: 2788__bn_sqrx8x_internal: 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 leaq 48+8(%rsp),%rdi 2830 leaq (%rsi,%r9,1),%rbp 2831 movq %r9,0+8(%rsp) 2832 movq %rbp,8+8(%rsp) 2833 jmp .Lsqr8x_zero_start 2834 2835.align 32 2836.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2837.Lsqrx8x_zero: 2838.byte 0x3e 2839 movdqa %xmm0,0(%rdi) 2840 movdqa %xmm0,16(%rdi) 2841 movdqa %xmm0,32(%rdi) 2842 movdqa %xmm0,48(%rdi) 2843.Lsqr8x_zero_start: 2844 movdqa %xmm0,64(%rdi) 2845 movdqa %xmm0,80(%rdi) 2846 movdqa %xmm0,96(%rdi) 2847 movdqa %xmm0,112(%rdi) 2848 leaq 128(%rdi),%rdi 2849 subq $64,%r9 2850 jnz .Lsqrx8x_zero 2851 2852 movq 0(%rsi),%rdx 2853 2854 xorq %r10,%r10 2855 xorq %r11,%r11 2856 xorq %r12,%r12 2857 xorq %r13,%r13 2858 xorq %r14,%r14 2859 xorq %r15,%r15 2860 leaq 48+8(%rsp),%rdi 2861 xorq %rbp,%rbp 2862 jmp .Lsqrx8x_outer_loop 2863 2864.align 32 2865.Lsqrx8x_outer_loop: 2866 mulxq 8(%rsi),%r8,%rax 2867 adcxq %r9,%r8 2868 adoxq %rax,%r10 2869 mulxq 16(%rsi),%r9,%rax 2870 adcxq %r10,%r9 2871 adoxq %rax,%r11 2872.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2873 adcxq %r11,%r10 2874 adoxq %rax,%r12 2875.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2876 adcxq %r12,%r11 2877 adoxq %rax,%r13 2878 mulxq 40(%rsi),%r12,%rax 2879 adcxq %r13,%r12 2880 adoxq %rax,%r14 2881 mulxq 48(%rsi),%r13,%rax 2882 adcxq %r14,%r13 2883 adoxq %r15,%rax 2884 mulxq 56(%rsi),%r14,%r15 2885 movq 8(%rsi),%rdx 2886 adcxq %rax,%r14 2887 adoxq %rbp,%r15 2888 adcq 64(%rdi),%r15 2889 movq %r8,8(%rdi) 2890 movq %r9,16(%rdi) 2891 sbbq %rcx,%rcx 2892 xorq %rbp,%rbp 2893 2894 2895 mulxq 16(%rsi),%r8,%rbx 2896 mulxq 24(%rsi),%r9,%rax 2897 adcxq %r10,%r8 2898 adoxq %rbx,%r9 2899 mulxq 32(%rsi),%r10,%rbx 2900 adcxq %r11,%r9 2901
/*
 * NOTE(review): auto-generated perlasm — byte-for-byte content, comments
 * only.  This span is the core of bn_sqrx8x_internal: the cross-product
 * phase of the 8x8 squaring (interleaved MULX with the dual ADCX/ADOX carry
 * chains, with the .byte sequences being hand-encoded mulx forms), the
 * accumulation loop .Lsqrx8x_loop that folds previous partial sums from
 * (%rdi) back in, the .Lsqrx8x_break bookkeeping between outer iterations,
 * and .Lsqrx4x_shift_n_add, which doubles the cross products (adox reg,reg
 * shifts in the previous top bit) and adds the squared diagonal terms
 * (mulxq %rdx,...).  Statement order here IS the carry-chain correctness;
 * do not reorder.
 */
adoxq %rax,%r10 2902.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2903 adcxq %r12,%r10 2904 adoxq %rbx,%r11 2905.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2906 adcxq %r13,%r11 2907 adoxq %r14,%r12 2908.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2909 movq 16(%rsi),%rdx 2910 adcxq %rax,%r12 2911 adoxq %rbx,%r13 2912 adcxq %r15,%r13 2913 adoxq %rbp,%r14 2914 adcxq %rbp,%r14 2915 2916 movq %r8,24(%rdi) 2917 movq %r9,32(%rdi) 2918 2919 mulxq 24(%rsi),%r8,%rbx 2920 mulxq 32(%rsi),%r9,%rax 2921 adcxq %r10,%r8 2922 adoxq %rbx,%r9 2923 mulxq 40(%rsi),%r10,%rbx 2924 adcxq %r11,%r9 2925 adoxq %rax,%r10 2926.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2927 adcxq %r12,%r10 2928 adoxq %r13,%r11 2929.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2930.byte 0x3e 2931 movq 24(%rsi),%rdx 2932 adcxq %rbx,%r11 2933 adoxq %rax,%r12 2934 adcxq %r14,%r12 2935 movq %r8,40(%rdi) 2936 movq %r9,48(%rdi) 2937 mulxq 32(%rsi),%r8,%rax 2938 adoxq %rbp,%r13 2939 adcxq %rbp,%r13 2940 2941 mulxq 40(%rsi),%r9,%rbx 2942 adcxq %r10,%r8 2943 adoxq %rax,%r9 2944 mulxq 48(%rsi),%r10,%rax 2945 adcxq %r11,%r9 2946 adoxq %r12,%r10 2947 mulxq 56(%rsi),%r11,%r12 2948 movq 32(%rsi),%rdx 2949 movq 40(%rsi),%r14 2950 adcxq %rbx,%r10 2951 adoxq %rax,%r11 2952 movq 48(%rsi),%r15 2953 adcxq %r13,%r11 2954 adoxq %rbp,%r12 2955 adcxq %rbp,%r12 2956 2957 movq %r8,56(%rdi) 2958 movq %r9,64(%rdi) 2959 2960 mulxq %r14,%r9,%rax 2961 movq 56(%rsi),%r8 2962 adcxq %r10,%r9 2963 mulxq %r15,%r10,%rbx 2964 adoxq %rax,%r10 2965 adcxq %r11,%r10 2966 mulxq %r8,%r11,%rax 2967 movq %r14,%rdx 2968 adoxq %rbx,%r11 2969 adcxq %r12,%r11 2970 2971 adcxq %rbp,%rax 2972 2973 mulxq %r15,%r14,%rbx 2974 mulxq %r8,%r12,%r13 2975 movq %r15,%rdx 2976 leaq 64(%rsi),%rsi 2977 adcxq %r14,%r11 2978 adoxq %rbx,%r12 2979 adcxq %rax,%r12 2980 adoxq %rbp,%r13 2981 2982.byte 0x67,0x67 2983 mulxq %r8,%r8,%r14 2984 adcxq %r8,%r13 2985 adcxq %rbp,%r14 2986 2987 cmpq 8+8(%rsp),%rsi 2988 je .Lsqrx8x_outer_break 2989 2990 negq %rcx
2991 movq $-8,%rcx 2992 movq %rbp,%r15 2993 movq 64(%rdi),%r8 2994 adcxq 72(%rdi),%r9 2995 adcxq 80(%rdi),%r10 2996 adcxq 88(%rdi),%r11 2997 adcq 96(%rdi),%r12 2998 adcq 104(%rdi),%r13 2999 adcq 112(%rdi),%r14 3000 adcq 120(%rdi),%r15 3001 leaq (%rsi),%rbp 3002 leaq 128(%rdi),%rdi 3003 sbbq %rax,%rax 3004 3005 movq -64(%rsi),%rdx 3006 movq %rax,16+8(%rsp) 3007 movq %rdi,24+8(%rsp) 3008 3009 3010 xorl %eax,%eax 3011 jmp .Lsqrx8x_loop 3012 3013.align 32 3014.Lsqrx8x_loop: 3015 movq %r8,%rbx 3016 mulxq 0(%rbp),%rax,%r8 3017 adcxq %rax,%rbx 3018 adoxq %r9,%r8 3019 3020 mulxq 8(%rbp),%rax,%r9 3021 adcxq %rax,%r8 3022 adoxq %r10,%r9 3023 3024 mulxq 16(%rbp),%rax,%r10 3025 adcxq %rax,%r9 3026 adoxq %r11,%r10 3027 3028 mulxq 24(%rbp),%rax,%r11 3029 adcxq %rax,%r10 3030 adoxq %r12,%r11 3031 3032.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3033 adcxq %rax,%r11 3034 adoxq %r13,%r12 3035 3036 mulxq 40(%rbp),%rax,%r13 3037 adcxq %rax,%r12 3038 adoxq %r14,%r13 3039 3040 mulxq 48(%rbp),%rax,%r14 3041 movq %rbx,(%rdi,%rcx,8) 3042 movl $0,%ebx 3043 adcxq %rax,%r13 3044 adoxq %r15,%r14 3045 3046.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3047 movq 8(%rsi,%rcx,8),%rdx 3048 adcxq %rax,%r14 3049 adoxq %rbx,%r15 3050 adcxq %rbx,%r15 3051 3052.byte 0x67 3053 incq %rcx 3054 jnz .Lsqrx8x_loop 3055 3056 leaq 64(%rbp),%rbp 3057 movq $-8,%rcx 3058 cmpq 8+8(%rsp),%rbp 3059 je .Lsqrx8x_break 3060 3061 subq 16+8(%rsp),%rbx 3062.byte 0x66 3063 movq -64(%rsi),%rdx 3064 adcxq 0(%rdi),%r8 3065 adcxq 8(%rdi),%r9 3066 adcq 16(%rdi),%r10 3067 adcq 24(%rdi),%r11 3068 adcq 32(%rdi),%r12 3069 adcq 40(%rdi),%r13 3070 adcq 48(%rdi),%r14 3071 adcq 56(%rdi),%r15 3072 leaq 64(%rdi),%rdi 3073.byte 0x67 3074 sbbq %rax,%rax 3075 xorl %ebx,%ebx 3076 movq %rax,16+8(%rsp) 3077 jmp .Lsqrx8x_loop 3078 3079.align 32 3080.Lsqrx8x_break: 3081 subq 16+8(%rsp),%r8 3082 movq 24+8(%rsp),%rcx 3083 movq 0(%rsi),%rdx 3084 xorl %ebp,%ebp 3085 movq %r8,0(%rdi) 3086 cmpq %rcx,%rdi 3087 je .Lsqrx8x_outer_loop 3088 3089
movq %r9,8(%rdi) 3090 movq 8(%rcx),%r9 3091 movq %r10,16(%rdi) 3092 movq 16(%rcx),%r10 3093 movq %r11,24(%rdi) 3094 movq 24(%rcx),%r11 3095 movq %r12,32(%rdi) 3096 movq 32(%rcx),%r12 3097 movq %r13,40(%rdi) 3098 movq 40(%rcx),%r13 3099 movq %r14,48(%rdi) 3100 movq 48(%rcx),%r14 3101 movq %r15,56(%rdi) 3102 movq 56(%rcx),%r15 3103 movq %rcx,%rdi 3104 jmp .Lsqrx8x_outer_loop 3105 3106.align 32 3107.Lsqrx8x_outer_break: 3108 movq %r9,72(%rdi) 3109.byte 102,72,15,126,217 3110 movq %r10,80(%rdi) 3111 movq %r11,88(%rdi) 3112 movq %r12,96(%rdi) 3113 movq %r13,104(%rdi) 3114 movq %r14,112(%rdi) 3115 leaq 48+8(%rsp),%rdi 3116 movq (%rsi,%rcx,1),%rdx 3117 3118 movq 8(%rdi),%r11 3119 xorq %r10,%r10 3120 movq 0+8(%rsp),%r9 3121 adoxq %r11,%r11 3122 movq 16(%rdi),%r12 3123 movq 24(%rdi),%r13 3124 3125 3126.align 32 3127.Lsqrx4x_shift_n_add: 3128 mulxq %rdx,%rax,%rbx 3129 adoxq %r12,%r12 3130 adcxq %r10,%rax 3131.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3132.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3133 adoxq %r13,%r13 3134 adcxq %r11,%rbx 3135 movq 40(%rdi),%r11 3136 movq %rax,0(%rdi) 3137 movq %rbx,8(%rdi) 3138 3139 mulxq %rdx,%rax,%rbx 3140 adoxq %r10,%r10 3141 adcxq %r12,%rax 3142 movq 16(%rsi,%rcx,1),%rdx 3143 movq 48(%rdi),%r12 3144 adoxq %r11,%r11 3145 adcxq %r13,%rbx 3146 movq 56(%rdi),%r13 3147 movq %rax,16(%rdi) 3148 movq %rbx,24(%rdi) 3149 3150 mulxq %rdx,%rax,%rbx 3151 adoxq %r12,%r12 3152 adcxq %r10,%rax 3153 movq 24(%rsi,%rcx,1),%rdx 3154 leaq 32(%rcx),%rcx 3155 movq 64(%rdi),%r10 3156 adoxq %r13,%r13 3157 adcxq %r11,%rbx 3158 movq 72(%rdi),%r11 3159 movq %rax,32(%rdi) 3160 movq %rbx,40(%rdi) 3161 3162 mulxq %rdx,%rax,%rbx 3163 adoxq %r10,%r10 3164 adcxq %r12,%rax 3165 jrcxz .Lsqrx4x_shift_n_add_break 3166.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3167 adoxq %r11,%r11 3168 adcxq %r13,%rbx 3169 movq 80(%rdi),%r12 3170 movq 88(%rdi),%r13 3171 movq %rax,48(%rdi) 3172 movq %rbx,56(%rdi) 3173 leaq 64(%rdi),%rdi 3174 nop 3175 jmp .Lsqrx4x_shift_n_add 3176
/*
 * NOTE(review): auto-generated perlasm — byte-for-byte content, comments
 * only.  Span contains:
 *  - .Lsqrx4x_shift_n_add_break: final stores of the shift-and-add pass;
 *  - __bn_sqrx8x_reduction: Montgomery reduction of the squared tableau.
 *    .Lsqrx8x_reduce multiplies 8 modulus words at (%rbp) by the per-word
 *    factor (%rdx, derived from n0 at 32+8(%rsp)) and folds them into
 *    r8..r15 via the dual adcx/adox chains, saving the factors at
 *    64+48+8(%rsp,%rcx,8); .Lsqrx8x_tail replays those saved factors
 *    against the remaining modulus words, with the borrow carried through
 *    16+8(%rsp).  The falls-through to .Lsqrx8x_no_tail / the final stores
 *    is in the next span.
 */
3177.align 32 3178.Lsqrx4x_shift_n_add_break: 3179 adcxq %r13,%rbx 3180 movq %rax,48(%rdi) 3181 movq %rbx,56(%rdi) 3182 leaq 64(%rdi),%rdi 3183.byte 102,72,15,126,213 3184__bn_sqrx8x_reduction: 3185 xorl %eax,%eax 3186 movq 32+8(%rsp),%rbx 3187 movq 48+8(%rsp),%rdx 3188 leaq -64(%rbp,%r9,1),%rcx 3189 3190 movq %rcx,0+8(%rsp) 3191 movq %rdi,8+8(%rsp) 3192 3193 leaq 48+8(%rsp),%rdi 3194 jmp .Lsqrx8x_reduction_loop 3195 3196.align 32 3197.Lsqrx8x_reduction_loop: 3198 movq 8(%rdi),%r9 3199 movq 16(%rdi),%r10 3200 movq 24(%rdi),%r11 3201 movq 32(%rdi),%r12 3202 movq %rdx,%r8 3203 imulq %rbx,%rdx 3204 movq 40(%rdi),%r13 3205 movq 48(%rdi),%r14 3206 movq 56(%rdi),%r15 3207 movq %rax,24+8(%rsp) 3208 3209 leaq 64(%rdi),%rdi 3210 xorq %rsi,%rsi 3211 movq $-8,%rcx 3212 jmp .Lsqrx8x_reduce 3213 3214.align 32 3215.Lsqrx8x_reduce: 3216 movq %r8,%rbx 3217 mulxq 0(%rbp),%rax,%r8 3218 adcxq %rbx,%rax 3219 adoxq %r9,%r8 3220 3221 mulxq 8(%rbp),%rbx,%r9 3222 adcxq %rbx,%r8 3223 adoxq %r10,%r9 3224 3225 mulxq 16(%rbp),%rbx,%r10 3226 adcxq %rbx,%r9 3227 adoxq %r11,%r10 3228 3229 mulxq 24(%rbp),%rbx,%r11 3230 adcxq %rbx,%r10 3231 adoxq %r12,%r11 3232 3233.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3234 movq %rdx,%rax 3235 movq %r8,%rdx 3236 adcxq %rbx,%r11 3237 adoxq %r13,%r12 3238 3239 mulxq 32+8(%rsp),%rbx,%rdx 3240 movq %rax,%rdx 3241 movq %rax,64+48+8(%rsp,%rcx,8) 3242 3243 mulxq 40(%rbp),%rax,%r13 3244 adcxq %rax,%r12 3245 adoxq %r14,%r13 3246 3247 mulxq 48(%rbp),%rax,%r14 3248 adcxq %rax,%r13 3249 adoxq %r15,%r14 3250 3251 mulxq 56(%rbp),%rax,%r15 3252 movq %rbx,%rdx 3253 adcxq %rax,%r14 3254 adoxq %rsi,%r15 3255 adcxq %rsi,%r15 3256 3257.byte 0x67,0x67,0x67 3258 incq %rcx 3259 jnz .Lsqrx8x_reduce 3260 3261 movq %rsi,%rax 3262 cmpq 0+8(%rsp),%rbp 3263 jae .Lsqrx8x_no_tail 3264 3265 movq 48+8(%rsp),%rdx 3266 addq 0(%rdi),%r8 3267 leaq 64(%rbp),%rbp 3268 movq $-8,%rcx 3269 adcxq 8(%rdi),%r9 3270 adcxq 16(%rdi),%r10 3271 adcq 24(%rdi),%r11 3272 adcq 32(%rdi),%r12 3273 adcq
40(%rdi),%r13 3274 adcq 48(%rdi),%r14 3275 adcq 56(%rdi),%r15 3276 leaq 64(%rdi),%rdi 3277 sbbq %rax,%rax 3278 3279 xorq %rsi,%rsi 3280 movq %rax,16+8(%rsp) 3281 jmp .Lsqrx8x_tail 3282 3283.align 32 3284.Lsqrx8x_tail: 3285 movq %r8,%rbx 3286 mulxq 0(%rbp),%rax,%r8 3287 adcxq %rax,%rbx 3288 adoxq %r9,%r8 3289 3290 mulxq 8(%rbp),%rax,%r9 3291 adcxq %rax,%r8 3292 adoxq %r10,%r9 3293 3294 mulxq 16(%rbp),%rax,%r10 3295 adcxq %rax,%r9 3296 adoxq %r11,%r10 3297 3298 mulxq 24(%rbp),%rax,%r11 3299 adcxq %rax,%r10 3300 adoxq %r12,%r11 3301 3302.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3303 adcxq %rax,%r11 3304 adoxq %r13,%r12 3305 3306 mulxq 40(%rbp),%rax,%r13 3307 adcxq %rax,%r12 3308 adoxq %r14,%r13 3309 3310 mulxq 48(%rbp),%rax,%r14 3311 adcxq %rax,%r13 3312 adoxq %r15,%r14 3313 3314 mulxq 56(%rbp),%rax,%r15 3315 movq 72+48+8(%rsp,%rcx,8),%rdx 3316 adcxq %rax,%r14 3317 adoxq %rsi,%r15 3318 movq %rbx,(%rdi,%rcx,8) 3319 movq %r8,%rbx 3320 adcxq %rsi,%r15 3321 3322 incq %rcx 3323 jnz .Lsqrx8x_tail 3324 3325 cmpq 0+8(%rsp),%rbp 3326 jae .Lsqrx8x_tail_done 3327 3328 subq 16+8(%rsp),%rsi 3329 movq 48+8(%rsp),%rdx 3330 leaq 64(%rbp),%rbp 3331 adcq 0(%rdi),%r8 3332 adcq 8(%rdi),%r9 3333 adcq 16(%rdi),%r10 3334 adcq 24(%rdi),%r11 3335 adcq 32(%rdi),%r12 3336 adcq 40(%rdi),%r13 3337 adcq 48(%rdi),%r14 3338 adcq 56(%rdi),%r15 3339 leaq 64(%rdi),%rdi 3340 sbbq %rax,%rax 3341 subq $8,%rcx 3342 3343 xorq %rsi,%rsi 3344 movq %rax,16+8(%rsp) 3345 jmp .Lsqrx8x_tail 3346 3347.align 32 3348.Lsqrx8x_tail_done: 3349 addq 24+8(%rsp),%r8 3350 adcq $0,%r9 3351 adcq $0,%r10 3352 adcq $0,%r11 3353 adcq $0,%r12 3354 adcq $0,%r13 3355 adcq $0,%r14 3356 adcq $0,%r15 3357 3358 3359 movq %rsi,%rax 3360 3361 subq 16+8(%rsp),%rsi 3362.Lsqrx8x_no_tail: 3363 adcq 0(%rdi),%r8 3364.byte 102,72,15,126,217 3365 adcq 8(%rdi),%r9 3366 movq 56(%rbp),%rsi 3367.byte 102,72,15,126,213 3368 adcq 16(%rdi),%r10 3369 adcq 24(%rdi),%r11 3370 adcq 32(%rdi),%r12 3371 adcq 40(%rdi),%r13 3372 adcq 48(%rdi),%r14 3373
/*
 * NOTE(review): auto-generated perlasm — byte-for-byte content, comments
 * only.  Span contains:
 *  - bn_sqrx8x_internal (tail): stores the 8 reduced words back to (%rdi)
 *    and loops to .Lsqrx8x_reduction_loop until %r8 reaches 8+8(%rsp);
 *  - __bn_postx4x_internal: BMI2 variant of the post-squaring fixup — uses
 *    andnq (mask in %rax) on the modulus words at (%rbp) and an adc chain
 *    against (%rdi), storing to (%rdx): the masked final subtraction, same
 *    contract as __bn_post4x_internal above;
 *  - bn_get_bits5: extracts a 5-bit window at bit offset %esi from the
 *    buffer at %rdi, using a cmov between the two possible word-aligned
 *    16-bit loads so the access pattern does not depend on the offset's
 *    low bits;
 *  - bn_scatter5 (directives only): .globl/.type/.align; body is below.
 */
adcq 56(%rdi),%r15 3374 adcq %rax,%rax 3375 3376 movq 32+8(%rsp),%rbx 3377 movq 64(%rdi,%rcx,1),%rdx 3378 3379 movq %r8,0(%rdi) 3380 leaq 64(%rdi),%r8 3381 movq %r9,8(%rdi) 3382 movq %r10,16(%rdi) 3383 movq %r11,24(%rdi) 3384 movq %r12,32(%rdi) 3385 movq %r13,40(%rdi) 3386 movq %r14,48(%rdi) 3387 movq %r15,56(%rdi) 3388 3389 leaq 64(%rdi,%rcx,1),%rdi 3390 cmpq 8+8(%rsp),%r8 3391 jb .Lsqrx8x_reduction_loop 3392 .byte 0xf3,0xc3 3393.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3394.align 32 3395__bn_postx4x_internal: 3396 movq 0(%rbp),%r12 3397 movq %rcx,%r10 3398 movq %rcx,%r9 3399 negq %rax 3400 sarq $3+2,%rcx 3401 3402.byte 102,72,15,126,202 3403.byte 102,72,15,126,206 3404 decq %r12 3405 movq 8(%rbp),%r13 3406 xorq %r8,%r8 3407 movq 16(%rbp),%r14 3408 movq 24(%rbp),%r15 3409 jmp .Lsqrx4x_sub_entry 3410 3411.align 16 3412.Lsqrx4x_sub: 3413 movq 0(%rbp),%r12 3414 movq 8(%rbp),%r13 3415 movq 16(%rbp),%r14 3416 movq 24(%rbp),%r15 3417.Lsqrx4x_sub_entry: 3418 andnq %rax,%r12,%r12 3419 leaq 32(%rbp),%rbp 3420 andnq %rax,%r13,%r13 3421 andnq %rax,%r14,%r14 3422 andnq %rax,%r15,%r15 3423 3424 negq %r8 3425 adcq 0(%rdi),%r12 3426 adcq 8(%rdi),%r13 3427 adcq 16(%rdi),%r14 3428 adcq 24(%rdi),%r15 3429 movq %r12,0(%rdx) 3430 leaq 32(%rdi),%rdi 3431 movq %r13,8(%rdx) 3432 sbbq %r8,%r8 3433 movq %r14,16(%rdx) 3434 movq %r15,24(%rdx) 3435 leaq 32(%rdx),%rdx 3436 3437 incq %rcx 3438 jnz .Lsqrx4x_sub 3439 3440 negq %r9 3441 3442 .byte 0xf3,0xc3 3443.size __bn_postx4x_internal,.-__bn_postx4x_internal 3444.globl bn_get_bits5 3445.type bn_get_bits5,@function 3446.align 16 3447bn_get_bits5: 3448 leaq 0(%rdi),%r10 3449 leaq 1(%rdi),%r11 3450 movl %esi,%ecx 3451 shrl $4,%esi 3452 andl $15,%ecx 3453 leal -8(%rcx),%eax 3454 cmpl $11,%ecx 3455 cmovaq %r11,%r10 3456 cmoval %eax,%ecx 3457 movzwl (%r10,%rsi,2),%eax 3458 shrl %cl,%eax 3459 andl $31,%eax 3460 .byte 0xf3,0xc3 3461.size bn_get_bits5,.-bn_get_bits5 3462 3463.globl bn_scatter5 3464.type bn_scatter5,@function 3465.align 16
# bn_scatter5(inp /* %rdi */, num /* %esi */, tbl /* %rdx */, idx /* %rcx */)
# Store num 64-bit limbs from inp into column idx of the power table:
# limb i is written at tbl + idx*8 + i*256, i.e. each 256-byte row holds
# 32 eight-byte slots, one per possible idx, so bn_gather5 below can sweep
# entire rows when reading a column back out.
bn_scatter5:
	cmpl $0,%esi
	jz .Lscatter_epilogue		# num == 0: nothing to store
	leaq (%rdx,%rcx,8),%rdx		# &tbl[idx]
.Lscatter:
	movq (%rdi),%rax
	leaq 8(%rdi),%rdi
	movq %rax,(%rdx)
	leaq 256(%rdx),%rdx		# next row (32 slots x 8 bytes)
	subl $1,%esi
	jnz .Lscatter
.Lscatter_epilogue:
	.byte 0xf3,0xc3			# rep ret
.size bn_scatter5,.-bn_scatter5

.globl bn_gather5
.type bn_gather5,@function
.align 32
# bn_gather5(out /* %rdi */, num /* %esi */, tbl /* %rdx */, idx /* %ecx */)
# Cache-timing-safe gather: reads column idx back out of the table written
# by bn_scatter5.  Instead of indexed loads it first builds 16 XMM masks
# (one per pair of the 32 slots) by PCMPEQD against the broadcast index,
# then every iteration loads ALL 32 slots of a row and combines them
# through PAND/POR -- so the memory access pattern is independent of idx.
bn_gather5:
.LSEH_begin_bn_gather5:

# Hand-encoded prologue (raw bytes keep its length byte-exact for the
# Win64 SEH bookkeeping around .LSEH_begin/.LSEH_end):
#   lea (%rsp),%r10    -- remember the caller's stack pointer in %r10
#   sub $0x108,%rsp    -- 264 bytes of scratch for the mask table
.byte 0x4c,0x8d,0x14,0x24
.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
	leaq .Linc(%rip),%rax
	andq $-16,%rsp			# align scratch so MOVDQA stores are legal

	movd %ecx,%xmm5			# idx
	movdqa 0(%rax),%xmm0		# {0,0,1,1}: starting slot indices, one per 64-bit lane
	movdqa 16(%rax),%xmm1		# {2,2,2,2}: per-step slot-index increment
	leaq 128(%rdx),%r11		# bias table pointer: offsets become disp8 -128..112
	leaq 128(%rsp),%rax		# same bias for the mask scratch area

	pshufd $0,%xmm5,%xmm5		# broadcast idx to all four dwords
	movdqa %xmm1,%xmm4		# %xmm4 keeps the constant increment
	movdqa %xmm1,%xmm2
# Generate the sixteen masks: each PCMPEQD leaves all-ones in the 64-bit
# lane whose slot index equals idx and zeros elsewhere; each PADDD
# advances the running slot indices by 2.  Stores fill -128..112(%rax).
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0		# mask for slots {0,1}
	movdqa %xmm4,%xmm3

	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1		# mask for slots {2,3}
	movdqa %xmm0,-128(%rax)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,-112(%rax)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,-96(%rax)
	movdqa %xmm4,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,-80(%rax)
	movdqa %xmm4,%xmm3

	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,-64(%rax)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,-48(%rax)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,-32(%rax)
	movdqa %xmm4,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,-16(%rax)
	movdqa %xmm4,%xmm3

	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,0(%rax)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,16(%rax)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,32(%rax)
	movdqa %xmm4,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,48(%rax)
	movdqa %xmm4,%xmm3

	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,64(%rax)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,80(%rax)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,96(%rax)
	movdqa %xmm4,%xmm2
	movdqa %xmm3,112(%rax)		# mask for slots {30,31}
	jmp .Lgather

.align 32
# Per-limb gather loop: AND every one of the 16 row vectors (32 slots)
# against its mask, OR everything together, collapse the two 64-bit
# halves, and emit the single selected slot.  Two accumulators
# (%xmm4/%xmm5) split the OR tree for instruction-level parallelism.
.Lgather:
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	movdqa -128(%r11),%xmm0		# slots 0..7 of the row
	movdqa -112(%r11),%xmm1
	movdqa -96(%r11),%xmm2
	pand -128(%rax),%xmm0
	movdqa -80(%r11),%xmm3
	pand -112(%rax),%xmm1
	por %xmm0,%xmm4
	pand -96(%rax),%xmm2
	por %xmm1,%xmm5
	pand -80(%rax),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa -64(%r11),%xmm0		# slots 8..15
	movdqa -48(%r11),%xmm1
	movdqa -32(%r11),%xmm2
	pand -64(%rax),%xmm0
	movdqa -16(%r11),%xmm3
	pand -48(%rax),%xmm1
	por %xmm0,%xmm4
	pand -32(%rax),%xmm2
	por %xmm1,%xmm5
	pand -16(%rax),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 0(%r11),%xmm0		# slots 16..23
	movdqa 16(%r11),%xmm1
	movdqa 32(%r11),%xmm2
	pand 0(%rax),%xmm0
	movdqa 48(%r11),%xmm3
	pand 16(%rax),%xmm1
	por %xmm0,%xmm4
	pand 32(%rax),%xmm2
	por %xmm1,%xmm5
	pand 48(%rax),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 64(%r11),%xmm0		# slots 24..31
	movdqa 80(%r11),%xmm1
	movdqa 96(%r11),%xmm2
	pand 64(%rax),%xmm0
	movdqa 112(%r11),%xmm3
	pand 80(%rax),%xmm1
	por %xmm0,%xmm4
	pand 96(%rax),%xmm2
	por %xmm1,%xmm5
	pand 112(%rax),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	por %xmm5,%xmm4			# merge the two OR trees
	leaq 256(%r11),%r11		# next row of the table
	pshufd $0x4e,%xmm4,%xmm0	# swap 64-bit halves
	por %xmm4,%xmm0			# selected slot now in the low 64 bits
	movq %xmm0,(%rdi)		# emit one gathered limb
	leaq 8(%rdi),%rdi
	subl $1,%esi
	jnz .Lgather

	leaq (%r10),%rsp		# restore the caller's stack pointer
	.byte 0xf3,0xc3			# rep ret
.LSEH_end_bn_gather5:
.size bn_gather5,.-bn_gather5
.align 64
# .Linc seeds bn_gather5's mask generator: the first vector holds the
# starting slot indices {0,0,1,1} (each 64-bit lane tracks one of the two
# table slots covered by an XMM register), the second the per-step
# increment {2,2,2,2}.
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
# NUL-terminated ASCII identification string:
# "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS
#  by <appro@openssl.org>"
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0