x86_64-mont5.S revision 238405
1238405Sjkim # $FreeBSD: head/secure/lib/libcrypto/amd64/x86_64-mont5.S 238405 2012-07-12 19:30:53Z jkim $ 2238405Sjkim.text 3238405Sjkim 4238405Sjkim.globl bn_mul_mont_gather5 5238405Sjkim.type bn_mul_mont_gather5,@function 6238405Sjkim.align 64 7238405Sjkimbn_mul_mont_gather5: 8238405Sjkim testl $3,%r9d 9238405Sjkim jnz .Lmul_enter 10238405Sjkim cmpl $8,%r9d 11238405Sjkim jb .Lmul_enter 12238405Sjkim jmp .Lmul4x_enter 13238405Sjkim 14238405Sjkim.align 16 15238405Sjkim.Lmul_enter: 16238405Sjkim movl %r9d,%r9d 17238405Sjkim movl 8(%rsp),%r10d 18238405Sjkim pushq %rbx 19238405Sjkim pushq %rbp 20238405Sjkim pushq %r12 21238405Sjkim pushq %r13 22238405Sjkim pushq %r14 23238405Sjkim pushq %r15 24238405Sjkim movq %rsp,%rax 25238405Sjkim leaq 2(%r9),%r11 26238405Sjkim negq %r11 27238405Sjkim leaq (%rsp,%r11,8),%rsp 28238405Sjkim andq $-1024,%rsp 29238405Sjkim 30238405Sjkim movq %rax,8(%rsp,%r9,8) 31238405Sjkim.Lmul_body: 32238405Sjkim movq %rdx,%r12 33238405Sjkim movq %r10,%r11 34238405Sjkim shrq $3,%r10 35238405Sjkim andq $7,%r11 36238405Sjkim notq %r10 37238405Sjkim leaq .Lmagic_masks(%rip),%rax 38238405Sjkim andq $3,%r10 39238405Sjkim leaq 96(%r12,%r11,8),%r12 40238405Sjkim movq 0(%rax,%r10,8),%xmm4 41238405Sjkim movq 8(%rax,%r10,8),%xmm5 42238405Sjkim movq 16(%rax,%r10,8),%xmm6 43238405Sjkim movq 24(%rax,%r10,8),%xmm7 44238405Sjkim 45238405Sjkim movq -96(%r12),%xmm0 46238405Sjkim movq -32(%r12),%xmm1 47238405Sjkim pand %xmm4,%xmm0 48238405Sjkim movq 32(%r12),%xmm2 49238405Sjkim pand %xmm5,%xmm1 50238405Sjkim movq 96(%r12),%xmm3 51238405Sjkim pand %xmm6,%xmm2 52238405Sjkim por %xmm1,%xmm0 53238405Sjkim pand %xmm7,%xmm3 54238405Sjkim por %xmm2,%xmm0 55238405Sjkim leaq 256(%r12),%r12 56238405Sjkim por %xmm3,%xmm0 57238405Sjkim 58238405Sjkim.byte 102,72,15,126,195 59238405Sjkim 60238405Sjkim movq (%r8),%r8 61238405Sjkim movq (%rsi),%rax 62238405Sjkim 63238405Sjkim xorq %r14,%r14 64238405Sjkim xorq %r15,%r15 65238405Sjkim 66238405Sjkim movq -96(%r12),%xmm0 67238405Sjkim movq -32(%r12),%xmm1 68238405Sjkim pand %xmm4,%xmm0 69238405Sjkim movq 32(%r12),%xmm2 70238405Sjkim pand %xmm5,%xmm1 71238405Sjkim 72238405Sjkim movq %r8,%rbp 73238405Sjkim mulq %rbx 74238405Sjkim movq %rax,%r10 75238405Sjkim movq (%rcx),%rax 76238405Sjkim 77238405Sjkim movq 96(%r12),%xmm3 78238405Sjkim pand %xmm6,%xmm2 79238405Sjkim por %xmm1,%xmm0 80238405Sjkim pand %xmm7,%xmm3 81238405Sjkim 82238405Sjkim imulq %r10,%rbp 83238405Sjkim movq %rdx,%r11 84238405Sjkim 85238405Sjkim por %xmm2,%xmm0 86238405Sjkim leaq 256(%r12),%r12 87238405Sjkim por %xmm3,%xmm0 88238405Sjkim 89238405Sjkim mulq %rbp 90238405Sjkim addq %rax,%r10 91238405Sjkim movq 8(%rsi),%rax 92238405Sjkim adcq $0,%rdx 93238405Sjkim movq %rdx,%r13 94238405Sjkim 95238405Sjkim leaq 1(%r15),%r15 96238405Sjkim jmp .L1st_enter 97238405Sjkim 98238405Sjkim.align 16 99238405Sjkim.L1st: 100238405Sjkim addq %rax,%r13 101238405Sjkim movq (%rsi,%r15,8),%rax 102238405Sjkim adcq $0,%rdx 103238405Sjkim addq %r11,%r13 104238405Sjkim movq %r10,%r11 105238405Sjkim adcq $0,%rdx 106238405Sjkim movq %r13,-16(%rsp,%r15,8) 107238405Sjkim movq %rdx,%r13 108238405Sjkim 109238405Sjkim.L1st_enter: 110238405Sjkim mulq %rbx 111238405Sjkim addq %rax,%r11 112238405Sjkim movq (%rcx,%r15,8),%rax 113238405Sjkim adcq $0,%rdx 114238405Sjkim leaq 1(%r15),%r15 115238405Sjkim movq %rdx,%r10 116238405Sjkim 117238405Sjkim mulq %rbp 118238405Sjkim cmpq %r9,%r15 119238405Sjkim jne .L1st 120238405Sjkim 121238405Sjkim.byte 102,72,15,126,195 122238405Sjkim 123238405Sjkim addq %rax,%r13 124238405Sjkim movq (%rsi),%rax 125238405Sjkim adcq $0,%rdx 126238405Sjkim addq %r11,%r13 127238405Sjkim adcq $0,%rdx 128238405Sjkim movq %r13,-16(%rsp,%r15,8) 129238405Sjkim movq %rdx,%r13 130238405Sjkim movq %r10,%r11 131238405Sjkim 132238405Sjkim xorq %rdx,%rdx 133238405Sjkim addq %r11,%r13 134238405Sjkim adcq $0,%rdx 135238405Sjkim movq %r13,-8(%rsp,%r9,8) 136238405Sjkim movq %rdx,(%rsp,%r9,8) 137238405Sjkim 138238405Sjkim leaq 1(%r14),%r14 139238405Sjkim jmp .Louter 140238405Sjkim.align 16 141238405Sjkim.Louter: 142238405Sjkim xorq %r15,%r15 143238405Sjkim movq %r8,%rbp 144238405Sjkim movq (%rsp),%r10 145238405Sjkim 146238405Sjkim movq -96(%r12),%xmm0 147238405Sjkim movq -32(%r12),%xmm1 148238405Sjkim pand %xmm4,%xmm0 149238405Sjkim movq 32(%r12),%xmm2 150238405Sjkim pand %xmm5,%xmm1 151238405Sjkim 152238405Sjkim mulq %rbx 153238405Sjkim addq %rax,%r10 154238405Sjkim movq (%rcx),%rax 155238405Sjkim adcq $0,%rdx 156238405Sjkim 157238405Sjkim movq 96(%r12),%xmm3 158238405Sjkim pand %xmm6,%xmm2 159238405Sjkim por %xmm1,%xmm0 160238405Sjkim pand %xmm7,%xmm3 161238405Sjkim 162238405Sjkim imulq %r10,%rbp 163238405Sjkim movq %rdx,%r11 164238405Sjkim 165238405Sjkim por %xmm2,%xmm0 166238405Sjkim leaq 256(%r12),%r12 167238405Sjkim por %xmm3,%xmm0 168238405Sjkim 169238405Sjkim mulq %rbp 170238405Sjkim addq %rax,%r10 171238405Sjkim movq 8(%rsi),%rax 172238405Sjkim adcq $0,%rdx 173238405Sjkim movq 8(%rsp),%r10 174238405Sjkim movq %rdx,%r13 175238405Sjkim 176238405Sjkim leaq 1(%r15),%r15 177238405Sjkim jmp .Linner_enter 178238405Sjkim 179238405Sjkim.align 16 180238405Sjkim.Linner: 181238405Sjkim addq %rax,%r13 182238405Sjkim movq (%rsi,%r15,8),%rax 183238405Sjkim adcq $0,%rdx 184238405Sjkim addq %r10,%r13 185238405Sjkim movq (%rsp,%r15,8),%r10 186238405Sjkim adcq $0,%rdx 187238405Sjkim movq %r13,-16(%rsp,%r15,8) 188238405Sjkim movq %rdx,%r13 189238405Sjkim 190238405Sjkim.Linner_enter: 191238405Sjkim mulq %rbx 192238405Sjkim addq %rax,%r11 193238405Sjkim movq (%rcx,%r15,8),%rax 194238405Sjkim adcq $0,%rdx 195238405Sjkim addq %r11,%r10 196238405Sjkim movq %rdx,%r11 197238405Sjkim adcq $0,%r11 198238405Sjkim leaq 1(%r15),%r15 199238405Sjkim 200238405Sjkim mulq %rbp 201238405Sjkim cmpq %r9,%r15 202238405Sjkim jne .Linner 203238405Sjkim 204238405Sjkim.byte 102,72,15,126,195 205238405Sjkim 206238405Sjkim addq %rax,%r13 207238405Sjkim movq (%rsi),%rax 208238405Sjkim adcq $0,%rdx 209238405Sjkim addq %r10,%r13 210238405Sjkim movq (%rsp,%r15,8),%r10 211238405Sjkim adcq $0,%rdx 212238405Sjkim movq %r13,-16(%rsp,%r15,8) 213238405Sjkim movq %rdx,%r13 214238405Sjkim 215238405Sjkim xorq %rdx,%rdx 216238405Sjkim addq %r11,%r13 217238405Sjkim adcq $0,%rdx 218238405Sjkim addq %r10,%r13 219238405Sjkim adcq $0,%rdx 220238405Sjkim movq %r13,-8(%rsp,%r9,8) 221238405Sjkim movq %rdx,(%rsp,%r9,8) 222238405Sjkim 223238405Sjkim leaq 1(%r14),%r14 224238405Sjkim cmpq %r9,%r14 225238405Sjkim jl .Louter 226238405Sjkim 227238405Sjkim xorq %r14,%r14 228238405Sjkim movq (%rsp),%rax 229238405Sjkim leaq (%rsp),%rsi 230238405Sjkim movq %r9,%r15 231238405Sjkim jmp .Lsub 232238405Sjkim.align 16 233238405Sjkim.Lsub: sbbq (%rcx,%r14,8),%rax 234238405Sjkim movq %rax,(%rdi,%r14,8) 235238405Sjkim movq 8(%rsi,%r14,8),%rax 236238405Sjkim leaq 1(%r14),%r14 237238405Sjkim decq %r15 238238405Sjkim jnz .Lsub 239238405Sjkim 240238405Sjkim sbbq $0,%rax 241238405Sjkim xorq %r14,%r14 242238405Sjkim andq %rax,%rsi 243238405Sjkim notq %rax 244238405Sjkim movq %rdi,%rcx 245238405Sjkim andq %rax,%rcx 246238405Sjkim movq %r9,%r15 247238405Sjkim orq %rcx,%rsi 248238405Sjkim.align 16 249238405Sjkim.Lcopy: 250238405Sjkim movq (%rsi,%r14,8),%rax 251238405Sjkim movq %r14,(%rsp,%r14,8) 252238405Sjkim movq %rax,(%rdi,%r14,8) 253238405Sjkim leaq 1(%r14),%r14 254238405Sjkim subq $1,%r15 255238405Sjkim jnz .Lcopy 256238405Sjkim 257238405Sjkim movq 8(%rsp,%r9,8),%rsi 258238405Sjkim movq $1,%rax 259238405Sjkim movq (%rsi),%r15 260238405Sjkim movq 8(%rsi),%r14 261238405Sjkim movq 16(%rsi),%r13 262238405Sjkim movq 24(%rsi),%r12 263238405Sjkim movq 32(%rsi),%rbp 264238405Sjkim movq 40(%rsi),%rbx 265238405Sjkim leaq 48(%rsi),%rsp 266238405Sjkim.Lmul_epilogue: 267238405Sjkim .byte 0xf3,0xc3 268238405Sjkim.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 269238405Sjkim.type bn_mul4x_mont_gather5,@function 270238405Sjkim.align 16 271238405Sjkimbn_mul4x_mont_gather5: 272238405Sjkim.Lmul4x_enter: 273238405Sjkim movl %r9d,%r9d 274238405Sjkim movl 8(%rsp),%r10d 275238405Sjkim pushq %rbx 276238405Sjkim pushq %rbp 277238405Sjkim pushq %r12 278238405Sjkim pushq %r13 279238405Sjkim pushq %r14 280238405Sjkim pushq %r15 281238405Sjkim movq %rsp,%rax 282238405Sjkim leaq 4(%r9),%r11 283238405Sjkim negq %r11 284238405Sjkim leaq (%rsp,%r11,8),%rsp 285238405Sjkim andq $-1024,%rsp 286238405Sjkim 287238405Sjkim movq %rax,8(%rsp,%r9,8) 288238405Sjkim.Lmul4x_body: 289238405Sjkim movq %rdi,16(%rsp,%r9,8) 290238405Sjkim movq %rdx,%r12 291238405Sjkim movq %r10,%r11 292238405Sjkim shrq $3,%r10 293238405Sjkim andq $7,%r11 294238405Sjkim notq %r10 295238405Sjkim leaq .Lmagic_masks(%rip),%rax 296238405Sjkim andq $3,%r10 297238405Sjkim leaq 96(%r12,%r11,8),%r12 298238405Sjkim movq 0(%rax,%r10,8),%xmm4 299238405Sjkim movq 8(%rax,%r10,8),%xmm5 300238405Sjkim movq 16(%rax,%r10,8),%xmm6 301238405Sjkim movq 24(%rax,%r10,8),%xmm7 302238405Sjkim 303238405Sjkim movq -96(%r12),%xmm0 304238405Sjkim movq -32(%r12),%xmm1 305238405Sjkim pand %xmm4,%xmm0 306238405Sjkim movq 32(%r12),%xmm2 307238405Sjkim pand %xmm5,%xmm1 308238405Sjkim movq 96(%r12),%xmm3 309238405Sjkim pand %xmm6,%xmm2 310238405Sjkim por %xmm1,%xmm0 311238405Sjkim pand %xmm7,%xmm3 312238405Sjkim por %xmm2,%xmm0 313238405Sjkim leaq 256(%r12),%r12 314238405Sjkim por %xmm3,%xmm0 315238405Sjkim 316238405Sjkim.byte 102,72,15,126,195 317238405Sjkim movq (%r8),%r8 318238405Sjkim movq (%rsi),%rax 319238405Sjkim 320238405Sjkim xorq %r14,%r14 321238405Sjkim xorq %r15,%r15 322238405Sjkim 323238405Sjkim movq -96(%r12),%xmm0 324238405Sjkim movq -32(%r12),%xmm1 325238405Sjkim pand %xmm4,%xmm0 326238405Sjkim movq 32(%r12),%xmm2 327238405Sjkim pand %xmm5,%xmm1 328238405Sjkim 329238405Sjkim movq %r8,%rbp 330238405Sjkim mulq %rbx 331238405Sjkim movq %rax,%r10 332238405Sjkim movq (%rcx),%rax 333238405Sjkim 334238405Sjkim movq 96(%r12),%xmm3 335238405Sjkim pand %xmm6,%xmm2 336238405Sjkim por %xmm1,%xmm0 337238405Sjkim pand %xmm7,%xmm3 338238405Sjkim 339238405Sjkim imulq %r10,%rbp 340238405Sjkim movq %rdx,%r11 341238405Sjkim 342238405Sjkim por %xmm2,%xmm0 343238405Sjkim leaq 256(%r12),%r12 344238405Sjkim por %xmm3,%xmm0 345238405Sjkim 346238405Sjkim mulq %rbp 347238405Sjkim addq %rax,%r10 348238405Sjkim movq 8(%rsi),%rax 349238405Sjkim adcq $0,%rdx 350238405Sjkim movq %rdx,%rdi 351238405Sjkim 352238405Sjkim mulq %rbx 353238405Sjkim addq %rax,%r11 354238405Sjkim movq 8(%rcx),%rax 355238405Sjkim adcq $0,%rdx 356238405Sjkim movq %rdx,%r10 357238405Sjkim 358238405Sjkim mulq %rbp 359238405Sjkim addq %rax,%rdi 360238405Sjkim movq 16(%rsi),%rax 361238405Sjkim adcq $0,%rdx 362238405Sjkim addq %r11,%rdi 363238405Sjkim leaq 4(%r15),%r15 364238405Sjkim adcq $0,%rdx 365238405Sjkim movq %rdi,(%rsp) 366238405Sjkim movq %rdx,%r13 367238405Sjkim jmp .L1st4x 368238405Sjkim.align 16 369238405Sjkim.L1st4x: 370238405Sjkim mulq %rbx 371238405Sjkim addq %rax,%r10 372238405Sjkim movq -16(%rcx,%r15,8),%rax 373238405Sjkim adcq $0,%rdx 374238405Sjkim movq %rdx,%r11 375238405Sjkim 376238405Sjkim mulq %rbp 377238405Sjkim addq %rax,%r13 378238405Sjkim movq -8(%rsi,%r15,8),%rax 379238405Sjkim adcq $0,%rdx 380238405Sjkim addq %r10,%r13 381238405Sjkim adcq $0,%rdx 382238405Sjkim movq %r13,-24(%rsp,%r15,8) 383238405Sjkim movq %rdx,%rdi 384238405Sjkim 385238405Sjkim mulq %rbx 386238405Sjkim addq %rax,%r11 387238405Sjkim movq -8(%rcx,%r15,8),%rax 388238405Sjkim adcq $0,%rdx 389238405Sjkim movq %rdx,%r10 390238405Sjkim 391238405Sjkim mulq %rbp 392238405Sjkim addq %rax,%rdi 393238405Sjkim movq (%rsi,%r15,8),%rax 394238405Sjkim adcq $0,%rdx 395238405Sjkim addq %r11,%rdi 396238405Sjkim adcq $0,%rdx 397238405Sjkim movq %rdi,-16(%rsp,%r15,8) 398238405Sjkim movq %rdx,%r13 399238405Sjkim 400238405Sjkim mulq %rbx 401238405Sjkim addq %rax,%r10 402238405Sjkim movq (%rcx,%r15,8),%rax 403238405Sjkim adcq $0,%rdx 404238405Sjkim movq %rdx,%r11 405238405Sjkim 406238405Sjkim mulq %rbp 407238405Sjkim addq %rax,%r13 408238405Sjkim movq 8(%rsi,%r15,8),%rax 409238405Sjkim adcq $0,%rdx 410238405Sjkim addq %r10,%r13 411238405Sjkim adcq $0,%rdx 412238405Sjkim movq %r13,-8(%rsp,%r15,8) 413238405Sjkim movq %rdx,%rdi 414238405Sjkim 415238405Sjkim mulq %rbx 416238405Sjkim addq %rax,%r11 417238405Sjkim movq 8(%rcx,%r15,8),%rax 418238405Sjkim adcq $0,%rdx 419238405Sjkim leaq 4(%r15),%r15 420238405Sjkim movq %rdx,%r10 421238405Sjkim 422238405Sjkim mulq %rbp 423238405Sjkim addq %rax,%rdi 424238405Sjkim movq -16(%rsi,%r15,8),%rax 425238405Sjkim adcq $0,%rdx 426238405Sjkim addq %r11,%rdi 427238405Sjkim adcq $0,%rdx 428238405Sjkim movq %rdi,-32(%rsp,%r15,8) 429238405Sjkim movq %rdx,%r13 430238405Sjkim cmpq %r9,%r15 431238405Sjkim jl .L1st4x 432238405Sjkim 433238405Sjkim mulq %rbx 434238405Sjkim addq %rax,%r10 435238405Sjkim movq -16(%rcx,%r15,8),%rax 436238405Sjkim adcq $0,%rdx 437238405Sjkim movq %rdx,%r11 438238405Sjkim 439238405Sjkim mulq %rbp 440238405Sjkim addq %rax,%r13 441238405Sjkim movq -8(%rsi,%r15,8),%rax 442238405Sjkim adcq $0,%rdx 443238405Sjkim addq %r10,%r13 444238405Sjkim adcq $0,%rdx 445238405Sjkim movq %r13,-24(%rsp,%r15,8) 446238405Sjkim movq %rdx,%rdi 447238405Sjkim 448238405Sjkim mulq %rbx 449238405Sjkim addq %rax,%r11 450238405Sjkim movq -8(%rcx,%r15,8),%rax 451238405Sjkim adcq $0,%rdx 452238405Sjkim movq %rdx,%r10 453238405Sjkim 454238405Sjkim mulq %rbp 455238405Sjkim addq %rax,%rdi 456238405Sjkim movq (%rsi),%rax 457238405Sjkim adcq $0,%rdx 458238405Sjkim addq %r11,%rdi 459238405Sjkim adcq $0,%rdx 460238405Sjkim movq %rdi,-16(%rsp,%r15,8) 461238405Sjkim movq %rdx,%r13 462238405Sjkim 463238405Sjkim.byte 102,72,15,126,195 464238405Sjkim 465238405Sjkim xorq %rdi,%rdi 466238405Sjkim addq %r10,%r13 467238405Sjkim adcq $0,%rdi 468238405Sjkim movq %r13,-8(%rsp,%r15,8) 469238405Sjkim movq %rdi,(%rsp,%r15,8) 470238405Sjkim 471238405Sjkim leaq 1(%r14),%r14 472238405Sjkim.align 4 473238405Sjkim.Louter4x: 474238405Sjkim xorq %r15,%r15 475238405Sjkim movq -96(%r12),%xmm0 476238405Sjkim movq -32(%r12),%xmm1 477238405Sjkim pand %xmm4,%xmm0 478238405Sjkim movq 32(%r12),%xmm2 479238405Sjkim pand %xmm5,%xmm1 480238405Sjkim 481238405Sjkim movq (%rsp),%r10 482238405Sjkim movq %r8,%rbp 483238405Sjkim mulq %rbx 484238405Sjkim addq %rax,%r10 485238405Sjkim movq (%rcx),%rax 486238405Sjkim adcq $0,%rdx 487238405Sjkim 488238405Sjkim movq 96(%r12),%xmm3 489238405Sjkim pand %xmm6,%xmm2 490238405Sjkim por %xmm1,%xmm0 491238405Sjkim pand %xmm7,%xmm3 492238405Sjkim 493238405Sjkim imulq %r10,%rbp 494238405Sjkim movq %rdx,%r11 495238405Sjkim 496238405Sjkim por %xmm2,%xmm0 497238405Sjkim leaq 256(%r12),%r12 498238405Sjkim por %xmm3,%xmm0 499238405Sjkim 500238405Sjkim mulq %rbp 501238405Sjkim addq %rax,%r10 502238405Sjkim movq 8(%rsi),%rax 503238405Sjkim adcq $0,%rdx 504238405Sjkim movq %rdx,%rdi 505238405Sjkim 506238405Sjkim mulq %rbx 507238405Sjkim addq %rax,%r11 508238405Sjkim movq 8(%rcx),%rax 509238405Sjkim adcq $0,%rdx 510238405Sjkim addq 8(%rsp),%r11 511238405Sjkim adcq $0,%rdx 512238405Sjkim movq %rdx,%r10 513238405Sjkim 514238405Sjkim mulq %rbp 515238405Sjkim addq %rax,%rdi 516238405Sjkim movq 16(%rsi),%rax 517238405Sjkim adcq $0,%rdx 518238405Sjkim addq %r11,%rdi 519238405Sjkim leaq 4(%r15),%r15 520238405Sjkim adcq $0,%rdx 521238405Sjkim movq %rdx,%r13 522238405Sjkim jmp .Linner4x 523238405Sjkim.align 16 524238405Sjkim.Linner4x: 525238405Sjkim mulq %rbx 526238405Sjkim addq %rax,%r10 527238405Sjkim movq -16(%rcx,%r15,8),%rax 528238405Sjkim adcq $0,%rdx 529238405Sjkim addq -16(%rsp,%r15,8),%r10 530238405Sjkim adcq $0,%rdx 531238405Sjkim movq %rdx,%r11 532238405Sjkim 533238405Sjkim mulq %rbp 534238405Sjkim addq %rax,%r13 535238405Sjkim movq -8(%rsi,%r15,8),%rax 536238405Sjkim adcq $0,%rdx 537238405Sjkim addq %r10,%r13 538238405Sjkim adcq $0,%rdx 539238405Sjkim movq %rdi,-32(%rsp,%r15,8) 540238405Sjkim movq %rdx,%rdi 541238405Sjkim 542238405Sjkim mulq %rbx 543238405Sjkim addq %rax,%r11 544238405Sjkim movq -8(%rcx,%r15,8),%rax 545238405Sjkim adcq $0,%rdx 546238405Sjkim addq -8(%rsp,%r15,8),%r11 547238405Sjkim adcq $0,%rdx 548238405Sjkim movq %rdx,%r10 549238405Sjkim 550238405Sjkim mulq %rbp 551238405Sjkim addq %rax,%rdi 552238405Sjkim movq (%rsi,%r15,8),%rax 553238405Sjkim adcq $0,%rdx 554238405Sjkim addq %r11,%rdi 555238405Sjkim adcq $0,%rdx 556238405Sjkim movq %r13,-24(%rsp,%r15,8) 557238405Sjkim movq %rdx,%r13 558238405Sjkim 559238405Sjkim mulq %rbx 560238405Sjkim addq %rax,%r10 561238405Sjkim movq (%rcx,%r15,8),%rax 562238405Sjkim adcq $0,%rdx 563238405Sjkim addq (%rsp,%r15,8),%r10 564238405Sjkim adcq $0,%rdx 565238405Sjkim movq %rdx,%r11 566238405Sjkim 567238405Sjkim mulq %rbp 568238405Sjkim addq %rax,%r13 569238405Sjkim movq 8(%rsi,%r15,8),%rax 570238405Sjkim adcq $0,%rdx 571238405Sjkim addq %r10,%r13 572238405Sjkim adcq $0,%rdx 573238405Sjkim movq %rdi,-16(%rsp,%r15,8) 574238405Sjkim movq %rdx,%rdi 575238405Sjkim 576238405Sjkim mulq %rbx 577238405Sjkim addq %rax,%r11 578238405Sjkim movq 8(%rcx,%r15,8),%rax 579238405Sjkim adcq $0,%rdx 580238405Sjkim addq 8(%rsp,%r15,8),%r11 581238405Sjkim adcq $0,%rdx 582238405Sjkim leaq 4(%r15),%r15 583238405Sjkim movq %rdx,%r10 584238405Sjkim 585238405Sjkim mulq %rbp 586238405Sjkim addq %rax,%rdi 587238405Sjkim movq -16(%rsi,%r15,8),%rax 588238405Sjkim adcq $0,%rdx 589238405Sjkim addq %r11,%rdi 590238405Sjkim adcq $0,%rdx 591238405Sjkim movq %r13,-40(%rsp,%r15,8) 592238405Sjkim movq %rdx,%r13 593238405Sjkim cmpq %r9,%r15 594238405Sjkim jl .Linner4x 595238405Sjkim 596238405Sjkim mulq %rbx 597238405Sjkim addq %rax,%r10 598238405Sjkim movq -16(%rcx,%r15,8),%rax 599238405Sjkim adcq $0,%rdx 600238405Sjkim addq -16(%rsp,%r15,8),%r10 601238405Sjkim adcq $0,%rdx 602238405Sjkim movq %rdx,%r11 603238405Sjkim 604238405Sjkim mulq %rbp 605238405Sjkim addq %rax,%r13 606238405Sjkim movq -8(%rsi,%r15,8),%rax 607238405Sjkim adcq $0,%rdx 608238405Sjkim addq %r10,%r13 609238405Sjkim adcq $0,%rdx 610238405Sjkim movq %rdi,-32(%rsp,%r15,8) 611238405Sjkim movq %rdx,%rdi 612238405Sjkim 613238405Sjkim mulq %rbx 614238405Sjkim addq %rax,%r11 615238405Sjkim movq -8(%rcx,%r15,8),%rax 616238405Sjkim adcq $0,%rdx 617238405Sjkim addq -8(%rsp,%r15,8),%r11 618238405Sjkim adcq $0,%rdx 619238405Sjkim leaq 1(%r14),%r14 620238405Sjkim movq %rdx,%r10 621238405Sjkim 622238405Sjkim mulq %rbp 623238405Sjkim addq %rax,%rdi 624238405Sjkim movq (%rsi),%rax 625238405Sjkim adcq $0,%rdx 626238405Sjkim addq %r11,%rdi 627238405Sjkim adcq $0,%rdx 628238405Sjkim movq %r13,-24(%rsp,%r15,8) 629238405Sjkim movq %rdx,%r13 630238405Sjkim 631238405Sjkim.byte 102,72,15,126,195 632238405Sjkim movq %rdi,-16(%rsp,%r15,8) 633238405Sjkim 634238405Sjkim xorq %rdi,%rdi 635238405Sjkim addq %r10,%r13 636238405Sjkim adcq $0,%rdi 637238405Sjkim addq (%rsp,%r9,8),%r13 638238405Sjkim adcq $0,%rdi 639238405Sjkim movq %r13,-8(%rsp,%r15,8) 640238405Sjkim movq %rdi,(%rsp,%r15,8) 641238405Sjkim 642238405Sjkim cmpq %r9,%r14 643238405Sjkim jl .Louter4x 644238405Sjkim movq 16(%rsp,%r9,8),%rdi 645238405Sjkim movq 0(%rsp),%rax 646238405Sjkim pxor %xmm0,%xmm0 647238405Sjkim movq 8(%rsp),%rdx 648238405Sjkim shrq $2,%r9 649238405Sjkim leaq (%rsp),%rsi 650238405Sjkim xorq %r14,%r14 651238405Sjkim 652238405Sjkim subq 0(%rcx),%rax 653238405Sjkim movq 16(%rsi),%rbx 654238405Sjkim movq 24(%rsi),%rbp 655238405Sjkim sbbq 8(%rcx),%rdx 656238405Sjkim leaq -1(%r9),%r15 657238405Sjkim jmp .Lsub4x 658238405Sjkim.align 16 659238405Sjkim.Lsub4x: 660238405Sjkim movq %rax,0(%rdi,%r14,8) 661238405Sjkim movq %rdx,8(%rdi,%r14,8) 662238405Sjkim sbbq 16(%rcx,%r14,8),%rbx 663238405Sjkim movq 32(%rsi,%r14,8),%rax 664238405Sjkim movq 40(%rsi,%r14,8),%rdx 665238405Sjkim sbbq 24(%rcx,%r14,8),%rbp 666238405Sjkim movq %rbx,16(%rdi,%r14,8) 667238405Sjkim movq %rbp,24(%rdi,%r14,8) 668238405Sjkim sbbq 32(%rcx,%r14,8),%rax 669238405Sjkim movq 48(%rsi,%r14,8),%rbx 670238405Sjkim movq 56(%rsi,%r14,8),%rbp 671238405Sjkim sbbq 40(%rcx,%r14,8),%rdx 672238405Sjkim leaq 4(%r14),%r14 673238405Sjkim decq %r15 674238405Sjkim jnz .Lsub4x 675238405Sjkim 676238405Sjkim movq %rax,0(%rdi,%r14,8) 677238405Sjkim movq 32(%rsi,%r14,8),%rax 678238405Sjkim sbbq 16(%rcx,%r14,8),%rbx 679238405Sjkim movq %rdx,8(%rdi,%r14,8) 680238405Sjkim sbbq 24(%rcx,%r14,8),%rbp 681238405Sjkim movq %rbx,16(%rdi,%r14,8) 682238405Sjkim 683238405Sjkim sbbq $0,%rax 684238405Sjkim movq %rbp,24(%rdi,%r14,8) 685238405Sjkim xorq %r14,%r14 686238405Sjkim andq %rax,%rsi 687238405Sjkim notq %rax 688238405Sjkim movq %rdi,%rcx 689238405Sjkim andq %rax,%rcx 690238405Sjkim leaq -1(%r9),%r15 691238405Sjkim orq %rcx,%rsi 692238405Sjkim 693238405Sjkim movdqu (%rsi),%xmm1 694238405Sjkim movdqa %xmm0,(%rsp) 695238405Sjkim movdqu %xmm1,(%rdi) 696238405Sjkim jmp .Lcopy4x 697238405Sjkim.align 16 698238405Sjkim.Lcopy4x: 699238405Sjkim movdqu 16(%rsi,%r14,1),%xmm2 700238405Sjkim movdqu 32(%rsi,%r14,1),%xmm1 701238405Sjkim movdqa %xmm0,16(%rsp,%r14,1) 702238405Sjkim movdqu %xmm2,16(%rdi,%r14,1) 703238405Sjkim movdqa %xmm0,32(%rsp,%r14,1) 704238405Sjkim movdqu %xmm1,32(%rdi,%r14,1) 705238405Sjkim leaq 32(%r14),%r14 706238405Sjkim decq %r15 707238405Sjkim jnz .Lcopy4x 708238405Sjkim 709238405Sjkim shlq $2,%r9 710238405Sjkim movdqu 16(%rsi,%r14,1),%xmm2 711238405Sjkim movdqa %xmm0,16(%rsp,%r14,1) 712238405Sjkim movdqu %xmm2,16(%rdi,%r14,1) 713238405Sjkim movq 8(%rsp,%r9,8),%rsi 714238405Sjkim movq $1,%rax 715238405Sjkim movq (%rsi),%r15 716238405Sjkim movq 8(%rsi),%r14 717238405Sjkim movq 16(%rsi),%r13 718238405Sjkim movq 24(%rsi),%r12 719238405Sjkim movq 32(%rsi),%rbp 720238405Sjkim movq 40(%rsi),%rbx 721238405Sjkim leaq 48(%rsi),%rsp 722238405Sjkim.Lmul4x_epilogue: 723238405Sjkim .byte 0xf3,0xc3 724238405Sjkim.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 725238405Sjkim.globl bn_scatter5 726238405Sjkim.type bn_scatter5,@function 727238405Sjkim.align 16 728238405Sjkimbn_scatter5: 729238405Sjkim cmpq $0,%rsi 730238405Sjkim jz .Lscatter_epilogue 731238405Sjkim leaq (%rdx,%rcx,8),%rdx 732238405Sjkim.Lscatter: 733238405Sjkim movq (%rdi),%rax 734238405Sjkim leaq 8(%rdi),%rdi 735238405Sjkim movq %rax,(%rdx) 736238405Sjkim leaq 256(%rdx),%rdx 737238405Sjkim subq $1,%rsi 738238405Sjkim jnz .Lscatter 739238405Sjkim.Lscatter_epilogue: 740238405Sjkim .byte 0xf3,0xc3 741238405Sjkim.size bn_scatter5,.-bn_scatter5 742238405Sjkim 743238405Sjkim.globl bn_gather5 744238405Sjkim.type bn_gather5,@function 745238405Sjkim.align 16 746238405Sjkimbn_gather5: 747238405Sjkim movq %rcx,%r11 748238405Sjkim shrq $3,%rcx 749238405Sjkim andq $7,%r11 750238405Sjkim notq %rcx 751238405Sjkim leaq .Lmagic_masks(%rip),%rax 752238405Sjkim andq $3,%rcx 753238405Sjkim leaq 96(%rdx,%r11,8),%rdx 754238405Sjkim movq 0(%rax,%rcx,8),%xmm4 755238405Sjkim movq 8(%rax,%rcx,8),%xmm5 756238405Sjkim movq 16(%rax,%rcx,8),%xmm6 757238405Sjkim movq 24(%rax,%rcx,8),%xmm7 758238405Sjkim jmp .Lgather 759238405Sjkim.align 16 760238405Sjkim.Lgather: 761238405Sjkim movq -96(%rdx),%xmm0 762238405Sjkim movq -32(%rdx),%xmm1 763238405Sjkim pand %xmm4,%xmm0 764238405Sjkim movq 32(%rdx),%xmm2 765238405Sjkim pand %xmm5,%xmm1 766238405Sjkim movq 96(%rdx),%xmm3 767238405Sjkim pand %xmm6,%xmm2 768238405Sjkim por %xmm1,%xmm0 769238405Sjkim pand %xmm7,%xmm3 770238405Sjkim por %xmm2,%xmm0 771238405Sjkim leaq 256(%rdx),%rdx 772238405Sjkim por %xmm3,%xmm0 773238405Sjkim 774238405Sjkim movq %xmm0,(%rdi) 775238405Sjkim leaq 8(%rdi),%rdi 776238405Sjkim subq $1,%rsi 777238405Sjkim jnz .Lgather 778238405Sjkim .byte 0xf3,0xc3 779238405Sjkim.LSEH_end_bn_gather5: 780238405Sjkim.size bn_gather5,.-bn_gather5 781238405Sjkim.align 64 782238405Sjkim.Lmagic_masks: 783238405Sjkim.long 0,0, 0,0, 0,0, -1,-1 784238405Sjkim.long 0,0, 0,0, 0,0, 0,0 785238405Sjkim.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 786