aesni-x86_64.S revision 298998
# $FreeBSD: head/secure/lib/libcrypto/amd64/aesni-x86_64.S 298998 2016-05-03 18:50:10Z jkim $
#
# AES-NI x86-64 module — machine-generated perlasm output (OpenSSL
# aesni-x86_64.pl).  AT&T syntax, System V AMD64 ABI (args in
# rdi, rsi, rdx, rcx, r8, r9; all xmm registers caller-saved).
#
# NOTE(review): the ".byte 102,15,56,2xx,..." sequences are AES-NI
# instructions emitted as raw machine code so that pre-AES-NI
# assemblers can still build this file:
#   66 0F 38 DC (102,15,56,220) = aesenc      xmmN,xmmM
#   66 0F 38 DD (102,15,56,221) = aesenclast  xmmN,xmmM
#   66 0F 38 DE (102,15,56,222) = aesdec      xmmN,xmmM
#   66 0F 38 DF (102,15,56,223) = aesdeclast  xmmN,xmmM
#   66 0F 38 00 (102,15,56,0)   = pshufb      xmmN,xmmM
# a leading 68/69 (REX.R/REX.RB) extends the operands to xmm8-xmm15.
# ".byte 0xf3,0xc3" is "rep ret"; ".byte 0x0f,0x1f,0x00" and
# ".byte 0x66,0x90" are multi-byte NOP padding for loop alignment.
#
# Offset 240 of the key-schedule argument is read as the round count
# everywhere below — presumably the "rounds" field of OpenSSL's
# AES_KEY structure (TODO confirm against the C headers).
.text

#-----------------------------------------------------------------------
# void aesni_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
# In:    rdi = in, rsi = out, rdx = key schedule
# Encrypts a single 16-byte block.  Wipes xmm0-xmm2 before returning
# so no key or plaintext material is left in registers.
#-----------------------------------------------------------------------
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
	movups	(%rdi),%xmm2		# xmm2 = plaintext block
	movl	240(%rdx),%eax		# eax = number of rounds
	movups	(%rdx),%xmm0		# round key 0
	movups	16(%rdx),%xmm1		# round key 1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2		# whitening: block ^= key[0]
.Loop_enc1_1:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	decl	%eax
	movups	(%rdx),%xmm1		# next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		# aesenclast xmm1,xmm2
	pxor	%xmm0,%xmm0		# clear sensitive registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		# store ciphertext
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3		# rep ret
.size	aesni_encrypt,.-aesni_encrypt

#-----------------------------------------------------------------------
# void aesni_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
# Single-block decryption; mirror of aesni_encrypt using aesdec.
#-----------------------------------------------------------------------
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
	movups	(%rdi),%xmm2		# xmm2 = ciphertext block
	movl	240(%rdx),%eax		# eax = number of rounds
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2		# block ^= key[0]
.Loop_dec1_2:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209		# aesdeclast xmm1,xmm2
	pxor	%xmm0,%xmm0		# clear sensitive registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		# store plaintext
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3		# rep ret
.size	aesni_decrypt, .-aesni_decrypt

#-----------------------------------------------------------------------
# _aesni_encrypt2 .. _aesni_decrypt8: internal helpers that run 2/3/4/6/8
# independent AES pipelines in parallel to hide aesenc/aesdec latency.
# Common (non-standard) register contract, set up by the public callers:
#   In:  rcx = key schedule, eax = rounds, xmm2..xmm{1+N} = blocks
#        (N=8 additionally uses xmm8, xmm9)
#   Out: blocks encrypted/decrypted in place in the same registers
#   Clobbers: rax (negated/advanced as schedule cursor), rcx, xmm0, xmm1
# Addressing trick used by all of them: rcx is moved past the END of the
# schedule and rax becomes a negative byte offset that is walked forward
# 32 bytes (two round keys) per iteration; the "jnz" consumes ZF from
# "addq $32,%rax" (movups does not touch flags), so the loop exits when
# the cursor reaches the final two round keys, which are applied after
# the loop with aesenclast/aesdeclast.
#-----------------------------------------------------------------------
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax			# eax = rounds*16 (bytes)
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whitening for both blocks
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx	# rcx -> end of schedule
	negq	%rax
	addq	$16,%rax		# rax = negative cursor

.Lenc_loop2:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2
.byte	102,15,56,220,216		# aesenc xmm0,xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast xmm0,xmm2
.byte	102,15,56,221,216		# aesenclast xmm0,xmm3
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt2,.-_aesni_encrypt2

.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
.byte	102,15,56,222,217		# aesdec xmm1,xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec xmm0,xmm2
.byte	102,15,56,222,216		# aesdec xmm0,xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208		# aesdeclast xmm0,xmm2
.byte	102,15,56,223,216		# aesdeclast xmm0,xmm3
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt2,.-_aesni_decrypt2

.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# three parallel pipelines: xmm2-xmm4
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
.byte	102,15,56,220,225		# aesenc xmm1,xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2
.byte	102,15,56,220,216		# aesenc xmm0,xmm3
.byte	102,15,56,220,224		# aesenc xmm0,xmm4
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208		# aesenclast x3
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt3,.-_aesni_encrypt3

.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
.byte	102,15,56,222,217		# aesdec xmm1,xmm3
.byte	102,15,56,222,225		# aesdec xmm1,xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec xmm0,xmm2
.byte	102,15,56,222,216		# aesdec xmm0,xmm3
.byte	102,15,56,222,224		# aesdec xmm0,xmm4
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208		# aesdeclast x3
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt3,.-_aesni_decrypt3

.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# four parallel pipelines: xmm2-xmm5
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			# 3-byte nop (loop alignment)
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2..xmm5
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2..xmm5
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208		# aesenclast x4
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt4,.-_aesni_encrypt4

.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			# 3-byte nop (loop alignment)
	addq	$16,%rax

.Ldec_loop4:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2..xmm5
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec xmm0,xmm2..xmm5
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208		# aesdeclast x4
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt4,.-_aesni_decrypt4

.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# six pipelines: xmm2-xmm7; first
	pxor	%xmm0,%xmm3		# aesenc rounds are interleaved with
	pxor	%xmm0,%xmm4		# the remaining whitening XORs
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		# aesenc xmm1,xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter	# first iteration enters mid-loop
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2..xmm4
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:			# also a call target (CTR32 code)
.byte	102,15,56,220,233		# aesenc xmm1,xmm5..xmm7
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2..xmm7
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		# aesenclast x6
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt6,.-_aesni_encrypt6

.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217		# aesdec xmm1,xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225		# aesdec xmm1,xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2..xmm4
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.Ldec_loop6_enter:
.byte	102,15,56,222,233		# aesdec xmm1,xmm5..xmm7
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec xmm0,xmm2..xmm7
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208		# aesdeclast x6
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt6,.-_aesni_decrypt6

.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# eight pipelines: xmm2-xmm9
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
.Lenc_loop8_inner:
.byte	102,15,56,220,225		# aesenc xmm1,xmm4..xmm9
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193		# aesenc xmm1,xmm8
.byte	102,68,15,56,220,201		# aesenc xmm1,xmm9
.Lenc_loop8_enter:			# call target for CTR32 tail code
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2..xmm9
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208		# aesenclast x8
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt8,.-_aesni_encrypt8

.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217		# aesdec xmm1,xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
.byte	102,15,56,222,217		# aesdec xmm1,xmm3
.Ldec_loop8_inner:
.byte	102,15,56,222,225		# aesdec xmm1,xmm4..xmm9
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193		# aesdec xmm1,xmm8
.byte	102,68,15,56,222,201		# aesdec xmm1,xmm9
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec xmm0,xmm2..xmm9
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208		# aesdeclast x8
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt8,.-_aesni_decrypt8

#-----------------------------------------------------------------------
# void aesni_ecb_encrypt(const u8 *in, u8 *out, size_t len,
#                        const AES_KEY *key, int enc)
# In:  rdi = in, rsi = out, rdx = length in bytes (truncated to a
#      multiple of 16), rcx = key schedule, r8d = enc (nonzero =
#      encrypt, zero = decrypt).
# Processes 8 blocks at a time via _aesni_{en,de}crypt8, then a tail of
# 1..7 blocks via the smaller helpers.  r11/r10d preserve key pointer
# and round count across helper calls (helpers clobber rcx/eax).
# The decrypt path additionally wipes data registers after each store.
#-----------------------------------------------------------------------
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
	andq	$-16,%rdx		# round length down to 16n
	jz	.Lecb_ret

	movl	240(%rcx),%eax		# eax = rounds
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		# r11 = key (survives helper calls)
	movl	%eax,%r10d		# r10d = rounds (ditto)
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx		# at least 8 blocks?
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2		# load first 8 blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)		# store previous 8, load next 8
	movq	%r11,%rcx		# restore key/rounds for helper
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)		# flush last full batch
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx		# rdx = remaining bytes (0..0x70)
	jz	.Lecb_ret

.Lecb_enc_tail:				# 1..7 remaining blocks
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8		# seven blocks: pad 8th with zeros
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:				# inline single-block encrypt
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209		# aesenclast xmm1,xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:				# five blocks via 6-wide helper
	xorps	%xmm7,%xmm7		# zero-pad the 6th pipeline
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:				# ECB decryption, same structure
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)		# store and immediately wipe
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8		# seven blocks: pad 8th with zeros
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:				# inline single-block decrypt
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209		# aesdec xmm1,xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209		# aesdeclast xmm1,xmm2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7		# zero-pad the 6th pipeline
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0		# wipe round-key registers
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt

#-----------------------------------------------------------------------
# void aesni_ccm64_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 *ivec,
#                                 u8 *cmac)
# In:  rdi=in, rsi=out, rdx=block count, rcx=key, r8=ivec, r9=cmac.
# CCM with a 64-bit counter: per block, the counter (xmm6, kept
# byte-swapped via pshufb with .Lbswap_mask and bumped with paddq
# .Lincrement64) is encrypted in one pipeline while the CBC-MAC state
# (xmm3) absorbs the plaintext in the second; both constants are
# defined elsewhere in this file.  Updated CMAC is written back to
# (%r9); sensitive registers are wiped on exit.
#-----------------------------------------------------------------------
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax		# eax = rounds
	movdqu	(%r8),%xmm6		# xmm6 = counter block (ivec)
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	shll	$4,%eax
	movl	$16,%r10d
	leaq	0(%rcx),%r11		# r11 = key base
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	%xmm6,%xmm2
	leaq	32(%rcx,%rax,1),%rcx	# rcx -> end of key schedule
.byte	102,15,56,0,247			# pshufb xmm7,xmm6 (byte-swap ctr)
	subq	%rax,%r10		# r10 = negative schedule offset
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:			# one plaintext block per pass
	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	(%rdi),%xmm8		# xmm8 = plaintext block

	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0
	xorps	%xmm0,%xmm3		# CMAC ^= plaintext ^ key[0]

	movups	32(%r11),%xmm0

.Lccm64_enc2_loop:			# 2 pipelines: ctr + CMAC
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2
.byte	102,15,56,220,216		# aesenc xmm0,xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	%xmm9,%xmm6		# counter++ (64-bit)
	decq	%rdx			# blocks--
.byte	102,15,56,221,208		# aesenclast xmm0,xmm2
.byte	102,15,56,221,216		# aesenclast xmm0,xmm3

	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8		# ciphertext = E(ctr) ^ plaintext
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
.byte	102,15,56,0,215			# pshufb xmm7,xmm2 (re-swap ctr)
	leaq	16(%rsi),%rsi
	jnz	.Lccm64_enc_outer	# loop while rdx != 0 (from decq)

	pxor	%xmm0,%xmm0		# wipe key/data registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		# write back CMAC
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks

#-----------------------------------------------------------------------
# void aesni_ccm64_decrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 *ivec,
#                                 u8 *cmac)
# Same contract as the encrypt variant.  Decryption must recover the
# plaintext before it can be absorbed into the CMAC, so the first
# counter block is encrypted up front (inline .Loop_enc1_5), each pass
# decrypts one block, and the final CMAC absorption runs one extra
# single-pipeline encryption (.Loop_enc1_6) after the loop breaks.
#-----------------------------------------------------------------------
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax		# eax = rounds
	movups	(%r8),%xmm6		# xmm6 = counter block (ivec)
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm6,%xmm2
	movl	%eax,%r10d
	movq	%rcx,%r11		# r11 = key base
.byte	102,15,56,0,247			# pshufb xmm7,xmm6 (byte-swap ctr)
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:				# encrypt first counter block
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209		# aesenclast xmm1,xmm2
	shll	$4,%r10d
	movl	$16,%eax
	movups	(%rdi),%xmm8		# first ciphertext block
	paddq	%xmm9,%xmm6		# counter++
	leaq	16(%rdi),%rdi
	subq	%r10,%rax		# rax = negative schedule offset
	leaq	32(%r11,%r10,1),%rcx	# rcx -> end of key schedule
	movq	%rax,%r10
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8		# plaintext = E(ctr) ^ ciphertext
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)		# store plaintext
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215			# pshufb xmm7,xmm2 (re-swap ctr)

	subq	$1,%rdx
	jz	.Lccm64_dec_break	# last block: finish CMAC below

	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	xorps	%xmm0,%xmm2		# whiten next counter
	xorps	%xmm8,%xmm3		# CMAC ^= recovered plaintext
	movups	32(%r11),%xmm0
	jmp	.Lccm64_dec2_loop
.align	16
.Lccm64_dec2_loop:			# 2 pipelines: ctr + CMAC
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc xmm0,xmm2
.byte	102,15,56,220,216		# aesenc xmm0,xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8		# next ciphertext block
	paddq	%xmm9,%xmm6		# counter++
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast xmm0,xmm2
.byte	102,15,56,221,216		# aesenclast xmm0,xmm3
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:
	# absorb the final plaintext block into the CMAC
	movl	240(%r11),%eax
	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3		# CMAC ^= plaintext ^ key[0]
.Loop_enc1_6:
.byte	102,15,56,220,217		# aesenc xmm1,xmm3
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217		# aesenclast xmm1,xmm3
	pxor	%xmm0,%xmm0		# wipe key/data registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		# write back CMAC
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks

#-----------------------------------------------------------------------
# void aesni_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 *ivec)
# NOTE(review): this function continues beyond the end of this chunk —
# only the single-block fast path and the start of the bulk-path
# prologue are visible here; the remainder (counter setup, 6x/8x loops,
# tail, epilogue) follows in the original file.
#-----------------------------------------------------------------------
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
	cmpq	$1,%rdx
	jne	.Lctr32_bulk		# more than one block: bulk path



	# single-block fast path: encrypt counter, XOR with input
	movups	(%r8),%xmm2		# xmm2 = counter block
	movups	(%rdi),%xmm3		# xmm3 = input block
	movl	240(%rcx),%edx		# edx = rounds (rdx count now dead)
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_7:
.byte	102,15,56,220,209		# aesenc xmm1,xmm2
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_7
.byte	102,15,56,221,209		# aesenclast xmm1,xmm2
	pxor	%xmm0,%xmm0		# wipe sensitive registers
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2		# out = E(ctr) ^ in
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	.Lctr32_epilogue

.align	16
.Lctr32_bulk:
	leaq	(%rsp),%rax		# rax = incoming rsp
	pushq	%rbp
	subq	$128,%rsp		# 128-byte aligned counter scratch
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp		# rbp -> saved rbp slot, for epilogue



	movdqu	(%r8),%xmm2
	movdqu
(%rcx),%xmm0 1039 movl 12(%r8),%r8d 1040 pxor %xmm0,%xmm2 1041 movl 12(%rcx),%r11d 1042 movdqa %xmm2,0(%rsp) 1043 bswapl %r8d 1044 movdqa %xmm2,%xmm3 1045 movdqa %xmm2,%xmm4 1046 movdqa %xmm2,%xmm5 1047 movdqa %xmm2,64(%rsp) 1048 movdqa %xmm2,80(%rsp) 1049 movdqa %xmm2,96(%rsp) 1050 movq %rdx,%r10 1051 movdqa %xmm2,112(%rsp) 1052 1053 leaq 1(%r8),%rax 1054 leaq 2(%r8),%rdx 1055 bswapl %eax 1056 bswapl %edx 1057 xorl %r11d,%eax 1058 xorl %r11d,%edx 1059.byte 102,15,58,34,216,3 1060 leaq 3(%r8),%rax 1061 movdqa %xmm3,16(%rsp) 1062.byte 102,15,58,34,226,3 1063 bswapl %eax 1064 movq %r10,%rdx 1065 leaq 4(%r8),%r10 1066 movdqa %xmm4,32(%rsp) 1067 xorl %r11d,%eax 1068 bswapl %r10d 1069.byte 102,15,58,34,232,3 1070 xorl %r11d,%r10d 1071 movdqa %xmm5,48(%rsp) 1072 leaq 5(%r8),%r9 1073 movl %r10d,64+12(%rsp) 1074 bswapl %r9d 1075 leaq 6(%r8),%r10 1076 movl 240(%rcx),%eax 1077 xorl %r11d,%r9d 1078 bswapl %r10d 1079 movl %r9d,80+12(%rsp) 1080 xorl %r11d,%r10d 1081 leaq 7(%r8),%r9 1082 movl %r10d,96+12(%rsp) 1083 bswapl %r9d 1084 movl OPENSSL_ia32cap_P+4(%rip),%r10d 1085 xorl %r11d,%r9d 1086 andl $71303168,%r10d 1087 movl %r9d,112+12(%rsp) 1088 1089 movups 16(%rcx),%xmm1 1090 1091 movdqa 64(%rsp),%xmm6 1092 movdqa 80(%rsp),%xmm7 1093 1094 cmpq $8,%rdx 1095 jb .Lctr32_tail 1096 1097 subq $6,%rdx 1098 cmpl $4194304,%r10d 1099 je .Lctr32_6x 1100 1101 leaq 128(%rcx),%rcx 1102 subq $2,%rdx 1103 jmp .Lctr32_loop8 1104 1105.align 16 1106.Lctr32_6x: 1107 shll $4,%eax 1108 movl $48,%r10d 1109 bswapl %r11d 1110 leaq 32(%rcx,%rax,1),%rcx 1111 subq %rax,%r10 1112 jmp .Lctr32_loop6 1113 1114.align 16 1115.Lctr32_loop6: 1116 addl $6,%r8d 1117 movups -48(%rcx,%r10,1),%xmm0 1118.byte 102,15,56,220,209 1119 movl %r8d,%eax 1120 xorl %r11d,%eax 1121.byte 102,15,56,220,217 1122.byte 0x0f,0x38,0xf1,0x44,0x24,12 1123 leal 1(%r8),%eax 1124.byte 102,15,56,220,225 1125 xorl %r11d,%eax 1126.byte 0x0f,0x38,0xf1,0x44,0x24,28 1127.byte 102,15,56,220,233 1128 leal 2(%r8),%eax 1129 xorl %r11d,%eax 1130.byte 
102,15,56,220,241 1131.byte 0x0f,0x38,0xf1,0x44,0x24,44 1132 leal 3(%r8),%eax 1133.byte 102,15,56,220,249 1134 movups -32(%rcx,%r10,1),%xmm1 1135 xorl %r11d,%eax 1136 1137.byte 102,15,56,220,208 1138.byte 0x0f,0x38,0xf1,0x44,0x24,60 1139 leal 4(%r8),%eax 1140.byte 102,15,56,220,216 1141 xorl %r11d,%eax 1142.byte 0x0f,0x38,0xf1,0x44,0x24,76 1143.byte 102,15,56,220,224 1144 leal 5(%r8),%eax 1145 xorl %r11d,%eax 1146.byte 102,15,56,220,232 1147.byte 0x0f,0x38,0xf1,0x44,0x24,92 1148 movq %r10,%rax 1149.byte 102,15,56,220,240 1150.byte 102,15,56,220,248 1151 movups -16(%rcx,%r10,1),%xmm0 1152 1153 call .Lenc_loop6 1154 1155 movdqu (%rdi),%xmm8 1156 movdqu 16(%rdi),%xmm9 1157 movdqu 32(%rdi),%xmm10 1158 movdqu 48(%rdi),%xmm11 1159 movdqu 64(%rdi),%xmm12 1160 movdqu 80(%rdi),%xmm13 1161 leaq 96(%rdi),%rdi 1162 movups -64(%rcx,%r10,1),%xmm1 1163 pxor %xmm2,%xmm8 1164 movaps 0(%rsp),%xmm2 1165 pxor %xmm3,%xmm9 1166 movaps 16(%rsp),%xmm3 1167 pxor %xmm4,%xmm10 1168 movaps 32(%rsp),%xmm4 1169 pxor %xmm5,%xmm11 1170 movaps 48(%rsp),%xmm5 1171 pxor %xmm6,%xmm12 1172 movaps 64(%rsp),%xmm6 1173 pxor %xmm7,%xmm13 1174 movaps 80(%rsp),%xmm7 1175 movdqu %xmm8,(%rsi) 1176 movdqu %xmm9,16(%rsi) 1177 movdqu %xmm10,32(%rsi) 1178 movdqu %xmm11,48(%rsi) 1179 movdqu %xmm12,64(%rsi) 1180 movdqu %xmm13,80(%rsi) 1181 leaq 96(%rsi),%rsi 1182 1183 subq $6,%rdx 1184 jnc .Lctr32_loop6 1185 1186 addq $6,%rdx 1187 jz .Lctr32_done 1188 1189 leal -48(%r10),%eax 1190 leaq -80(%rcx,%r10,1),%rcx 1191 negl %eax 1192 shrl $4,%eax 1193 jmp .Lctr32_tail 1194 1195.align 32 1196.Lctr32_loop8: 1197 addl $8,%r8d 1198 movdqa 96(%rsp),%xmm8 1199.byte 102,15,56,220,209 1200 movl %r8d,%r9d 1201 movdqa 112(%rsp),%xmm9 1202.byte 102,15,56,220,217 1203 bswapl %r9d 1204 movups 32-128(%rcx),%xmm0 1205.byte 102,15,56,220,225 1206 xorl %r11d,%r9d 1207 nop 1208.byte 102,15,56,220,233 1209 movl %r9d,0+12(%rsp) 1210 leaq 1(%r8),%r9 1211.byte 102,15,56,220,241 1212.byte 102,15,56,220,249 1213.byte 102,68,15,56,220,193 
1214.byte 102,68,15,56,220,201 1215 movups 48-128(%rcx),%xmm1 1216 bswapl %r9d 1217.byte 102,15,56,220,208 1218.byte 102,15,56,220,216 1219 xorl %r11d,%r9d 1220.byte 0x66,0x90 1221.byte 102,15,56,220,224 1222.byte 102,15,56,220,232 1223 movl %r9d,16+12(%rsp) 1224 leaq 2(%r8),%r9 1225.byte 102,15,56,220,240 1226.byte 102,15,56,220,248 1227.byte 102,68,15,56,220,192 1228.byte 102,68,15,56,220,200 1229 movups 64-128(%rcx),%xmm0 1230 bswapl %r9d 1231.byte 102,15,56,220,209 1232.byte 102,15,56,220,217 1233 xorl %r11d,%r9d 1234.byte 0x66,0x90 1235.byte 102,15,56,220,225 1236.byte 102,15,56,220,233 1237 movl %r9d,32+12(%rsp) 1238 leaq 3(%r8),%r9 1239.byte 102,15,56,220,241 1240.byte 102,15,56,220,249 1241.byte 102,68,15,56,220,193 1242.byte 102,68,15,56,220,201 1243 movups 80-128(%rcx),%xmm1 1244 bswapl %r9d 1245.byte 102,15,56,220,208 1246.byte 102,15,56,220,216 1247 xorl %r11d,%r9d 1248.byte 0x66,0x90 1249.byte 102,15,56,220,224 1250.byte 102,15,56,220,232 1251 movl %r9d,48+12(%rsp) 1252 leaq 4(%r8),%r9 1253.byte 102,15,56,220,240 1254.byte 102,15,56,220,248 1255.byte 102,68,15,56,220,192 1256.byte 102,68,15,56,220,200 1257 movups 96-128(%rcx),%xmm0 1258 bswapl %r9d 1259.byte 102,15,56,220,209 1260.byte 102,15,56,220,217 1261 xorl %r11d,%r9d 1262.byte 0x66,0x90 1263.byte 102,15,56,220,225 1264.byte 102,15,56,220,233 1265 movl %r9d,64+12(%rsp) 1266 leaq 5(%r8),%r9 1267.byte 102,15,56,220,241 1268.byte 102,15,56,220,249 1269.byte 102,68,15,56,220,193 1270.byte 102,68,15,56,220,201 1271 movups 112-128(%rcx),%xmm1 1272 bswapl %r9d 1273.byte 102,15,56,220,208 1274.byte 102,15,56,220,216 1275 xorl %r11d,%r9d 1276.byte 0x66,0x90 1277.byte 102,15,56,220,224 1278.byte 102,15,56,220,232 1279 movl %r9d,80+12(%rsp) 1280 leaq 6(%r8),%r9 1281.byte 102,15,56,220,240 1282.byte 102,15,56,220,248 1283.byte 102,68,15,56,220,192 1284.byte 102,68,15,56,220,200 1285 movups 128-128(%rcx),%xmm0 1286 bswapl %r9d 1287.byte 102,15,56,220,209 1288.byte 102,15,56,220,217 1289 xorl %r11d,%r9d 
1290.byte 0x66,0x90 1291.byte 102,15,56,220,225 1292.byte 102,15,56,220,233 1293 movl %r9d,96+12(%rsp) 1294 leaq 7(%r8),%r9 1295.byte 102,15,56,220,241 1296.byte 102,15,56,220,249 1297.byte 102,68,15,56,220,193 1298.byte 102,68,15,56,220,201 1299 movups 144-128(%rcx),%xmm1 1300 bswapl %r9d 1301.byte 102,15,56,220,208 1302.byte 102,15,56,220,216 1303.byte 102,15,56,220,224 1304 xorl %r11d,%r9d 1305 movdqu 0(%rdi),%xmm10 1306.byte 102,15,56,220,232 1307 movl %r9d,112+12(%rsp) 1308 cmpl $11,%eax 1309.byte 102,15,56,220,240 1310.byte 102,15,56,220,248 1311.byte 102,68,15,56,220,192 1312.byte 102,68,15,56,220,200 1313 movups 160-128(%rcx),%xmm0 1314 1315 jb .Lctr32_enc_done 1316 1317.byte 102,15,56,220,209 1318.byte 102,15,56,220,217 1319.byte 102,15,56,220,225 1320.byte 102,15,56,220,233 1321.byte 102,15,56,220,241 1322.byte 102,15,56,220,249 1323.byte 102,68,15,56,220,193 1324.byte 102,68,15,56,220,201 1325 movups 176-128(%rcx),%xmm1 1326 1327.byte 102,15,56,220,208 1328.byte 102,15,56,220,216 1329.byte 102,15,56,220,224 1330.byte 102,15,56,220,232 1331.byte 102,15,56,220,240 1332.byte 102,15,56,220,248 1333.byte 102,68,15,56,220,192 1334.byte 102,68,15,56,220,200 1335 movups 192-128(%rcx),%xmm0 1336 je .Lctr32_enc_done 1337 1338.byte 102,15,56,220,209 1339.byte 102,15,56,220,217 1340.byte 102,15,56,220,225 1341.byte 102,15,56,220,233 1342.byte 102,15,56,220,241 1343.byte 102,15,56,220,249 1344.byte 102,68,15,56,220,193 1345.byte 102,68,15,56,220,201 1346 movups 208-128(%rcx),%xmm1 1347 1348.byte 102,15,56,220,208 1349.byte 102,15,56,220,216 1350.byte 102,15,56,220,224 1351.byte 102,15,56,220,232 1352.byte 102,15,56,220,240 1353.byte 102,15,56,220,248 1354.byte 102,68,15,56,220,192 1355.byte 102,68,15,56,220,200 1356 movups 224-128(%rcx),%xmm0 1357 jmp .Lctr32_enc_done 1358 1359.align 16 1360.Lctr32_enc_done: 1361 movdqu 16(%rdi),%xmm11 1362 pxor %xmm0,%xmm10 1363 movdqu 32(%rdi),%xmm12 1364 pxor %xmm0,%xmm11 1365 movdqu 48(%rdi),%xmm13 1366 pxor %xmm0,%xmm12 1367 
movdqu 64(%rdi),%xmm14 1368 pxor %xmm0,%xmm13 1369 movdqu 80(%rdi),%xmm15 1370 pxor %xmm0,%xmm14 1371 pxor %xmm0,%xmm15 1372.byte 102,15,56,220,209 1373.byte 102,15,56,220,217 1374.byte 102,15,56,220,225 1375.byte 102,15,56,220,233 1376.byte 102,15,56,220,241 1377.byte 102,15,56,220,249 1378.byte 102,68,15,56,220,193 1379.byte 102,68,15,56,220,201 1380 movdqu 96(%rdi),%xmm1 1381 leaq 128(%rdi),%rdi 1382 1383.byte 102,65,15,56,221,210 1384 pxor %xmm0,%xmm1 1385 movdqu 112-128(%rdi),%xmm10 1386.byte 102,65,15,56,221,219 1387 pxor %xmm0,%xmm10 1388 movdqa 0(%rsp),%xmm11 1389.byte 102,65,15,56,221,228 1390.byte 102,65,15,56,221,237 1391 movdqa 16(%rsp),%xmm12 1392 movdqa 32(%rsp),%xmm13 1393.byte 102,65,15,56,221,246 1394.byte 102,65,15,56,221,255 1395 movdqa 48(%rsp),%xmm14 1396 movdqa 64(%rsp),%xmm15 1397.byte 102,68,15,56,221,193 1398 movdqa 80(%rsp),%xmm0 1399 movups 16-128(%rcx),%xmm1 1400.byte 102,69,15,56,221,202 1401 1402 movups %xmm2,(%rsi) 1403 movdqa %xmm11,%xmm2 1404 movups %xmm3,16(%rsi) 1405 movdqa %xmm12,%xmm3 1406 movups %xmm4,32(%rsi) 1407 movdqa %xmm13,%xmm4 1408 movups %xmm5,48(%rsi) 1409 movdqa %xmm14,%xmm5 1410 movups %xmm6,64(%rsi) 1411 movdqa %xmm15,%xmm6 1412 movups %xmm7,80(%rsi) 1413 movdqa %xmm0,%xmm7 1414 movups %xmm8,96(%rsi) 1415 movups %xmm9,112(%rsi) 1416 leaq 128(%rsi),%rsi 1417 1418 subq $8,%rdx 1419 jnc .Lctr32_loop8 1420 1421 addq $8,%rdx 1422 jz .Lctr32_done 1423 leaq -128(%rcx),%rcx 1424 1425.Lctr32_tail: 1426 1427 1428 leaq 16(%rcx),%rcx 1429 cmpq $4,%rdx 1430 jb .Lctr32_loop3 1431 je .Lctr32_loop4 1432 1433 1434 shll $4,%eax 1435 movdqa 96(%rsp),%xmm8 1436 pxor %xmm9,%xmm9 1437 1438 movups 16(%rcx),%xmm0 1439.byte 102,15,56,220,209 1440.byte 102,15,56,220,217 1441 leaq 32-16(%rcx,%rax,1),%rcx 1442 negq %rax 1443.byte 102,15,56,220,225 1444 addq $16,%rax 1445 movups (%rdi),%xmm10 1446.byte 102,15,56,220,233 1447.byte 102,15,56,220,241 1448 movups 16(%rdi),%xmm11 1449 movups 32(%rdi),%xmm12 1450.byte 102,15,56,220,249 1451.byte 
102,68,15,56,220,193 1452 1453 call .Lenc_loop8_enter 1454 1455 movdqu 48(%rdi),%xmm13 1456 pxor %xmm10,%xmm2 1457 movdqu 64(%rdi),%xmm10 1458 pxor %xmm11,%xmm3 1459 movdqu %xmm2,(%rsi) 1460 pxor %xmm12,%xmm4 1461 movdqu %xmm3,16(%rsi) 1462 pxor %xmm13,%xmm5 1463 movdqu %xmm4,32(%rsi) 1464 pxor %xmm10,%xmm6 1465 movdqu %xmm5,48(%rsi) 1466 movdqu %xmm6,64(%rsi) 1467 cmpq $6,%rdx 1468 jb .Lctr32_done 1469 1470 movups 80(%rdi),%xmm11 1471 xorps %xmm11,%xmm7 1472 movups %xmm7,80(%rsi) 1473 je .Lctr32_done 1474 1475 movups 96(%rdi),%xmm12 1476 xorps %xmm12,%xmm8 1477 movups %xmm8,96(%rsi) 1478 jmp .Lctr32_done 1479 1480.align 32 1481.Lctr32_loop4: 1482.byte 102,15,56,220,209 1483 leaq 16(%rcx),%rcx 1484 decl %eax 1485.byte 102,15,56,220,217 1486.byte 102,15,56,220,225 1487.byte 102,15,56,220,233 1488 movups (%rcx),%xmm1 1489 jnz .Lctr32_loop4 1490.byte 102,15,56,221,209 1491.byte 102,15,56,221,217 1492 movups (%rdi),%xmm10 1493 movups 16(%rdi),%xmm11 1494.byte 102,15,56,221,225 1495.byte 102,15,56,221,233 1496 movups 32(%rdi),%xmm12 1497 movups 48(%rdi),%xmm13 1498 1499 xorps %xmm10,%xmm2 1500 movups %xmm2,(%rsi) 1501 xorps %xmm11,%xmm3 1502 movups %xmm3,16(%rsi) 1503 pxor %xmm12,%xmm4 1504 movdqu %xmm4,32(%rsi) 1505 pxor %xmm13,%xmm5 1506 movdqu %xmm5,48(%rsi) 1507 jmp .Lctr32_done 1508 1509.align 32 1510.Lctr32_loop3: 1511.byte 102,15,56,220,209 1512 leaq 16(%rcx),%rcx 1513 decl %eax 1514.byte 102,15,56,220,217 1515.byte 102,15,56,220,225 1516 movups (%rcx),%xmm1 1517 jnz .Lctr32_loop3 1518.byte 102,15,56,221,209 1519.byte 102,15,56,221,217 1520.byte 102,15,56,221,225 1521 1522 movups (%rdi),%xmm10 1523 xorps %xmm10,%xmm2 1524 movups %xmm2,(%rsi) 1525 cmpq $2,%rdx 1526 jb .Lctr32_done 1527 1528 movups 16(%rdi),%xmm11 1529 xorps %xmm11,%xmm3 1530 movups %xmm3,16(%rsi) 1531 je .Lctr32_done 1532 1533 movups 32(%rdi),%xmm12 1534 xorps %xmm12,%xmm4 1535 movups %xmm4,32(%rsi) 1536 1537.Lctr32_done: 1538 xorps %xmm0,%xmm0 1539 xorl %r11d,%r11d 1540 pxor %xmm1,%xmm1 1541 pxor 
%xmm2,%xmm2
	pxor	%xmm3,%xmm3		# (tail of aesni_ctr32_encrypt_blocks)
	pxor	%xmm4,%xmm4		# scrub all XMM registers and the stack
	pxor	%xmm5,%xmm5		# scratch area so no key/counter material
	pxor	%xmm6,%xmm6		# survives the call
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp		# restore caller stack frame
	popq	%rbp
.Lctr32_epilogue:
	.byte	0xf3,0xc3		# repz ret (avoids single-byte ret branch-predictor issue)
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks

#------------------------------------------------------------------------------
# void aesni_xts_encrypt(const char *in   %rdi,
#                        char *out        %rsi,
#                        size_t len       %rdx,
#                        const KEY *key1  %rcx,   data-encryption key schedule
#                        const KEY *key2  %r8,    tweak-encryption key schedule
#                        const u8 iv[16]  %r9)    initial tweak (sector number)
# SysV AMD64 ABI.  240(key) holds the round count (OpenSSL AES_KEY layout).
# AES round ops are emitted as .byte so pre-AES-NI assemblers accept the file:
#   102,15,56,220,xx = aesenc ; 102,15,56,221,xx = aesenclast
#------------------------------------------------------------------------------
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp		# 16-byte-align local scratch for movaps
	leaq	-8(%rax),%rbp
	movups	(%r9),%xmm2		# load initial tweak
	movl	240(%r8),%eax		# rounds of tweak key
	movl	240(%rcx),%r10d		# rounds of data key
# encrypt the tweak with key2 (one-block AES loop)
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
.Loop_enc1_8:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_8
.byte	102,15,56,221,209
	movups	(%rcx),%xmm0		# round-0 key of data schedule
	movq	%rcx,%r11		# r11 = key1, preserved across the loops
	movl	%r10d,%eax
	shll	$4,%r10d		# r10 = rounds*16 (schedule byte length)
	movq	%rdx,%r9		# r9 = original len (low 4 bits drive stealing)
	andq	$-16,%rdx		# len rounded down to whole blocks

	movups	16(%rcx,%r10,1),%xmm1	# last round key

	movdqa	.Lxts_magic(%rip),%xmm8	# GF(2^128) reduction constant
	movdqa	%xmm2,%xmm15		# xmm15 = running tweak
	pshufd	$0x5f,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
# Precompute tweaks for 5 blocks (xmm10..xmm13 plus xmm15), each folded with
# the round-0 key so the data XOR and whitening XOR merge into one pxor.
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14		# carry mask for tweak*alpha
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)		# stash last round key at 96(%rsp)

	subq	$96,%rdx
	jc	.Lxts_enc_short		# fewer than 6 blocks

	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx	# rcx = end of key schedule
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10		# r10 = negative round-counter bias
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_enc_grandloop

# Main loop: 6 blocks in flight; next 6 tweaks are computed in the shadow of
# the AES rounds and parked at 0..80(%rsp).
.align	32
.Lxts_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2		# input ^ tweak ^ round0 key
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,220,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,220,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,220,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,220,233
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10		# re-fold tweaks with last round key
.byte	102,15,56,220,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,249
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,220,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$0x5f,%xmm15,%xmm9
	jmp	.Lxts_enc_loop6
.align	32
.Lxts_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_enc_loop6

# Final 4 rounds interleaved with next-iteration tweak generation.
	movdqa	(%r8),%xmm8		# reload .Lxts_magic
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,220,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,220,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,220,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,220,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,220,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%r11),%xmm1

	pxor	%xmm15,%xmm14
.byte	102,15,56,221,84,36,0	# aesenclast (%rsp)+0 operand forms:
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,92,36,16	#   last round XORs the stashed tweak^lastkey
.byte	102,15,56,221,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,221,108,36,48
.byte	102,15,56,221,116,36,64
.byte	102,15,56,221,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_enc_grandloop

	movl	$16+96,%eax		# recover plain round count in eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

# Tail: 0..5 whole blocks remain (rdx+96 after the borrow above).
.Lxts_enc_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10		# strip round-0 key folding from tweaks
	addq	$96,%rdx
	jz	.Lxts_enc_done

	pxor	%xmm0,%xmm11
	cmpq	$0x20,%rdx
	jb	.Lxts_enc_one
	pxor	%xmm0,%xmm12
	je	.Lxts_enc_two

	pxor	%xmm0,%xmm13
	cmpq	$0x40,%rdx
	jb	.Lxts_enc_three
	pxor	%xmm0,%xmm14
	je	.Lxts_enc_four

	movdqu	(%rdi),%xmm2		# 5-block tail
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm7

	call	_aesni_encrypt6

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10		# advance tweak for possible stealing
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_9:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_9
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_enc_done

# Ciphertext stealing for a ragged (non-multiple-of-16) final chunk.
.align	16
.Lxts_enc_done:
	andq	$15,%r9			# leftover byte count
	jz	.Lxts_enc_ret
	movq	%r9,%rdx

.Lxts_enc_steal:
	movzbl	(%rdi),%eax		# swap trailing plaintext bytes with
	movzbl	-16(%rsi),%ecx		# bytes of the last ciphertext block
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_enc_steal

	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2		# re-encrypt the stolen block
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_10:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_10
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

.Lxts_enc_ret:
	xorps	%xmm0,%xmm0		# scrub registers and stack scratch
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lxts_enc_epilogue:
	.byte	0xf3,0xc3
.size	aesni_xts_encrypt,.-aesni_xts_encrypt

#------------------------------------------------------------------------------
# void aesni_xts_decrypt(const char *in   %rdi,
#                        char *out        %rsi,
#                        size_t len       %rdx,
#                        const KEY *key1  %rcx,   data-decryption key schedule
#                        const KEY *key2  %r8,    tweak-ENcryption key schedule
#                        const u8 iv[16]  %r9)
# Mirror of aesni_xts_encrypt; the tweak is still encrypted (aesenc), only
# the payload uses aesdec (.byte 102,15,56,222/223,xx).
#------------------------------------------------------------------------------
.globl	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp
	movups	(%r9),%xmm2		# load initial tweak
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0		# encrypt tweak with key2
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
.Loop_enc1_11:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_11
.byte	102,15,56,221,209
	xorl	%eax,%eax		# if len isn't block-aligned, hold back
	testq	$15,%rdx		# one extra block for ciphertext stealing
	setnz	%al
	shlq	$4,%rax
	subq	%rax,%rdx		# (cont. aesni_xts_decrypt) reserve the
					# stealing block from the bulk length

	movups	(%rcx),%xmm0		# round-0 key of data schedule
	movq	%rcx,%r11
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1	# last round key

	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15		# xmm15 = running tweak
	pshufd	$0x5f,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
# Precompute 5 tweaks (xmm10..xmm13, xmm15), folded with the round-0 key.
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)		# stash last round key

	subq	$96,%rdx
	jc	.Lxts_dec_short		# fewer than 6 blocks

	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_dec_grandloop

# Main loop: 6 blocks per iteration; aesdec rounds interleave with the
# generation of the next 6 tweaks (same schedule as the encrypt side).
.align	32
.Lxts_dec_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,222,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,222,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,222,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,222,233
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,222,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,222,249
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,222,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,222,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$0x5f,%xmm15,%xmm9
	jmp	.Lxts_dec_loop6
.align	32
.Lxts_dec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_dec_loop6

	movdqa	(%r8),%xmm8		# .Lxts_magic
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,222,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,222,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,222,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,222,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,222,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	16(%r11),%xmm1

	pxor	%xmm15,%xmm14
.byte	102,15,56,223,84,36,0	# aesdeclast with stashed tweak^lastkey
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,223,92,36,16
.byte	102,15,56,223,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,223,108,36,48
.byte	102,15,56,223,116,36,64
.byte	102,15,56,223,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_dec_grandloop

	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

# Tail: 0..5 whole blocks remain.
.Lxts_dec_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10		# strip round-0 key folding from tweaks
	pxor	%xmm0,%xmm11
	addq	$96,%rdx
	jz	.Lxts_dec_done

	pxor	%xmm0,%xmm12
	cmpq	$0x20,%rdx
	jb	.Lxts_dec_one
	pxor	%xmm0,%xmm13
	je	.Lxts_dec_two

	pxor	%xmm0,%xmm14
	cmpq	$0x40,%rdx
	jb	.Lxts_dec_three
	je	.Lxts_dec_four

	movdqu	(%rdi),%xmm2		# 5-block tail
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6

	call	_aesni_decrypt6

	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm14
	movdqu	%xmm5,48(%rsi)
	pcmpgtd	%xmm15,%xmm14		# compute next tweak in-line (carry mask)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	pshufd	$0x13,%xmm14,%xmm11
	andq	$15,%r9
	jz	.Lxts_dec_ret

	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm11
	pxor	%xmm15,%xmm11
	jmp	.Lxts_dec_done2

.align	16
.Lxts_dec_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_12:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_12
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10		# rotate tweak pipeline forward
	movups	%xmm2,(%rsi)
	movdqa	%xmm12,%xmm11
	leaq	16(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_decrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm13,%xmm11
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_decrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm14,%xmm11
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_decrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqa	%xmm15,%xmm11
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_dec_done

# Ciphertext stealing: decrypt with the NEXT tweak first, swap bytes, then
# re-decrypt the reassembled block with the current tweak.
.align	16
.Lxts_dec_done:
	andq	$15,%r9
	jz	.Lxts_dec_ret
.Lxts_dec_done2:
	movq	%r9,%rdx
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	(%rdi),%xmm2
	xorps	%xmm11,%xmm2		# decrypt last full block w/ next tweak
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_13:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_13
.byte	102,15,56,223,209
	xorps	%xmm11,%xmm2
	movups	%xmm2,(%rsi)

.Lxts_dec_steal:
	movzbl	16(%rdi),%eax		# swap trailing ciphertext bytes into
	movzbl	(%rsi),%ecx		# the just-decrypted block
	leaq	1(%rdi),%rdi
	movb	%al,(%rsi)
	movb	%cl,16(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_dec_steal

	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	(%rsi),%xmm2		# final decrypt with current tweak
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_14:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_14
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)

.Lxts_dec_ret:
	xorps	%xmm0,%xmm0		# scrub registers and stack scratch
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lxts_dec_epilogue:
	.byte	0xf3,0xc3
.size	aesni_xts_decrypt,.-aesni_xts_decrypt

#------------------------------------------------------------------------------
# void aesni_cbc_encrypt(const char *in  %rdi, char *out %rsi, size_t len %rdx,
#                        const KEY *key  %rcx, u8 *ivec %r8, int enc %r9d)
# enc != 0 -> CBC encrypt (serial, one block at a time below);
# enc == 0 -> jumps to .Lcbc_decrypt (continues past this window).
#------------------------------------------------------------------------------
.globl	aesni_cbc_encrypt
.type	aesni_cbc_encrypt,@function
.align	16
aesni_cbc_encrypt:
	testq	%rdx,%rdx
	jz	.Lcbc_ret		# len == 0: nothing to do

	movl	240(%rcx),%r10d		# rounds
	movq	%rcx,%r11
	testl	%r9d,%r9d
	jz	.Lcbc_decrypt

	movups	(%r8),%xmm2		# xmm2 = IV / running chaining value
	movl	%r10d,%eax
	cmpq	$16,%rdx
	jb	.Lcbc_enc_tail		# partial first block
	subq	$16,%rdx
	jmp	.Lcbc_enc_loop
.align	16
.Lcbc_enc_loop:
	movups	(%rdi),%xmm3
	leaq	16(%rdi),%rdi

	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm3
	leaq	32(%rcx),%rcx
	xorps	%xmm3,%xmm2		# chain: block ^ previous ciphertext
.Loop_enc1_15:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_15
.byte	102,15,56,221,209
	movl	%r10d,%eax		# reset rounds/key for next block
	movq	%r11,%rcx
	movups	%xmm2,0(%rsi)
	leaq	16(%rsi),%rsi
	subq	$16,%rdx
	jnc	.Lcbc_enc_loop
	addq	$16,%rdx
	jnz	.Lcbc_enc_tail
	pxor	%xmm0,%xmm0		# scrub key material
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%r8)		# write back final IV
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	jmp	.Lcbc_ret

2585.Lcbc_enc_tail: 2586 movq %rdx,%rcx 2587 xchgq %rdi,%rsi 2588.long 0x9066A4F3 2589 movl $16,%ecx 2590 subq %rdx,%rcx 2591 xorl %eax,%eax 2592.long 0x9066AAF3 2593 leaq -16(%rdi),%rdi 2594 movl %r10d,%eax 2595 movq %rdi,%rsi 2596 movq %r11,%rcx 2597 xorq %rdx,%rdx 2598 jmp .Lcbc_enc_loop 2599 2600.align 16 2601.Lcbc_decrypt: 2602 cmpq $16,%rdx 2603 jne .Lcbc_decrypt_bulk 2604 2605 2606 2607 movdqu (%rdi),%xmm2 2608 movdqu (%r8),%xmm3 2609 movdqa %xmm2,%xmm4 2610 movups (%rcx),%xmm0 2611 movups 16(%rcx),%xmm1 2612 leaq 32(%rcx),%rcx 2613 xorps %xmm0,%xmm2 2614.Loop_dec1_16: 2615.byte 102,15,56,222,209 2616 decl %r10d 2617 movups (%rcx),%xmm1 2618 leaq 16(%rcx),%rcx 2619 jnz .Loop_dec1_16 2620.byte 102,15,56,223,209 2621 pxor %xmm0,%xmm0 2622 pxor %xmm1,%xmm1 2623 movdqu %xmm4,(%r8) 2624 xorps %xmm3,%xmm2 2625 pxor %xmm3,%xmm3 2626 movups %xmm2,(%rsi) 2627 pxor %xmm2,%xmm2 2628 jmp .Lcbc_ret 2629.align 16 2630.Lcbc_decrypt_bulk: 2631 leaq (%rsp),%rax 2632 pushq %rbp 2633 subq $16,%rsp 2634 andq $-16,%rsp 2635 leaq -8(%rax),%rbp 2636 movups (%r8),%xmm10 2637 movl %r10d,%eax 2638 cmpq $0x50,%rdx 2639 jbe .Lcbc_dec_tail 2640 2641 movups (%rcx),%xmm0 2642 movdqu 0(%rdi),%xmm2 2643 movdqu 16(%rdi),%xmm3 2644 movdqa %xmm2,%xmm11 2645 movdqu 32(%rdi),%xmm4 2646 movdqa %xmm3,%xmm12 2647 movdqu 48(%rdi),%xmm5 2648 movdqa %xmm4,%xmm13 2649 movdqu 64(%rdi),%xmm6 2650 movdqa %xmm5,%xmm14 2651 movdqu 80(%rdi),%xmm7 2652 movdqa %xmm6,%xmm15 2653 movl OPENSSL_ia32cap_P+4(%rip),%r9d 2654 cmpq $0x70,%rdx 2655 jbe .Lcbc_dec_six_or_seven 2656 2657 andl $71303168,%r9d 2658 subq $0x50,%rdx 2659 cmpl $4194304,%r9d 2660 je .Lcbc_dec_loop6_enter 2661 subq $0x20,%rdx 2662 leaq 112(%rcx),%rcx 2663 jmp .Lcbc_dec_loop8_enter 2664.align 16 2665.Lcbc_dec_loop8: 2666 movups %xmm9,(%rsi) 2667 leaq 16(%rsi),%rsi 2668.Lcbc_dec_loop8_enter: 2669 movdqu 96(%rdi),%xmm8 2670 pxor %xmm0,%xmm2 2671 movdqu 112(%rdi),%xmm9 2672 pxor %xmm0,%xmm3 2673 movups 16-112(%rcx),%xmm1 2674 pxor %xmm0,%xmm4 2675 
xorq %r11,%r11 2676 cmpq $0x70,%rdx 2677 pxor %xmm0,%xmm5 2678 pxor %xmm0,%xmm6 2679 pxor %xmm0,%xmm7 2680 pxor %xmm0,%xmm8 2681 2682.byte 102,15,56,222,209 2683 pxor %xmm0,%xmm9 2684 movups 32-112(%rcx),%xmm0 2685.byte 102,15,56,222,217 2686.byte 102,15,56,222,225 2687.byte 102,15,56,222,233 2688.byte 102,15,56,222,241 2689.byte 102,15,56,222,249 2690.byte 102,68,15,56,222,193 2691 setnc %r11b 2692 shlq $7,%r11 2693.byte 102,68,15,56,222,201 2694 addq %rdi,%r11 2695 movups 48-112(%rcx),%xmm1 2696.byte 102,15,56,222,208 2697.byte 102,15,56,222,216 2698.byte 102,15,56,222,224 2699.byte 102,15,56,222,232 2700.byte 102,15,56,222,240 2701.byte 102,15,56,222,248 2702.byte 102,68,15,56,222,192 2703.byte 102,68,15,56,222,200 2704 movups 64-112(%rcx),%xmm0 2705 nop 2706.byte 102,15,56,222,209 2707.byte 102,15,56,222,217 2708.byte 102,15,56,222,225 2709.byte 102,15,56,222,233 2710.byte 102,15,56,222,241 2711.byte 102,15,56,222,249 2712.byte 102,68,15,56,222,193 2713.byte 102,68,15,56,222,201 2714 movups 80-112(%rcx),%xmm1 2715 nop 2716.byte 102,15,56,222,208 2717.byte 102,15,56,222,216 2718.byte 102,15,56,222,224 2719.byte 102,15,56,222,232 2720.byte 102,15,56,222,240 2721.byte 102,15,56,222,248 2722.byte 102,68,15,56,222,192 2723.byte 102,68,15,56,222,200 2724 movups 96-112(%rcx),%xmm0 2725 nop 2726.byte 102,15,56,222,209 2727.byte 102,15,56,222,217 2728.byte 102,15,56,222,225 2729.byte 102,15,56,222,233 2730.byte 102,15,56,222,241 2731.byte 102,15,56,222,249 2732.byte 102,68,15,56,222,193 2733.byte 102,68,15,56,222,201 2734 movups 112-112(%rcx),%xmm1 2735 nop 2736.byte 102,15,56,222,208 2737.byte 102,15,56,222,216 2738.byte 102,15,56,222,224 2739.byte 102,15,56,222,232 2740.byte 102,15,56,222,240 2741.byte 102,15,56,222,248 2742.byte 102,68,15,56,222,192 2743.byte 102,68,15,56,222,200 2744 movups 128-112(%rcx),%xmm0 2745 nop 2746.byte 102,15,56,222,209 2747.byte 102,15,56,222,217 2748.byte 102,15,56,222,225 2749.byte 102,15,56,222,233 2750.byte 102,15,56,222,241 2751.byte 
102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	144-112(%rcx),%xmm1
	cmpl	$11,%eax			# 10 rounds (128-bit key)?
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2 ... (rounds 9/10)
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	160-112(%rcx),%xmm0
	jb	.Lcbc_dec_done			# 128-bit key: done after 10 rounds
.byte	102,15,56,222,209		# extra 2 rounds for 192-bit keys
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	176-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	192-112(%rcx),%xmm0
	je	.Lcbc_dec_done			# 192-bit key: done after 12 rounds
.byte	102,15,56,222,209		# extra 2 rounds for 256-bit keys
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	208-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	224-112(%rcx),%xmm0
	jmp	.Lcbc_dec_done
.align	16
.Lcbc_dec_done:
.byte	102,15,56,222,209		# penultimate aesdec round for xmm2..xmm9
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm10			# pre-xor last round key into saved IV/ciphertext
	pxor	%xmm0,%xmm11
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm12
	pxor	%xmm0,%xmm13
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	pxor	%xmm0,%xmm14
	pxor	%xmm0,%xmm15
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movdqu	80(%rdi),%xmm1

.byte	102,65,15,56,223,210		# aesdeclast %xmm10,%xmm2 (CBC chain folded in)
	movdqu	96(%rdi),%xmm10
	pxor	%xmm0,%xmm1
.byte	102,65,15,56,223,219
	pxor	%xmm0,%xmm10
	movdqu	112(%rdi),%xmm0
.byte	102,65,15,56,223,228
	leaq	128(%rdi),%rdi			# advance input pointer past 8 blocks
	movdqu	0(%r11),%xmm11			# reload ciphertext copies stashed at (%r11)
.byte	102,65,15,56,223,237
.byte	102,65,15,56,223,246
	movdqu	16(%r11),%xmm12
	movdqu	32(%r11),%xmm13
.byte	102,65,15,56,223,255
.byte	102,68,15,56,223,193
	movdqu	48(%r11),%xmm14
	movdqu	64(%r11),%xmm15
.byte	102,69,15,56,223,202
	movdqa	%xmm0,%xmm10			# last ciphertext block becomes next IV
	movdqu	80(%r11),%xmm1
	movups	-112(%rcx),%xmm0

	movups	%xmm2,(%rsi)			# store 7 plaintext blocks, rotate regs
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm1,%xmm7
	movups	%xmm8,96(%rsi)
	leaq	112(%rsi),%rsi

	subq	$0x80,%rdx			# 8 blocks (0x80 bytes) consumed
	ja	.Lcbc_dec_loop8

	movaps	%xmm9,%xmm2
	leaq	-112(%rcx),%rcx			# rewind key pointer to schedule start
	addq	$0x70,%rdx
	jle	.Lcbc_dec_clear_tail_collected
	movups	%xmm9,(%rsi)
	leaq	16(%rsi),%rsi
	cmpq	$0x50,%rdx			# more than 5 blocks left?
	jbe	.Lcbc_dec_tail

	movaps	%xmm11,%xmm2
.Lcbc_dec_six_or_seven:
	cmpq	$0x60,%rdx
	ja	.Lcbc_dec_seven

	movaps	%xmm7,%xmm8
	call	_aesni_decrypt6
	pxor	%xmm10,%xmm2			# xor IV / previous ciphertext (CBC)
	movaps	%xmm8,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3			# clear sensitive registers as we go
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	pxor	%xmm15,%xmm7
	movdqu	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	leaq	80(%rsi),%rsi
	movdqa	%xmm7,%xmm2
	pxor	%xmm7,%xmm7
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_seven:
	movups	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	80(%rdi),%xmm9
	pxor	%xmm10,%xmm2
	movups	96(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	pxor	%xmm15,%xmm7
	movdqu	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm9,%xmm8
	movdqu	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	leaq	96(%rsi),%rsi
	movdqa	%xmm8,%xmm2
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_loop6:
	movups	%xmm7,(%rsi)
	leaq	16(%rsi),%rsi
	movdqu	0(%rdi),%xmm2			# load 6 ciphertext blocks,
	movdqu	16(%rdi),%xmm3			# keeping copies in xmm11-xmm15
	movdqa	%xmm2,%xmm11			# for the CBC xor after decryption
	movdqu	32(%rdi),%xmm4
	movdqa	%xmm3,%xmm12
	movdqu	48(%rdi),%xmm5
	movdqa	%xmm4,%xmm13
	movdqu	64(%rdi),%xmm6
	movdqa	%xmm5,%xmm14
	movdqu	80(%rdi),%xmm7
	movdqa	%xmm6,%xmm15
.Lcbc_dec_loop6_enter:
	leaq	96(%rdi),%rdi
	movdqa	%xmm7,%xmm8

	call	_aesni_decrypt6

	pxor	%xmm10,%xmm2
	movdqa	%xmm8,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm6
	movq	%r11,%rcx			# restore key pointer (saved in r11)
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm15,%xmm7
	movl	%r10d,%eax			# restore round count (saved in r10d)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	subq	$0x60,%rdx
	ja	.Lcbc_dec_loop6

	movdqa	%xmm7,%xmm2
	addq	$0x50,%rdx
	jle	.Lcbc_dec_clear_tail_collected
	movups	%xmm7,(%rsi)
	leaq	16(%rsi),%rsi

.Lcbc_dec_tail:
	movups	(%rdi),%xmm2
	subq	$0x10,%rdx
	jbe	.Lcbc_dec_one			# <= 1 block remaining

	movups	16(%rdi),%xmm3
	movaps	%xmm2,%xmm11
	subq	$0x10,%rdx
	jbe	.Lcbc_dec_two

	movups	32(%rdi),%xmm4
	movaps	%xmm3,%xmm12
	subq	$0x10,%rdx
	jbe	.Lcbc_dec_three

	movups	48(%rdi),%xmm5
	movaps	%xmm4,%xmm13
	subq	$0x10,%rdx
	jbe	.Lcbc_dec_four

	movups	64(%rdi),%xmm6
	movaps	%xmm5,%xmm14
	movaps	%xmm6,%xmm15
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	pxor	%xmm10,%xmm2
	movaps	%xmm15,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	leaq	64(%rsi),%rsi
	movdqa	%xmm6,%xmm2
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	subq	$0x10,%rdx
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_one:
	movaps	%xmm2,%xmm11
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_17:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_17
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movaps	%xmm11,%xmm10
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_two:
	movaps	%xmm3,%xmm12
	call	_aesni_decrypt2
	pxor	%xmm10,%xmm2
	movaps	%xmm12,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	movdqa	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	leaq	16(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_three:
	movaps	%xmm4,%xmm13
	call	_aesni_decrypt3
	pxor	%xmm10,%xmm2
	movaps	%xmm13,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movdqa	%xmm4,%xmm2
	pxor	%xmm4,%xmm4
	leaq	32(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_four:
	movaps	%xmm5,%xmm14
	call	_aesni_decrypt4
	pxor	%xmm10,%xmm2
	movaps	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movdqa	%xmm5,%xmm2
	pxor	%xmm5,%xmm5
	leaq	48(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3			# scrub all plaintext/key material
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
.Lcbc_dec_tail_collected:
	movups	%xmm10,(%r8)			# write back updated IV
	andq	$15,%rdx
	jnz	.Lcbc_dec_tail_partial
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lcbc_dec_ret
.align	16
.Lcbc_dec_tail_partial:
	movaps	%xmm2,(%rsp)			# spill last block, copy only rdx bytes out
	pxor	%xmm2,%xmm2
	movq	$16,%rcx
	movq	%rsi,%rdi
	subq	%rdx,%rcx
	leaq	(%rsp),%rsi
.long	0x9066A4F3			# rep movsb; data16 nop
	movdqa	%xmm2,(%rsp)			# scrub stack copy
	
.Lcbc_dec_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	leaq	(%rbp),%rsp
	popq	%rbp
.Lcbc_ret:
	.byte	0xf3,0xc3			# repz ret
.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt

#-----------------------------------------------------------------------
# int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key)
# NOTE(review): C signature per the visible register usage (rdi/rsi/rdx,
# SysV AMD64); confirm against the OpenSSL prototype.
# Builds the encryption schedule via __aesni_set_encrypt_key, then
# converts it for the Equivalent Inverse Cipher: the schedule is reversed
# in place and every inner round key is passed through AESIMC.
# Returns 0 on success (eax from __aesni_set_encrypt_key otherwise).
#-----------------------------------------------------------------------
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.byte	0x48,0x83,0xEC,0x08		# subq $8,%rsp
	call	__aesni_set_encrypt_key
	shll	$4,%esi				# esi = rounds*16 (schedule byte length)
	testl	%eax,%eax
	jnz	.Ldec_key_ret			# propagate key-setup failure
	leaq	16(%rdx,%rsi,1),%rdi		# rdi = last round key, rdx = first

	movups	(%rdx),%xmm0			# swap first and last round keys
	movups	(%rdi),%xmm1
	movups	%xmm0,(%rdi)
	movups	%xmm1,(%rdx)
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi

.Ldec_key_inverse:
	movups	(%rdx),%xmm0
	movups	(%rdi),%xmm1
.byte	102,15,56,219,192		# aesimc %xmm0,%xmm0
.byte	102,15,56,219,201		# aesimc %xmm1,%xmm1
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi
	movups	%xmm0,16(%rdi)			# store swapped + inverted pair
	movups	%xmm1,-16(%rdx)
	cmpq	%rdx,%rdi
	ja	.Ldec_key_inverse

	movups	(%rdx),%xmm0			# middle round key: invert in place
.byte	102,15,56,219,192		# aesimc %xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm0,(%rdi)
	pxor	%xmm0,%xmm0			# scrub key material
.Ldec_key_ret:
	addq	$8,%rsp
	.byte	0xf3,0xc3			# repz ret
.LSEH_end_set_decrypt_key:
.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key

#-----------------------------------------------------------------------
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key)
# NOTE(review): C signature per the visible register usage (rdi/rsi/rdx,
# SysV AMD64); confirm against the OpenSSL prototype.
# In:   rdi = user key bytes, esi = key length in bits (128/192/256),
#       rdx = output schedule.
# Out:  eax = 0 on success, -1 if a pointer is NULL, -2 on bad key size.
#       Round count is stored after the last round key; rounds = 9/11/13
#       here count the inner rounds (value stored in esi).
# Uses AESKEYGENASSIST; an "_alt" PSHUFB/AESENCLAST path is selected via
# the OPENSSL_ia32cap_P capability word in r10d.
# Clobbers: rax, r10, xmm0-xmm5 (all scrubbed on exit).
#-----------------------------------------------------------------------
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte	0x48,0x83,0xEC,0x08		# subq $8,%rsp
	movq	$-1,%rax			# default return: -1 (bad pointer)
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movl	$268437504,%r10d		# capability mask for the _alt path
	movups	(%rdi),%xmm0			# first 128 bits of user key
	xorps	%xmm4,%xmm4
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi				# AES-128: 9 inner rounds
	cmpl	$268435456,%r10d
	je	.L10rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1		# aeskeygenassist $1,%xmm0,%xmm1 (rcon=1)
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2		# rcon = 2,4,...,0x36 for rounds 2..10
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)
	movl	%esi,80(%rax)			# store round count after schedule
	xorl	%eax,%eax			# success
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5	# PSHUFB mask: RotWord/SubWord lane setup
	movl	$8,%r10d
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0 (SubWord + rcon)
	pslld	$1,%xmm4			# next rcon = rcon << 1
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3			# prefix-xor of previous round key
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	movdqa	.Lkey_rcon1b(%rip),%xmm4	# rcon 0x1b for round 9

.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4			# -> 0x36 for the final round

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L12rounds:
	movq	16(%rdi),%xmm2			# remaining 64 bits of 192-bit key
	movl	$11,%esi			# AES-192: 11 inner rounds
	cmpl	$268435456,%r10d
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,202,1		# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)
	movl	%esi,48(%rax)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:
	movdqa	.Lkey_rotate192(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:
	movq	%xmm2,0(%rax)			# 192-bit schedule advances 24 bytes/iter
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2
	pslld	$1,%xmm4
	leaq	24(%rax),%rax

	movdqa	%xmm0,%xmm3			# prefix-xor of low 128 bits
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$0xff,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L14rounds:
	movups	16(%rdi),%xmm2			# upper 128 bits of 256-bit key
	movl	$13,%esi			# AES-256: 13 inner rounds
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1		# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1		# aeskeygenassist $1,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)
	movl	%esi,16(%rax)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256

	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		# aesenclast %xmm3,%xmm2 (SubWord, rcon=0)

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax			# unsupported key size
.Lenc_key_ret:
	pxor	%xmm0,%xmm0			# scrub key material before return
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
	.byte	0xf3,0xc3			# repz ret
.LSEH_end_set_encrypt_key:

# Key-expansion helpers. In: xmm1 = AESKEYGENASSIST output, rax = output
# cursor. They fold the SubWord/RotWord result into the running round key
# (xmm0, and xmm2 for 192/256-bit halves) and store it via rax.
.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4			# prefix-xor via shuffles
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1		# broadcast SubWord(RotWord)^rcon lane
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3			# repz ret

.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1			# broadcast lane 1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3			# repz ret

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

.align	16
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3			# repz ret

.align	16
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1		# broadcast lane 2 (no rcon for odd rounds)
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3			# repz ret
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

# Read-only constants used by the AES-NI routines above.
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lxts_magic:
.long	0x87,0,1,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:
.long	1,1,1,1
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64