/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
/*
 * x86-64 AES-NI primitives, AT&T (GAS) syntax.  The exported routines
 * below read their arguments from %rdi, %rsi and %rdx.
 *
 * AES-NI instructions are emitted as raw opcode bytes:
 *   .byte 102,15,56,OP,MODRM  ==  66 0F 38 OP /r
 *   OP    220 = aesenc, 221 = aesenclast, 222 = aesdec, 223 = aesdeclast
 *   MODRM 209,217,225,233 : round key %xmm1 applied to %xmm2..%xmm5
 *   MODRM 208,216,224,232 : round key %xmm0 applied to %xmm2..%xmm5
 *   .byte 0xf3,0xc3       ==  rep ret
 *   .byte 0x0f,0x1f,0x00  ==  nopl (%rax), three bytes of padding
 *
 * The 32-bit word at byte offset 240 of the key schedule is used as the
 * round-loop count: the one-block routines execute that many
 * aesenc/aesdec rounds followed by one aesenclast/aesdeclast.
 */
.text

/*
 * aesni_encrypt: encrypt one 16-byte block.
 *   %rdi = input block, %rsi = output block, %rdx = key schedule
 * Clobbers %eax, %rdx and flags.  %xmm0-%xmm2 (round keys and the
 * block itself) are zeroed before returning.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	/* movups/leaq leave the flags from decl intact for the jnz */
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3
.size	aesni_encrypt,.-aesni_encrypt

/*
 * aesni_decrypt: decrypt one 16-byte block.
 * Same register contract as aesni_encrypt, using aesdec/aesdeclast.
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_dec1_2:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3
.size	aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encryptN / _aesni_decryptN (N = 2, 3, 4): process N blocks
 * held in %xmm2..%xmm(N+1) in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count
 *   Out: results in the same %xmm registers
 * Clobbers %rax, %rcx, flags, and %xmm0/%xmm1, which still hold round
 * keys on return.
 * %rcx is moved to key+32+16*count and %rax runs from 16-16*count up
 * to 0 in steps of 32 (two rounds per pass), so the count must be odd
 * and at least 3 for the loop to terminate.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	.byte	0xf3,0xc3
.size	_aesni_encrypt2,.-_aesni_encrypt2
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208
.byte	102,15,56,223,216
	.byte	0xf3,0xc3
.size	_aesni_decrypt2,.-_aesni_decrypt2
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	.byte	0xf3,0xc3
.size	_aesni_encrypt3,.-_aesni_encrypt3
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	.byte	0xf3,0xc3
.size	_aesni_decrypt3,.-_aesni_decrypt3
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	.byte	0xf3,0xc3
.size	_aesni_encrypt4,.-_aesni_encrypt4
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Ldec_loop4:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	.byte	0xf3,0xc3
.size	_aesni_decrypt4,.-_aesni_decrypt4
.type	_aesni_encrypt6,@function
.align	16
/*
 * AES-NI instructions in this section are emitted as raw opcode bytes:
 *   .byte 102,15,56,OP,MODRM     ==  66 0F 38 OP /r
 *   .byte 102,68,15,56,OP,MODRM  ==  same with REX.R (destination %xmm8+)
 *   OP    220 = aesenc, 221 = aesenclast, 222 = aesdec, 223 = aesdeclast
 *   MODRM 209,217,225,233,241,249 : round key %xmm1 -> %xmm2..%xmm7
 *   MODRM 208,216,224,232,240,248 : round key %xmm0 -> %xmm2..%xmm7
 *   REX.R MODRM 193,201 : %xmm1 -> %xmm8,%xmm9 ; 192,200 : %xmm0 -> %xmm8,%xmm9
 *   .byte 102,15,56,0,247  ==  pshufb %xmm7,%xmm6
 *   .byte 102,15,56,0,215  ==  pshufb %xmm7,%xmm2
 *   .byte 0xf3,0xc3        ==  rep ret
 *
 * _aesni_encrypt6/8 and _aesni_decrypt6/8: process 6 or 8 blocks held in
 * %xmm2..%xmm7 (..%xmm9) in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count
 *   Out: results in the same %xmm registers
 * Clobbers %rax, %rcx, flags and %xmm0/%xmm1 (round keys on return).
 * The first round is started while the remaining blocks are still being
 * whitened with round key 0, then control jumps into the middle of the
 * loop.  %rax steps by 32 up to 0, two rounds per pass, so the count
 * must be odd and at least 3.
 */
_aesni_encrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	.byte	0xf3,0xc3
.size	_aesni_encrypt6,.-_aesni_encrypt6
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.Ldec_loop6_enter:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	.byte	0xf3,0xc3
.size	_aesni_decrypt6,.-_aesni_decrypt6
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.Lenc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3
.size	_aesni_encrypt8,.-_aesni_encrypt8
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.Ldec_loop8_inner:
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	.byte	0xf3,0xc3
.size	_aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt: ECB-process a buffer, eight blocks per pass, then
 * a 1..7 block tail.
 *   %rdi = input, %rsi = output,
 *   %rdx = length in bytes (rounded down to a multiple of 16; returns
 *          immediately when that is 0),
 *   %rcx = key schedule, %r8d = nonzero to encrypt, zero to decrypt
 * %r11 and %r10d keep the key pointer and round count because the
 * _aesni_*N helpers overwrite %rcx and %rax.
 * The decrypt paths zero each data register after storing it; every
 * path leaves through .Lecb_ret, which zeroes %xmm0 and %xmm1.
 */
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
	andq	$-16,%rdx
	jz	.Lecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	/* store the previous eight results while loading the next eight */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_enc_tail:
	/* each cmpq feeds both a jb and a je; the movups between them
	   does not alter flags */
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	/* seven blocks: run the 8-wide routine with a zero eighth block */
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	/* five blocks: run the 6-wide routine with a zero sixth block */
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks: per 16-byte block, encrypt two values in
 * parallel -- the counter block and a running block XORed with the
 * input -- and write input XOR E(counter) to the output.
 *   %rdi = input, %rsi = output, %rdx = number of blocks,
 *   %rcx = key schedule,
 *   %r8  = 16-byte counter block (read only),
 *   %r9  = 16-byte running block, read at entry and written back at
 *          exit (presumably the CBC-MAC state, going by the routine
 *          name -- confirm against the caller)
 * %xmm6 holds the counter after a pshufb with .Lbswap_mask and is
 * advanced with paddq .Lincrement64; both constants are defined
 * outside this section.
 * %rdx is decremented before it is tested, so it must be nonzero on
 * entry.  Data registers are zeroed before returning.
 */
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax
	movdqu	(%r8),%xmm6
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	shll	$4,%eax
	movl	$16,%r10d
	leaq	0(%rcx),%r11
	movdqu	(%r9),%xmm3
	movdqa	%xmm6,%xmm2
	leaq	32(%rcx,%rax,1),%rcx
.byte	102,15,56,0,247
	/* r10 = 16 - 16*count: initial loop index reloaded for each block */
	subq	%rax,%r10
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	(%rdi),%xmm8

	xorps	%xmm0,%xmm2
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0
	xorps	%xmm0,%xmm3
	movups	32(%r11),%xmm0

.Lccm64_enc2_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	%xmm9,%xmm6
	/* flags from decq survive until the jnz at the end of the block */
	decq	%rdx
.byte	102,15,56,221,208
.byte	102,15,56,221,216

	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
.byte	102,15,56,0,215
	leaq	16(%rsi),%rsi
	jnz	.Lccm64_enc_outer

	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks: same register arguments as
 * aesni_ccm64_encrypt_blocks.  The first counter block is encrypted on
 * its own; after that each pass XORs the input with the previous
 * E(counter), stores it, and encrypts the next counter together with
 * the running block (%xmm3) XORed with the block just recovered.  The
 * last recovered block is folded into %xmm3 by a single-block
 * encryption at .Lccm64_dec_break, and %xmm3 is written back to (%r9).
 */
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax
	movups	(%r8),%xmm6
	movdqu	(%r9),%xmm3
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm6,%xmm2
	movl	%eax,%r10d
	movq	%rcx,%r11
.byte	102,15,56,0,247
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209
	shll	$4,%r10d
	movl	$16,%eax
	movups	(%rdi),%xmm8
	paddq	%xmm9,%xmm6
	leaq	16(%rdi),%rdi
	subq	%r10,%rax
	leaq	32(%r11,%r10,1),%rcx
	movq	%rax,%r10
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215

	subq	$1,%rdx
	jz	.Lccm64_dec_break

	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	xorps	%xmm0,%xmm2
	xorps	%xmm8,%xmm3
	movups	32(%r11),%xmm0
	jmp	.Lccm64_dec2_loop
.align	16
.Lccm64_dec2_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8
	paddq	%xmm9,%xmm6
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:

	movl	240(%r11),%eax
	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3
.Loop_enc1_6:
.byte	102,15,56,220,217
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
/* aesni_ctr32_encrypt_blocks extends past this section; its text below is left exactly as found. */
996.globl aesni_ctr32_encrypt_blocks 997.type aesni_ctr32_encrypt_blocks,@function 998.align 16 999aesni_ctr32_encrypt_blocks: 1000 cmpq $1,%rdx 1001 jne .Lctr32_bulk 1002 1003 1004 1005 movups (%r8),%xmm2 1006 movups (%rdi),%xmm3 1007 movl 240(%rcx),%edx 1008 movups (%rcx),%xmm0 1009 movups 16(%rcx),%xmm1 1010 leaq 32(%rcx),%rcx 1011 xorps %xmm0,%xmm2 1012.Loop_enc1_7: 1013.byte 102,15,56,220,209 1014 decl %edx 1015 movups (%rcx),%xmm1 1016 leaq 16(%rcx),%rcx 1017 jnz .Loop_enc1_7 1018.byte 102,15,56,221,209 1019 pxor %xmm0,%xmm0 1020 pxor %xmm1,%xmm1 1021 xorps %xmm3,%xmm2 1022 pxor %xmm3,%xmm3 1023 movups %xmm2,(%rsi) 1024 xorps %xmm2,%xmm2 1025 jmp .Lctr32_epilogue 1026 1027.align 16 1028.Lctr32_bulk: 1029 leaq (%rsp),%rax 1030 pushq %rbp 1031 subq $128,%rsp 1032 andq $-16,%rsp 1033 leaq -8(%rax),%rbp 1034 1035 1036 1037 1038 movdqu (%r8),%xmm2 1039 movdqu 
(%rcx),%xmm0 1040 movl 12(%r8),%r8d 1041 pxor %xmm0,%xmm2 1042 movl 12(%rcx),%r11d 1043 movdqa %xmm2,0(%rsp) 1044 bswapl %r8d 1045 movdqa %xmm2,%xmm3 1046 movdqa %xmm2,%xmm4 1047 movdqa %xmm2,%xmm5 1048 movdqa %xmm2,64(%rsp) 1049 movdqa %xmm2,80(%rsp) 1050 movdqa %xmm2,96(%rsp) 1051 movq %rdx,%r10 1052 movdqa %xmm2,112(%rsp) 1053 1054 leaq 1(%r8),%rax 1055 leaq 2(%r8),%rdx 1056 bswapl %eax 1057 bswapl %edx 1058 xorl %r11d,%eax 1059 xorl %r11d,%edx 1060.byte 102,15,58,34,216,3 1061 leaq 3(%r8),%rax 1062 movdqa %xmm3,16(%rsp) 1063.byte 102,15,58,34,226,3 1064 bswapl %eax 1065 movq %r10,%rdx 1066 leaq 4(%r8),%r10 1067 movdqa %xmm4,32(%rsp) 1068 xorl %r11d,%eax 1069 bswapl %r10d 1070.byte 102,15,58,34,232,3 1071 xorl %r11d,%r10d 1072 movdqa %xmm5,48(%rsp) 1073 leaq 5(%r8),%r9 1074 movl %r10d,64+12(%rsp) 1075 bswapl %r9d 1076 leaq 6(%r8),%r10 1077 movl 240(%rcx),%eax 1078 xorl %r11d,%r9d 1079 bswapl %r10d 1080 movl %r9d,80+12(%rsp) 1081 xorl %r11d,%r10d 1082 leaq 7(%r8),%r9 1083 movl %r10d,96+12(%rsp) 1084 bswapl %r9d 1085 movl OPENSSL_ia32cap_P+4(%rip),%r10d 1086 xorl %r11d,%r9d 1087 andl $71303168,%r10d 1088 movl %r9d,112+12(%rsp) 1089 1090 movups 16(%rcx),%xmm1 1091 1092 movdqa 64(%rsp),%xmm6 1093 movdqa 80(%rsp),%xmm7 1094 1095 cmpq $8,%rdx 1096 jb .Lctr32_tail 1097 1098 subq $6,%rdx 1099 cmpl $4194304,%r10d 1100 je .Lctr32_6x 1101 1102 leaq 128(%rcx),%rcx 1103 subq $2,%rdx 1104 jmp .Lctr32_loop8 1105 1106.align 16 1107.Lctr32_6x: 1108 shll $4,%eax 1109 movl $48,%r10d 1110 bswapl %r11d 1111 leaq 32(%rcx,%rax,1),%rcx 1112 subq %rax,%r10 1113 jmp .Lctr32_loop6 1114 1115.align 16 1116.Lctr32_loop6: 1117 addl $6,%r8d 1118 movups -48(%rcx,%r10,1),%xmm0 1119.byte 102,15,56,220,209 1120 movl %r8d,%eax 1121 xorl %r11d,%eax 1122.byte 102,15,56,220,217 1123.byte 0x0f,0x38,0xf1,0x44,0x24,12 1124 leal 1(%r8),%eax 1125.byte 102,15,56,220,225 1126 xorl %r11d,%eax 1127.byte 0x0f,0x38,0xf1,0x44,0x24,28 1128.byte 102,15,56,220,233 1129 leal 2(%r8),%eax 1130 xorl %r11d,%eax 1131.byte 
102,15,56,220,241 1132.byte 0x0f,0x38,0xf1,0x44,0x24,44 1133 leal 3(%r8),%eax 1134.byte 102,15,56,220,249 1135 movups -32(%rcx,%r10,1),%xmm1 1136 xorl %r11d,%eax 1137 1138.byte 102,15,56,220,208 1139.byte 0x0f,0x38,0xf1,0x44,0x24,60 1140 leal 4(%r8),%eax 1141.byte 102,15,56,220,216 1142 xorl %r11d,%eax 1143.byte 0x0f,0x38,0xf1,0x44,0x24,76 1144.byte 102,15,56,220,224 1145 leal 5(%r8),%eax 1146 xorl %r11d,%eax 1147.byte 102,15,56,220,232 1148.byte 0x0f,0x38,0xf1,0x44,0x24,92 1149 movq %r10,%rax 1150.byte 102,15,56,220,240 1151.byte 102,15,56,220,248 1152 movups -16(%rcx,%r10,1),%xmm0 1153 1154 call .Lenc_loop6 1155 1156 movdqu (%rdi),%xmm8 1157 movdqu 16(%rdi),%xmm9 1158 movdqu 32(%rdi),%xmm10 1159 movdqu 48(%rdi),%xmm11 1160 movdqu 64(%rdi),%xmm12 1161 movdqu 80(%rdi),%xmm13 1162 leaq 96(%rdi),%rdi 1163 movups -64(%rcx,%r10,1),%xmm1 1164 pxor %xmm2,%xmm8 1165 movaps 0(%rsp),%xmm2 1166 pxor %xmm3,%xmm9 1167 movaps 16(%rsp),%xmm3 1168 pxor %xmm4,%xmm10 1169 movaps 32(%rsp),%xmm4 1170 pxor %xmm5,%xmm11 1171 movaps 48(%rsp),%xmm5 1172 pxor %xmm6,%xmm12 1173 movaps 64(%rsp),%xmm6 1174 pxor %xmm7,%xmm13 1175 movaps 80(%rsp),%xmm7 1176 movdqu %xmm8,(%rsi) 1177 movdqu %xmm9,16(%rsi) 1178 movdqu %xmm10,32(%rsi) 1179 movdqu %xmm11,48(%rsi) 1180 movdqu %xmm12,64(%rsi) 1181 movdqu %xmm13,80(%rsi) 1182 leaq 96(%rsi),%rsi 1183 1184 subq $6,%rdx 1185 jnc .Lctr32_loop6 1186 1187 addq $6,%rdx 1188 jz .Lctr32_done 1189 1190 leal -48(%r10),%eax 1191 leaq -80(%rcx,%r10,1),%rcx 1192 negl %eax 1193 shrl $4,%eax 1194 jmp .Lctr32_tail 1195 1196.align 32 1197.Lctr32_loop8: 1198 addl $8,%r8d 1199 movdqa 96(%rsp),%xmm8 1200.byte 102,15,56,220,209 1201 movl %r8d,%r9d 1202 movdqa 112(%rsp),%xmm9 1203.byte 102,15,56,220,217 1204 bswapl %r9d 1205 movups 32-128(%rcx),%xmm0 1206.byte 102,15,56,220,225 1207 xorl %r11d,%r9d 1208 nop 1209.byte 102,15,56,220,233 1210 movl %r9d,0+12(%rsp) 1211 leaq 1(%r8),%r9 1212.byte 102,15,56,220,241 1213.byte 102,15,56,220,249 1214.byte 102,68,15,56,220,193 
1215.byte 102,68,15,56,220,201 1216 movups 48-128(%rcx),%xmm1 1217 bswapl %r9d 1218.byte 102,15,56,220,208 1219.byte 102,15,56,220,216 1220 xorl %r11d,%r9d 1221.byte 0x66,0x90 1222.byte 102,15,56,220,224 1223.byte 102,15,56,220,232 1224 movl %r9d,16+12(%rsp) 1225 leaq 2(%r8),%r9 1226.byte 102,15,56,220,240 1227.byte 102,15,56,220,248 1228.byte 102,68,15,56,220,192 1229.byte 102,68,15,56,220,200 1230 movups 64-128(%rcx),%xmm0 1231 bswapl %r9d 1232.byte 102,15,56,220,209 1233.byte 102,15,56,220,217 1234 xorl %r11d,%r9d 1235.byte 0x66,0x90 1236.byte 102,15,56,220,225 1237.byte 102,15,56,220,233 1238 movl %r9d,32+12(%rsp) 1239 leaq 3(%r8),%r9 1240.byte 102,15,56,220,241 1241.byte 102,15,56,220,249 1242.byte 102,68,15,56,220,193 1243.byte 102,68,15,56,220,201 1244 movups 80-128(%rcx),%xmm1 1245 bswapl %r9d 1246.byte 102,15,56,220,208 1247.byte 102,15,56,220,216 1248 xorl %r11d,%r9d 1249.byte 0x66,0x90 1250.byte 102,15,56,220,224 1251.byte 102,15,56,220,232 1252 movl %r9d,48+12(%rsp) 1253 leaq 4(%r8),%r9 1254.byte 102,15,56,220,240 1255.byte 102,15,56,220,248 1256.byte 102,68,15,56,220,192 1257.byte 102,68,15,56,220,200 1258 movups 96-128(%rcx),%xmm0 1259 bswapl %r9d 1260.byte 102,15,56,220,209 1261.byte 102,15,56,220,217 1262 xorl %r11d,%r9d 1263.byte 0x66,0x90 1264.byte 102,15,56,220,225 1265.byte 102,15,56,220,233 1266 movl %r9d,64+12(%rsp) 1267 leaq 5(%r8),%r9 1268.byte 102,15,56,220,241 1269.byte 102,15,56,220,249 1270.byte 102,68,15,56,220,193 1271.byte 102,68,15,56,220,201 1272 movups 112-128(%rcx),%xmm1 1273 bswapl %r9d 1274.byte 102,15,56,220,208 1275.byte 102,15,56,220,216 1276 xorl %r11d,%r9d 1277.byte 0x66,0x90 1278.byte 102,15,56,220,224 1279.byte 102,15,56,220,232 1280 movl %r9d,80+12(%rsp) 1281 leaq 6(%r8),%r9 1282.byte 102,15,56,220,240 1283.byte 102,15,56,220,248 1284.byte 102,68,15,56,220,192 1285.byte 102,68,15,56,220,200 1286 movups 128-128(%rcx),%xmm0 1287 bswapl %r9d 1288.byte 102,15,56,220,209 1289.byte 102,15,56,220,217 1290 xorl %r11d,%r9d 
1291.byte 0x66,0x90 1292.byte 102,15,56,220,225 1293.byte 102,15,56,220,233 1294 movl %r9d,96+12(%rsp) 1295 leaq 7(%r8),%r9 1296.byte 102,15,56,220,241 1297.byte 102,15,56,220,249 1298.byte 102,68,15,56,220,193 1299.byte 102,68,15,56,220,201 1300 movups 144-128(%rcx),%xmm1 1301 bswapl %r9d 1302.byte 102,15,56,220,208 1303.byte 102,15,56,220,216 1304.byte 102,15,56,220,224 1305 xorl %r11d,%r9d 1306 movdqu 0(%rdi),%xmm10 1307.byte 102,15,56,220,232 1308 movl %r9d,112+12(%rsp) 1309 cmpl $11,%eax 1310.byte 102,15,56,220,240 1311.byte 102,15,56,220,248 1312.byte 102,68,15,56,220,192 1313.byte 102,68,15,56,220,200 1314 movups 160-128(%rcx),%xmm0 1315 1316 jb .Lctr32_enc_done 1317 1318.byte 102,15,56,220,209 1319.byte 102,15,56,220,217 1320.byte 102,15,56,220,225 1321.byte 102,15,56,220,233 1322.byte 102,15,56,220,241 1323.byte 102,15,56,220,249 1324.byte 102,68,15,56,220,193 1325.byte 102,68,15,56,220,201 1326 movups 176-128(%rcx),%xmm1 1327 1328.byte 102,15,56,220,208 1329.byte 102,15,56,220,216 1330.byte 102,15,56,220,224 1331.byte 102,15,56,220,232 1332.byte 102,15,56,220,240 1333.byte 102,15,56,220,248 1334.byte 102,68,15,56,220,192 1335.byte 102,68,15,56,220,200 1336 movups 192-128(%rcx),%xmm0 1337 je .Lctr32_enc_done 1338 1339.byte 102,15,56,220,209 1340.byte 102,15,56,220,217 1341.byte 102,15,56,220,225 1342.byte 102,15,56,220,233 1343.byte 102,15,56,220,241 1344.byte 102,15,56,220,249 1345.byte 102,68,15,56,220,193 1346.byte 102,68,15,56,220,201 1347 movups 208-128(%rcx),%xmm1 1348 1349.byte 102,15,56,220,208 1350.byte 102,15,56,220,216 1351.byte 102,15,56,220,224 1352.byte 102,15,56,220,232 1353.byte 102,15,56,220,240 1354.byte 102,15,56,220,248 1355.byte 102,68,15,56,220,192 1356.byte 102,68,15,56,220,200 1357 movups 224-128(%rcx),%xmm0 1358 jmp .Lctr32_enc_done 1359 1360.align 16 1361.Lctr32_enc_done: 1362 movdqu 16(%rdi),%xmm11 1363 pxor %xmm0,%xmm10 1364 movdqu 32(%rdi),%xmm12 1365 pxor %xmm0,%xmm11 1366 movdqu 48(%rdi),%xmm13 1367 pxor %xmm0,%xmm12 1368 
movdqu 64(%rdi),%xmm14 1369 pxor %xmm0,%xmm13 1370 movdqu 80(%rdi),%xmm15 1371 pxor %xmm0,%xmm14 1372 pxor %xmm0,%xmm15 1373.byte 102,15,56,220,209 1374.byte 102,15,56,220,217 1375.byte 102,15,56,220,225 1376.byte 102,15,56,220,233 1377.byte 102,15,56,220,241 1378.byte 102,15,56,220,249 1379.byte 102,68,15,56,220,193 1380.byte 102,68,15,56,220,201 1381 movdqu 96(%rdi),%xmm1 1382 leaq 128(%rdi),%rdi 1383 1384.byte 102,65,15,56,221,210 1385 pxor %xmm0,%xmm1 1386 movdqu 112-128(%rdi),%xmm10 1387.byte 102,65,15,56,221,219 1388 pxor %xmm0,%xmm10 1389 movdqa 0(%rsp),%xmm11 1390.byte 102,65,15,56,221,228 1391.byte 102,65,15,56,221,237 1392 movdqa 16(%rsp),%xmm12 1393 movdqa 32(%rsp),%xmm13 1394.byte 102,65,15,56,221,246 1395.byte 102,65,15,56,221,255 1396 movdqa 48(%rsp),%xmm14 1397 movdqa 64(%rsp),%xmm15 1398.byte 102,68,15,56,221,193 1399 movdqa 80(%rsp),%xmm0 1400 movups 16-128(%rcx),%xmm1 1401.byte 102,69,15,56,221,202 1402 1403 movups %xmm2,(%rsi) 1404 movdqa %xmm11,%xmm2 1405 movups %xmm3,16(%rsi) 1406 movdqa %xmm12,%xmm3 1407 movups %xmm4,32(%rsi) 1408 movdqa %xmm13,%xmm4 1409 movups %xmm5,48(%rsi) 1410 movdqa %xmm14,%xmm5 1411 movups %xmm6,64(%rsi) 1412 movdqa %xmm15,%xmm6 1413 movups %xmm7,80(%rsi) 1414 movdqa %xmm0,%xmm7 1415 movups %xmm8,96(%rsi) 1416 movups %xmm9,112(%rsi) 1417 leaq 128(%rsi),%rsi 1418 1419 subq $8,%rdx 1420 jnc .Lctr32_loop8 1421 1422 addq $8,%rdx 1423 jz .Lctr32_done 1424 leaq -128(%rcx),%rcx 1425 1426.Lctr32_tail: 1427 1428 1429 leaq 16(%rcx),%rcx 1430 cmpq $4,%rdx 1431 jb .Lctr32_loop3 1432 je .Lctr32_loop4 1433 1434 1435 shll $4,%eax 1436 movdqa 96(%rsp),%xmm8 1437 pxor %xmm9,%xmm9 1438 1439 movups 16(%rcx),%xmm0 1440.byte 102,15,56,220,209 1441.byte 102,15,56,220,217 1442 leaq 32-16(%rcx,%rax,1),%rcx 1443 negq %rax 1444.byte 102,15,56,220,225 1445 addq $16,%rax 1446 movups (%rdi),%xmm10 1447.byte 102,15,56,220,233 1448.byte 102,15,56,220,241 1449 movups 16(%rdi),%xmm11 1450 movups 32(%rdi),%xmm12 1451.byte 102,15,56,220,249 1452.byte 
102,68,15,56,220,193 1453 1454 call .Lenc_loop8_enter 1455 1456 movdqu 48(%rdi),%xmm13 1457 pxor %xmm10,%xmm2 1458 movdqu 64(%rdi),%xmm10 1459 pxor %xmm11,%xmm3 1460 movdqu %xmm2,(%rsi) 1461 pxor %xmm12,%xmm4 1462 movdqu %xmm3,16(%rsi) 1463 pxor %xmm13,%xmm5 1464 movdqu %xmm4,32(%rsi) 1465 pxor %xmm10,%xmm6 1466 movdqu %xmm5,48(%rsi) 1467 movdqu %xmm6,64(%rsi) 1468 cmpq $6,%rdx 1469 jb .Lctr32_done 1470 1471 movups 80(%rdi),%xmm11 1472 xorps %xmm11,%xmm7 1473 movups %xmm7,80(%rsi) 1474 je .Lctr32_done 1475 1476 movups 96(%rdi),%xmm12 1477 xorps %xmm12,%xmm8 1478 movups %xmm8,96(%rsi) 1479 jmp .Lctr32_done 1480 1481.align 32 1482.Lctr32_loop4: 1483.byte 102,15,56,220,209 1484 leaq 16(%rcx),%rcx 1485 decl %eax 1486.byte 102,15,56,220,217 1487.byte 102,15,56,220,225 1488.byte 102,15,56,220,233 1489 movups (%rcx),%xmm1 1490 jnz .Lctr32_loop4 1491.byte 102,15,56,221,209 1492.byte 102,15,56,221,217 1493 movups (%rdi),%xmm10 1494 movups 16(%rdi),%xmm11 1495.byte 102,15,56,221,225 1496.byte 102,15,56,221,233 1497 movups 32(%rdi),%xmm12 1498 movups 48(%rdi),%xmm13 1499 1500 xorps %xmm10,%xmm2 1501 movups %xmm2,(%rsi) 1502 xorps %xmm11,%xmm3 1503 movups %xmm3,16(%rsi) 1504 pxor %xmm12,%xmm4 1505 movdqu %xmm4,32(%rsi) 1506 pxor %xmm13,%xmm5 1507 movdqu %xmm5,48(%rsi) 1508 jmp .Lctr32_done 1509 1510.align 32 1511.Lctr32_loop3: 1512.byte 102,15,56,220,209 1513 leaq 16(%rcx),%rcx 1514 decl %eax 1515.byte 102,15,56,220,217 1516.byte 102,15,56,220,225 1517 movups (%rcx),%xmm1 1518 jnz .Lctr32_loop3 1519.byte 102,15,56,221,209 1520.byte 102,15,56,221,217 1521.byte 102,15,56,221,225 1522 1523 movups (%rdi),%xmm10 1524 xorps %xmm10,%xmm2 1525 movups %xmm2,(%rsi) 1526 cmpq $2,%rdx 1527 jb .Lctr32_done 1528 1529 movups 16(%rdi),%xmm11 1530 xorps %xmm11,%xmm3 1531 movups %xmm3,16(%rsi) 1532 je .Lctr32_done 1533 1534 movups 32(%rdi),%xmm12 1535 xorps %xmm12,%xmm4 1536 movups %xmm4,32(%rsi) 1537 1538.Lctr32_done: 1539 xorps %xmm0,%xmm0 1540 xorl %r11d,%r11d 1541 pxor %xmm1,%xmm1 1542 pxor 
%xmm2,%xmm2
/*
 * NOTE(review): the decimal number at the start of every statement below
 * looks like a listing line number that was fused into the text when the
 * file was flattened; it is kept verbatim here because only comments are
 * being added -- confirm and strip at the source of the flattening.
 */
/*
 * Remainder of the .Lctr32_done cleanup of aesni_ctr32_encrypt_blocks:
 * clear %xmm3..%xmm15 and overwrite the eight 16-byte stack slots at
 * 0..112(%rsp) with %xmm0, which the xorps at the top of .Lctr32_done
 * has already zeroed (presumably so that no key or keystream material
 * stays behind in registers or on the stack).
 */
1543 pxor %xmm3,%xmm3
1544 pxor %xmm4,%xmm4
1545 pxor %xmm5,%xmm5
1546 pxor %xmm6,%xmm6
1547 pxor %xmm7,%xmm7
1548 movaps %xmm0,0(%rsp)
1549 pxor %xmm8,%xmm8
1550 movaps %xmm0,16(%rsp)
1551 pxor %xmm9,%xmm9
1552 movaps %xmm0,32(%rsp)
1553 pxor %xmm10,%xmm10
1554 movaps %xmm0,48(%rsp)
1555 pxor %xmm11,%xmm11
1556 movaps %xmm0,64(%rsp)
1557 pxor %xmm12,%xmm12
1558 movaps %xmm0,80(%rsp)
1559 pxor %xmm13,%xmm13
1560 movaps %xmm0,96(%rsp)
1561 pxor %xmm14,%xmm14
1562 movaps %xmm0,112(%rsp)
1563 pxor %xmm15,%xmm15
/* Drop the aligned frame and restore the caller's %rbp. */
1564 leaq (%rbp),%rsp
1565 popq %rbp
1566.Lctr32_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
1567 .byte 0xf3,0xc3
1568.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt
 *
 * Register use on entry, as read by the code below:
 *   %rdi  source pointer (16-byte blocks are loaded from it)
 *   %rsi  destination pointer (16-byte blocks are stored to it)
 *   %rdx  byte count
 *   %rcx  key schedule used for the data blocks; round counter at 240(%rcx)
 *   %r8   key schedule used once, to encrypt the 16 bytes at (%r9)
 *   %r9   pointer to the 16-byte value the first tweak is derived from
 * NOTE(review): this is the System V argument order of an
 * (in, out, len, key1, key2, iv) prototype -- confirm against the C
 * declaration.
 *
 * Raw-byte opcodes: 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast.  For register operands M = 0xC0 + 8*dst + src, e.g.
 * 209 = 0xD1 is "aesenc %xmm1,%xmm2" and 208 = 0xD0 is
 * "aesenc %xmm0,%xmm2".
 *
 * Stack frame: 112 bytes, 16-byte aligned.  0..80(%rsp) receive the six
 * per-block final-round operands, 96(%rsp) holds round key 0 XOR last
 * round key.  %rbp points at the saved %rbp so the epilogue can undo
 * the alignment.
 */
1569.globl aesni_xts_encrypt
1570.type aesni_xts_encrypt,@function
1571.align 16
1572aesni_xts_encrypt:
1573 leaq (%rsp),%rax
1574 pushq %rbp
1575 subq $112,%rsp
1576 andq $-16,%rsp
1577 leaq -8(%rax),%rbp
/*
 * Encrypt the block at (%r9) with the %r8 schedule; the result in %xmm2
 * is the first tweak.  %eax counts rounds; decl sets the flags that jnz
 * tests (movups and leaq leave them alone).  %r10d picks up the round
 * counter of the data key for later.
 */
1578 movups (%r9),%xmm2
1579 movl 240(%r8),%eax
1580 movl 240(%rcx),%r10d
1581 movups (%r8),%xmm0
1582 movups 16(%r8),%xmm1
1583 leaq 32(%r8),%r8
1584 xorps %xmm0,%xmm2
1585.Loop_enc1_8:
1586.byte 102,15,56,220,209
1587 decl %eax
1588 movups (%r8),%xmm1
1589 leaq 16(%r8),%r8
1590 jnz .Loop_enc1_8
1591.byte 102,15,56,221,209
/*
 * Set up the data key: %xmm0 = round key 0, %r11 = key pointer copy,
 * %eax = round counter, %r10 = counter*16.  %r9 keeps the original byte
 * count, %rdx is rounded down to whole 16-byte blocks.
 */
1592 movups (%rcx),%xmm0
1593 movq %rcx,%r11
1594 movl %r10d,%eax
1595 shll $4,%r10d
1596 movq %rdx,%r9
1597 andq $-16,%rdx
1598
/* %xmm1 = last round key (offset 16 + 16*counter); XORed with rk0 below. */
1599 movups 16(%rcx,%r10,1),%xmm1
1600
/*
 * Generate the tweaks for the first five blocks in %xmm10..%xmm14, each
 * pre-XORed with round key 0, and leave the sixth (not XORed) in %xmm15.
 * Every step doubles %xmm15 per 64-bit lane (paddq) and XORs in a carry
 * mask built from %xmm9, a shuffled copy of the tweak's dwords 3 and 1
 * (psrad $31, then pand with %xmm8).
 * NOTE(review): .Lxts_magic is defined outside this span; presumably it
 * carries the 0x87 reduction constant for doubling in GF(2^128) --
 * confirm.
 */
1601 movdqa .Lxts_magic(%rip),%xmm8
1602 movdqa %xmm2,%xmm15
1603 pshufd $0x5f,%xmm2,%xmm9
1604 pxor %xmm0,%xmm1
1605 movdqa %xmm9,%xmm14
1606 paddd %xmm9,%xmm9
1607 movdqa %xmm15,%xmm10
1608 psrad $31,%xmm14
1609 paddq %xmm15,%xmm15
1610 pand %xmm8,%xmm14
1611 pxor %xmm0,%xmm10
1612 pxor %xmm14,%xmm15
1613 movdqa %xmm9,%xmm14
1614 paddd %xmm9,%xmm9
1615 movdqa %xmm15,%xmm11
1616 psrad $31,%xmm14
1617 paddq %xmm15,%xmm15
1618 pand %xmm8,%xmm14
1619 pxor %xmm0,%xmm11
1620 pxor %xmm14,%xmm15
1621 movdqa %xmm9,%xmm14
1622 paddd %xmm9,%xmm9
1623 movdqa %xmm15,%xmm12
1624 psrad $31,%xmm14
1625 paddq %xmm15,%xmm15
1626 pand %xmm8,%xmm14
1627 pxor %xmm0,%xmm12
1628 pxor %xmm14,%xmm15
1629 movdqa %xmm9,%xmm14
1630 paddd %xmm9,%xmm9
1631 movdqa %xmm15,%xmm13
1632 psrad $31,%xmm14
1633 paddq %xmm15,%xmm15
1634 pand %xmm8,%xmm14
1635 pxor %xmm0,%xmm13
1636 pxor %xmm14,%xmm15
1637 movdqa %xmm15,%xmm14
1638 psrad $31,%xmm9
1639 paddq %xmm15,%xmm15
1640 pand %xmm8,%xmm9
1641 pxor %xmm0,%xmm14
1642 pxor %xmm9,%xmm15
/* Park rk0 ^ rk_last for the main loop. */
1643 movaps %xmm1,96(%rsp)
1644
/* Fewer than 96 bytes of whole blocks: go straight to the tail code. */
1645 subq $96,%rdx
1646 jc .Lxts_enc_short
1647
/*
 * Main-loop setup: %rcx = key + 32 + 16*counter, and %rax = %r10 =
 * 112 - 16*counter, a (negative) byte offset from %rcx that the round
 * loop counts up to zero.  %xmm1 = round key 1, %r8 = &.Lxts_magic.
 */
1648 movl $16+96,%eax
1649 leaq 32(%r11,%r10,1),%rcx
1650 subq %r10,%rax
1651 movups 16(%r11),%xmm1
1652 movq %rax,%r10
1653 leaq .Lxts_magic(%rip),%r8
1654 jmp .Lxts_enc_grandloop
1655
/*
 * Main loop: 96 bytes (six blocks) per iteration.  Each load is XORed
 * with tweak ^ rk0, i.e. the round-0 key addition and the input tweak
 * XOR in one step (the sixth tweak gets rk0 via %xmm8).  The first
 * rounds are interleaved with turning every tweak into tweak ^ rk_last
 * (XOR with %xmm9 = 96(%rsp)) and parking it at 0..80(%rsp) for the
 * final round; slot 48 is filled later, after .Lxts_enc_loop6.
 */
1656.align 32
1657.Lxts_enc_grandloop:
1658 movdqu 0(%rdi),%xmm2
1659 movdqa %xmm0,%xmm8
1660 movdqu 16(%rdi),%xmm3
1661 pxor %xmm10,%xmm2
1662 movdqu 32(%rdi),%xmm4
1663 pxor %xmm11,%xmm3
1664.byte 102,15,56,220,209
1665 movdqu 48(%rdi),%xmm5
1666 pxor %xmm12,%xmm4
1667.byte 102,15,56,220,217
1668 movdqu 64(%rdi),%xmm6
1669 pxor %xmm13,%xmm5
1670.byte 102,15,56,220,225
1671 movdqu 80(%rdi),%xmm7
1672 pxor %xmm15,%xmm8
1673 movdqa 96(%rsp),%xmm9
1674 pxor %xmm14,%xmm6
1675.byte 102,15,56,220,233
1676 movups 32(%r11),%xmm0
1677 leaq 96(%rdi),%rdi
1678 pxor %xmm8,%xmm7
1679
1680 pxor %xmm9,%xmm10
1681.byte 102,15,56,220,241
1682 pxor %xmm9,%xmm11
1683 movdqa %xmm10,0(%rsp)
1684.byte 102,15,56,220,249
1685 movups 48(%r11),%xmm1
1686 pxor %xmm9,%xmm12
1687
1688.byte 102,15,56,220,208
1689 pxor %xmm9,%xmm13
1690 movdqa %xmm11,16(%rsp)
1691.byte 102,15,56,220,216
1692 pxor %xmm9,%xmm14
1693 movdqa %xmm12,32(%rsp)
1694.byte 102,15,56,220,224
1695.byte 102,15,56,220,232
1696 pxor %xmm9,%xmm8
1697 movdqa %xmm14,64(%rsp)
1698.byte 102,15,56,220,240
1699.byte 102,15,56,220,248
1700 movups 64(%r11),%xmm0
1701 movdqa %xmm8,80(%rsp)
1702 pshufd $0x5f,%xmm15,%xmm9
1703 jmp .Lxts_enc_loop6
/*
 * Middle rounds, two per iteration, on all six blocks.  The flags set by
 * addq are still intact at the jnz because aesenc and movups do not
 * modify them.
 */
1704.align 32
1705.Lxts_enc_loop6:
1706.byte 102,15,56,220,209
1707.byte 102,15,56,220,217
1708.byte 102,15,56,220,225
1709.byte 102,15,56,220,233
1710.byte 102,15,56,220,241
1711.byte 102,15,56,220,249
1712 movups -64(%rcx,%rax,1),%xmm1
1713 addq $32,%rax
1714
1715.byte 102,15,56,220,208
1716.byte 102,15,56,220,216
1717.byte 102,15,56,220,224
1718.byte 102,15,56,220,232
1719.byte 102,15,56,220,240
1720.byte 102,15,56,220,248
1721 movups -80(%rcx,%rax,1),%xmm0
1722 jnz .Lxts_enc_loop6
1723
/*
 * Remaining rounds, interleaved with generating the tweaks for the next
 * six blocks: %xmm10..%xmm14 are seeded with rk0 reloaded from (%r11)
 * and XORed with successive values of %xmm15; %xmm8 is reloaded with
 * the constant at (%r8).
 */
1724 movdqa (%r8),%xmm8
1725 movdqa %xmm9,%xmm14
1726 paddd %xmm9,%xmm9
1727.byte 102,15,56,220,209
1728 paddq %xmm15,%xmm15
1729 psrad $31,%xmm14
1730.byte 102,15,56,220,217
1731 pand %xmm8,%xmm14
1732 movups (%r11),%xmm10
1733.byte 102,15,56,220,225
1734.byte 102,15,56,220,233
1735.byte 102,15,56,220,241
1736 pxor %xmm14,%xmm15
1737 movaps %xmm10,%xmm11
1738.byte 102,15,56,220,249
1739 movups -64(%rcx),%xmm1
1740
1741 movdqa %xmm9,%xmm14
1742.byte 102,15,56,220,208
1743 paddd %xmm9,%xmm9
1744 pxor %xmm15,%xmm10
1745.byte 102,15,56,220,216
1746 psrad $31,%xmm14
1747 paddq %xmm15,%xmm15
1748.byte 102,15,56,220,224
1749.byte 102,15,56,220,232
1750 pand %xmm8,%xmm14
1751 movaps %xmm11,%xmm12
1752.byte 102,15,56,220,240
1753 pxor %xmm14,%xmm15
1754 movdqa %xmm9,%xmm14
1755.byte 102,15,56,220,248
1756 movups -48(%rcx),%xmm0
1757
1758 paddd %xmm9,%xmm9
1759.byte 102,15,56,220,209
1760 pxor %xmm15,%xmm11
1761 psrad $31,%xmm14
1762.byte 102,15,56,220,217
1763 paddq %xmm15,%xmm15
1764 pand %xmm8,%xmm14
1765.byte 102,15,56,220,225
1766.byte 102,15,56,220,233
/* Late store of the fourth block's tweak ^ rk_last into its stack slot. */
1767 movdqa %xmm13,48(%rsp)
1768 pxor %xmm14,%xmm15
1769.byte 102,15,56,220,241
1770 movaps %xmm12,%xmm13
1771 movdqa %xmm9,%xmm14
1772.byte 102,15,56,220,249
1773 movups -32(%rcx),%xmm1
1774
1775 paddd %xmm9,%xmm9
1776.byte 102,15,56,220,208
1777 pxor %xmm15,%xmm12
1778 psrad $31,%xmm14
1779.byte 102,15,56,220,216
1780 paddq %xmm15,%xmm15
1781 pand %xmm8,%xmm14
1782.byte 102,15,56,220,224
1783.byte 102,15,56,220,232
1784.byte 102,15,56,220,240
1785 pxor %xmm14,%xmm15
1786 movaps %xmm13,%xmm14
1787.byte 102,15,56,220,248
1788
1789 movdqa %xmm9,%xmm0
1790 paddd %xmm9,%xmm9
1791.byte 102,15,56,220,209
1792 pxor %xmm15,%xmm13
1793 psrad $31,%xmm0
1794.byte 102,15,56,220,217
1795 paddq %xmm15,%xmm15
1796 pand %xmm8,%xmm0
1797.byte 102,15,56,220,225
1798.byte 102,15,56,220,233
1799 pxor %xmm0,%xmm15
/* Reload rk0 and rk1 for the next iteration (or for the tail code). */
1800 movups (%r11),%xmm0
1801.byte 102,15,56,220,241
1802.byte 102,15,56,220,249
1803 movups 16(%r11),%xmm1
1804
/*
 * Final round with a memory operand: 102,15,56,221,ModRM,SIB,disp8 is
 * "aesenclast disp8(%rsp),%xmmN" for %xmm2..%xmm7 at 0,16,...,80(%rsp).
 * The operand is tweak ^ rk_last, so the last key addition and the
 * output tweak XOR happen in the same instruction.  The first form
 * encodes an explicit zero disp8, so it is one byte longer than a
 * plain (%rsp) operand would be.
 */
1805 pxor %xmm15,%xmm14
1806.byte 102,15,56,221,84,36,0
1807 psrad $31,%xmm9
1808 paddq %xmm15,%xmm15
1809.byte 102,15,56,221,92,36,16
1810.byte 102,15,56,221,100,36,32
1811 pand %xmm8,%xmm9
/* Reset the round-loop offset for the next pass. */
1812 movq %r10,%rax
1813.byte 102,15,56,221,108,36,48
1814.byte 102,15,56,221,116,36,64
1815.byte 102,15,56,221,124,36,80
1816 pxor %xmm9,%xmm15
1817
/* Store six output blocks and loop while at least 96 bytes remain. */
1818 leaq 96(%rsi),%rsi
1819 movups %xmm2,-96(%rsi)
1820 movups %xmm3,-80(%rsi)
1821 movups %xmm4,-64(%rsi)
1822 movups %xmm5,-48(%rsi)
1823 movups %xmm6,-32(%rsi)
1824 movups %xmm7,-16(%rsi)
1825 subq $96,%rdx
1826 jnc .Lxts_enc_grandloop
1827
/*
 * Leaving the main loop: recover the round counter as
 * (112 - %r10) >> 4 and point %rcx back at the start of the schedule.
 */
1828 movl $16+96,%eax
1829 subl %r10d,%eax
1830 movq %r11,%rcx
1831 shrl $4,%eax
1832
/*
 * Tail: 0..80 bytes of whole blocks are left once 96 is added back to
 * %rdx.  %r10d keeps the round counter and %r11 the key pointer; the
 * _aesni_encrypt2/3 helpers shown at the top of the file shift %eax and
 * advance %rcx, and the single-block loops below do the same, hence
 * the copies.
 * The tweaks still carry rk0; each pxor with %xmm0 strips it just before
 * the tweak is needed.  pxor does not touch the flags, so the je after
 * it still tests the preceding cmpq.
 */
1833.Lxts_enc_short:
1834
1835 movl %eax,%r10d
1836 pxor %xmm0,%xmm10
1837 addq $96,%rdx
1838 jz .Lxts_enc_done
1839
1840 pxor %xmm0,%xmm11
1841 cmpq $0x20,%rdx
1842 jb .Lxts_enc_one
1843 pxor %xmm0,%xmm12
1844 je .Lxts_enc_two
1845
1846 pxor %xmm0,%xmm13
1847 cmpq $0x40,%rdx
1848 jb .Lxts_enc_three
1849 pxor %xmm0,%xmm14
1850 je .Lxts_enc_four
1851
/*
 * Five blocks: tweak-XOR, call the six-block helper with the sixth
 * lane (%xmm7) zeroed, tweak-XOR again and store.  %xmm15 moves to
 * %xmm10 as the tweak for a possible stolen block.
 */
1852 movdqu (%rdi),%xmm2
1853 movdqu 16(%rdi),%xmm3
1854 movdqu 32(%rdi),%xmm4
1855 pxor %xmm10,%xmm2
1856 movdqu 48(%rdi),%xmm5
1857 pxor %xmm11,%xmm3
1858 movdqu 64(%rdi),%xmm6
1859 leaq 80(%rdi),%rdi
1860 pxor %xmm12,%xmm4
1861 pxor %xmm13,%xmm5
1862 pxor %xmm14,%xmm6
1863 pxor %xmm7,%xmm7
1864
1865 call _aesni_encrypt6
1866
1867 xorps %xmm10,%xmm2
1868 movdqa %xmm15,%xmm10
1869 xorps %xmm11,%xmm3
1870 xorps %xmm12,%xmm4
1871 movdqu %xmm2,(%rsi)
1872 xorps %xmm13,%xmm5
1873 movdqu %xmm3,16(%rsi)
1874 xorps %xmm14,%xmm6
1875 movdqu %xmm4,32(%rsi)
1876 movdqu %xmm5,48(%rsi)
1877 movdqu %xmm6,64(%rsi)
1878 leaq 80(%rsi),%rsi
1879 jmp .Lxts_enc_done
1880
/* One block: inline single-block encryption; next tweak -> %xmm10. */
1881.align 16
1882.Lxts_enc_one:
1883 movups (%rdi),%xmm2
1884 leaq 16(%rdi),%rdi
1885 xorps %xmm10,%xmm2
1886 movups (%rcx),%xmm0
1887 movups 16(%rcx),%xmm1
1888 leaq 32(%rcx),%rcx
1889 xorps %xmm0,%xmm2
1890.Loop_enc1_9:
1891.byte 102,15,56,220,209
1892 decl %eax
1893 movups (%rcx),%xmm1
1894 leaq 16(%rcx),%rcx
1895 jnz .Loop_enc1_9
1896.byte 102,15,56,221,209
1897 xorps %xmm10,%xmm2
1898 movdqa %xmm11,%xmm10
1899 movups %xmm2,(%rsi)
1900 leaq 16(%rsi),%rsi
1901 jmp .Lxts_enc_done
1902
/* Two blocks via _aesni_encrypt2; next tweak -> %xmm10. */
1903.align 16
1904.Lxts_enc_two:
1905 movups (%rdi),%xmm2
1906 movups 16(%rdi),%xmm3
1907 leaq 32(%rdi),%rdi
1908 xorps %xmm10,%xmm2
1909 xorps %xmm11,%xmm3
1910
1911 call _aesni_encrypt2
1912
1913 xorps %xmm10,%xmm2
1914 movdqa %xmm12,%xmm10
1915 xorps %xmm11,%xmm3
1916 movups %xmm2,(%rsi)
1917 movups %xmm3,16(%rsi)
1918 leaq 32(%rsi),%rsi
1919 jmp .Lxts_enc_done
1920
/* Three blocks via _aesni_encrypt3; next tweak -> %xmm10. */
1921.align 16
1922.Lxts_enc_three:
1923 movups (%rdi),%xmm2
1924 movups 16(%rdi),%xmm3
1925 movups 32(%rdi),%xmm4
1926 leaq 48(%rdi),%rdi
1927 xorps %xmm10,%xmm2
1928 xorps %xmm11,%xmm3
1929 xorps %xmm12,%xmm4
1930
1931 call _aesni_encrypt3
1932
1933 xorps %xmm10,%xmm2
1934 movdqa %xmm13,%xmm10
1935 xorps %xmm11,%xmm3
1936 xorps %xmm12,%xmm4
1937 movups %xmm2,(%rsi)
1938 movups %xmm3,16(%rsi)
1939 movups %xmm4,32(%rsi)
1940 leaq 48(%rsi),%rsi
1941 jmp .Lxts_enc_done
1942
/* Four blocks via _aesni_encrypt4; next tweak -> %xmm10. */
1943.align 16
1944.Lxts_enc_four:
1945 movups (%rdi),%xmm2
1946 movups 16(%rdi),%xmm3
1947 movups 32(%rdi),%xmm4
1948 xorps %xmm10,%xmm2
1949 movups 48(%rdi),%xmm5
1950 leaq 64(%rdi),%rdi
1951 xorps %xmm11,%xmm3
1952 xorps %xmm12,%xmm4
1953 xorps %xmm13,%xmm5
1954
1955 call _aesni_encrypt4
1956
1957 pxor %xmm10,%xmm2
1958 movdqa %xmm14,%xmm10
1959 pxor %xmm11,%xmm3
1960 pxor %xmm12,%xmm4
1961 movdqu %xmm2,(%rsi)
1962 pxor %xmm13,%xmm5
1963 movdqu %xmm3,16(%rsi)
1964 movdqu %xmm4,32(%rsi)
1965 movdqu %xmm5,48(%rsi)
1966 leaq 64(%rsi),%rsi
1967 jmp .Lxts_enc_done
1968
/*
 * All whole blocks are written.  %r9 & 15 is the number of leftover
 * bytes; if it is zero we are done, otherwise ciphertext stealing runs
 * with %xmm10 as the tweak.
 */
1969.align 16
1970.Lxts_enc_done:
1971 andq $15,%r9
1972 jz .Lxts_enc_ret
1973 movq %r9,%rdx
1974
/*
 * For each leftover byte: copy the byte of the last output block
 * (-16(%rsi)) to the partial output block at 0(%rsi), and put the
 * leftover input byte in its place.
 */
1975.Lxts_enc_steal:
1976 movzbl (%rdi),%eax
1977 movzbl -16(%rsi),%ecx
1978 leaq 1(%rdi),%rdi
1979 movb %al,-16(%rsi)
1980 movb %cl,0(%rsi)
1981 leaq 1(%rsi),%rsi
1982 subq $1,%rdx
1983 jnz .Lxts_enc_steal
1984
/*
 * Rewind %rsi, restore key pointer and round counter, and encrypt the
 * patched block at -16(%rsi) in place with tweak %xmm10.
 */
1985 subq %r9,%rsi
1986 movq %r11,%rcx
1987 movl %r10d,%eax
1988
1989 movups -16(%rsi),%xmm2
1990 xorps %xmm10,%xmm2
1991 movups (%rcx),%xmm0
1992 movups 16(%rcx),%xmm1
1993 leaq 32(%rcx),%rcx
1994 xorps %xmm0,%xmm2
1995.Loop_enc1_10:
1996.byte 102,15,56,220,209
1997 decl %eax
1998 movups (%rcx),%xmm1
1999 leaq 16(%rcx),%rcx
2000 jnz .Loop_enc1_10
2001.byte 102,15,56,221,209
2002 xorps %xmm10,%xmm2
2003 movups %xmm2,-16(%rsi)
2004
/*
 * Exit: clear %xmm0..%xmm15 and the seven 16-byte stack slots at
 * 0..96(%rsp) (the whole 112-byte frame), then restore %rsp and %rbp.
 */
2005.Lxts_enc_ret:
2006 xorps %xmm0,%xmm0
2007 pxor %xmm1,%xmm1
2008 pxor %xmm2,%xmm2
2009 pxor %xmm3,%xmm3
2010 pxor %xmm4,%xmm4
2011 pxor %xmm5,%xmm5
2012 pxor %xmm6,%xmm6
2013 pxor %xmm7,%xmm7
2014 movaps %xmm0,0(%rsp)
2015 pxor %xmm8,%xmm8
2016 movaps %xmm0,16(%rsp)
2017 pxor %xmm9,%xmm9
2018 movaps %xmm0,32(%rsp)
2019 pxor %xmm10,%xmm10
2020 movaps %xmm0,48(%rsp)
2021 pxor %xmm11,%xmm11
2022 movaps %xmm0,64(%rsp)
2023 pxor %xmm12,%xmm12
2024 movaps %xmm0,80(%rsp)
2025 pxor %xmm13,%xmm13
2026 movaps %xmm0,96(%rsp)
2027 pxor %xmm14,%xmm14
2028 pxor %xmm15,%xmm15
2029 leaq (%rbp),%rsp
2030 popq %rbp
2031.Lxts_enc_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
2032 .byte 0xf3,0xc3
2033.size aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt
 *
 * Same register use on entry as aesni_xts_encrypt: %rdi source, %rsi
 * destination, %rdx byte count, %rcx data key schedule, %r8 schedule
 * used to encrypt the 16 bytes at (%r9).  Same 112-byte aligned frame.
 */
2034.globl aesni_xts_decrypt
2035.type aesni_xts_decrypt,@function
2036.align 16
2037aesni_xts_decrypt:
2038 leaq (%rsp),%rax
2039 pushq %rbp
2040 subq $112,%rsp
2041 andq $-16,%rsp
2042 leaq -8(%rax),%rbp
/*
 * The first tweak is produced with aesenc/aesenclast (opcode bytes 220
 * and 221) even though this is the decrypt entry point.
 */
2043 movups (%r9),%xmm2
2044 movl 240(%r8),%eax
2045 movl 240(%rcx),%r10d
2046 movups (%r8),%xmm0
2047 movups 16(%r8),%xmm1
2048 leaq 32(%r8),%r8
2049 xorps %xmm0,%xmm2
2050.Loop_enc1_11:
2051.byte 102,15,56,220,209
2052 decl %eax
2053 movups (%r8),%xmm1
2054 leaq 16(%r8),%r8
2055 jnz .Loop_enc1_11
2056.byte 102,15,56,221,209
/*
 * %rax = 16 if the byte count is not a multiple of 16, else 0; the
 * instruction that follows subtracts it from %rdx.
 */
2057 xorl %eax,%eax
2058 testq $15,%rdx
2059 setnz %al
2060 shlq $4,%rax
/*
 * NOTE(review): the decimal number at the start of every statement below
 * looks like a listing line number that was fused into the text when the
 * file was flattened; it is kept verbatim here because only comments are
 * being added -- confirm and strip at the source of the flattening.
 */
/*
 * aesni_xts_decrypt, continued.  %rax is 16 when the byte count has a
 * partial trailing block and 0 otherwise (xorl/testq/setnz/shlq just
 * above), so one full block is withheld from the bulk path when
 * stealing will be needed.
 *
 * Raw-byte opcodes: 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast.  For register operands M = 0xC0 + 8*dst + src, e.g.
 * 209 = 0xD1 is "aesdec %xmm1,%xmm2" and 208 = 0xD0 is
 * "aesdec %xmm0,%xmm2".
 */
2061 subq %rax,%rdx
2062
/*
 * Set up the data key: %xmm0 = round key 0, %r11 = key pointer copy,
 * %eax = round counter, %r10 = counter*16.  %r9 keeps the (adjusted)
 * byte count, %rdx is rounded down to whole 16-byte blocks.
 */
2063 movups (%rcx),%xmm0
2064 movq %rcx,%r11
2065 movl %r10d,%eax
2066 shll $4,%r10d
2067 movq %rdx,%r9
2068 andq $-16,%rdx
2069
/* %xmm1 = last round key (offset 16 + 16*counter); XORed with rk0 below. */
2070 movups 16(%rcx,%r10,1),%xmm1
2071
/*
 * Generate the tweaks for the first five blocks in %xmm10..%xmm14, each
 * pre-XORed with round key 0, and leave the sixth (not XORed) in %xmm15.
 * Every step doubles %xmm15 per 64-bit lane (paddq) and XORs in a carry
 * mask built from %xmm9 (psrad $31, then pand with %xmm8).
 * NOTE(review): .Lxts_magic is defined outside this span; presumably it
 * carries the 0x87 reduction constant for doubling in GF(2^128) --
 * confirm.
 */
2072 movdqa .Lxts_magic(%rip),%xmm8
2073 movdqa %xmm2,%xmm15
2074 pshufd $0x5f,%xmm2,%xmm9
2075 pxor %xmm0,%xmm1
2076 movdqa %xmm9,%xmm14
2077 paddd %xmm9,%xmm9
2078 movdqa %xmm15,%xmm10
2079 psrad $31,%xmm14
2080 paddq %xmm15,%xmm15
2081 pand %xmm8,%xmm14
2082 pxor %xmm0,%xmm10
2083 pxor %xmm14,%xmm15
2084 movdqa %xmm9,%xmm14
2085 paddd %xmm9,%xmm9
2086 movdqa %xmm15,%xmm11
2087 psrad $31,%xmm14
2088 paddq %xmm15,%xmm15
2089 pand %xmm8,%xmm14
2090 pxor %xmm0,%xmm11
2091 pxor %xmm14,%xmm15
2092 movdqa %xmm9,%xmm14
2093 paddd %xmm9,%xmm9
2094 movdqa %xmm15,%xmm12
2095 psrad $31,%xmm14
2096 paddq %xmm15,%xmm15
2097 pand %xmm8,%xmm14
2098 pxor %xmm0,%xmm12
2099 pxor %xmm14,%xmm15
2100 movdqa %xmm9,%xmm14
2101 paddd %xmm9,%xmm9
2102 movdqa %xmm15,%xmm13
2103 psrad $31,%xmm14
2104 paddq %xmm15,%xmm15
2105 pand %xmm8,%xmm14
2106 pxor %xmm0,%xmm13
2107 pxor %xmm14,%xmm15
2108 movdqa %xmm15,%xmm14
2109 psrad $31,%xmm9
2110 paddq %xmm15,%xmm15
2111 pand %xmm8,%xmm9
2112 pxor %xmm0,%xmm14
2113 pxor %xmm9,%xmm15
/* Park rk0 ^ rk_last for the main loop. */
2114 movaps %xmm1,96(%rsp)
2115
/* Fewer than 96 bytes of whole blocks: go straight to the tail code. */
2116 subq $96,%rdx
2117 jc .Lxts_dec_short
2118
/*
 * Main-loop setup: %rcx = key + 32 + 16*counter, and %rax = %r10 =
 * 112 - 16*counter, a (negative) byte offset from %rcx that the round
 * loop counts up to zero.  %xmm1 = round key 1, %r8 = &.Lxts_magic.
 */
2119 movl $16+96,%eax
2120 leaq 32(%r11,%r10,1),%rcx
2121 subq %r10,%rax
2122 movups 16(%r11),%xmm1
2123 movq %rax,%r10
2124 leaq .Lxts_magic(%rip),%r8
2125 jmp .Lxts_dec_grandloop
2126
/*
 * Main loop: 96 bytes (six blocks) per iteration.  Each load is XORed
 * with tweak ^ rk0 (the sixth tweak gets rk0 via %xmm8).  The first
 * rounds are interleaved with turning every tweak into tweak ^ rk_last
 * (XOR with %xmm9 = 96(%rsp)) and parking it at 0..80(%rsp) for the
 * final round; slot 48 is filled later, after .Lxts_dec_loop6.
 */
2127.align 32
2128.Lxts_dec_grandloop:
2129 movdqu 0(%rdi),%xmm2
2130 movdqa %xmm0,%xmm8
2131 movdqu 16(%rdi),%xmm3
2132 pxor %xmm10,%xmm2
2133 movdqu 32(%rdi),%xmm4
2134 pxor %xmm11,%xmm3
2135.byte 102,15,56,222,209
2136 movdqu 48(%rdi),%xmm5
2137 pxor %xmm12,%xmm4
2138.byte 102,15,56,222,217
2139 movdqu 64(%rdi),%xmm6
2140 pxor %xmm13,%xmm5
2141.byte 102,15,56,222,225
2142 movdqu 80(%rdi),%xmm7
2143 pxor %xmm15,%xmm8
2144 movdqa 96(%rsp),%xmm9
2145 pxor %xmm14,%xmm6
2146.byte 102,15,56,222,233
2147 movups 32(%r11),%xmm0
2148 leaq 96(%rdi),%rdi
2149 pxor %xmm8,%xmm7
2150
2151 pxor %xmm9,%xmm10
2152.byte 102,15,56,222,241
2153 pxor %xmm9,%xmm11
2154 movdqa %xmm10,0(%rsp)
2155.byte 102,15,56,222,249
2156 movups 48(%r11),%xmm1
2157 pxor %xmm9,%xmm12
2158
2159.byte 102,15,56,222,208
2160 pxor %xmm9,%xmm13
2161 movdqa %xmm11,16(%rsp)
2162.byte 102,15,56,222,216
2163 pxor %xmm9,%xmm14
2164 movdqa %xmm12,32(%rsp)
2165.byte 102,15,56,222,224
2166.byte 102,15,56,222,232
2167 pxor %xmm9,%xmm8
2168 movdqa %xmm14,64(%rsp)
2169.byte 102,15,56,222,240
2170.byte 102,15,56,222,248
2171 movups 64(%r11),%xmm0
2172 movdqa %xmm8,80(%rsp)
2173 pshufd $0x5f,%xmm15,%xmm9
2174 jmp .Lxts_dec_loop6
/*
 * Middle rounds, two per iteration, on all six blocks.  The flags set by
 * addq are still intact at the jnz because aesdec and movups do not
 * modify them.
 */
2175.align 32
2176.Lxts_dec_loop6:
2177.byte 102,15,56,222,209
2178.byte 102,15,56,222,217
2179.byte 102,15,56,222,225
2180.byte 102,15,56,222,233
2181.byte 102,15,56,222,241
2182.byte 102,15,56,222,249
2183 movups -64(%rcx,%rax,1),%xmm1
2184 addq $32,%rax
2185
2186.byte 102,15,56,222,208
2187.byte 102,15,56,222,216
2188.byte 102,15,56,222,224
2189.byte 102,15,56,222,232
2190.byte 102,15,56,222,240
2191.byte 102,15,56,222,248
2192 movups -80(%rcx,%rax,1),%xmm0
2193 jnz .Lxts_dec_loop6
2194
/*
 * Remaining rounds, interleaved with generating the tweaks for the next
 * six blocks: %xmm10..%xmm14 are seeded with rk0 reloaded from (%r11)
 * and XORed with successive values of %xmm15; %xmm8 is reloaded with
 * the constant at (%r8).
 */
2195 movdqa (%r8),%xmm8
2196 movdqa %xmm9,%xmm14
2197 paddd %xmm9,%xmm9
2198.byte 102,15,56,222,209
2199 paddq %xmm15,%xmm15
2200 psrad $31,%xmm14
2201.byte 102,15,56,222,217
2202 pand %xmm8,%xmm14
2203 movups (%r11),%xmm10
2204.byte 102,15,56,222,225
2205.byte 102,15,56,222,233
2206.byte 102,15,56,222,241
2207 pxor %xmm14,%xmm15
2208 movaps %xmm10,%xmm11
2209.byte 102,15,56,222,249
2210 movups -64(%rcx),%xmm1
2211
2212 movdqa %xmm9,%xmm14
2213.byte 102,15,56,222,208
2214 paddd %xmm9,%xmm9
2215 pxor %xmm15,%xmm10
2216.byte 102,15,56,222,216
2217 psrad $31,%xmm14
2218 paddq %xmm15,%xmm15
2219.byte 102,15,56,222,224
2220.byte 102,15,56,222,232
2221 pand %xmm8,%xmm14
2222 movaps %xmm11,%xmm12
2223.byte 102,15,56,222,240
2224 pxor %xmm14,%xmm15
2225 movdqa %xmm9,%xmm14
2226.byte 102,15,56,222,248
2227 movups -48(%rcx),%xmm0
2228
2229 paddd %xmm9,%xmm9
2230.byte 102,15,56,222,209
2231 pxor %xmm15,%xmm11
2232 psrad $31,%xmm14
2233.byte 102,15,56,222,217
2234 paddq %xmm15,%xmm15
2235 pand %xmm8,%xmm14
2236.byte 102,15,56,222,225
2237.byte 102,15,56,222,233
/* Late store of the fourth block's tweak ^ rk_last into its stack slot. */
2238 movdqa %xmm13,48(%rsp)
2239 pxor %xmm14,%xmm15
2240.byte 102,15,56,222,241
2241 movaps %xmm12,%xmm13
2242 movdqa %xmm9,%xmm14
2243.byte 102,15,56,222,249
2244 movups -32(%rcx),%xmm1
2245
2246 paddd %xmm9,%xmm9
2247.byte 102,15,56,222,208
2248 pxor %xmm15,%xmm12
2249 psrad $31,%xmm14
2250.byte 102,15,56,222,216
2251 paddq %xmm15,%xmm15
2252 pand %xmm8,%xmm14
2253.byte 102,15,56,222,224
2254.byte 102,15,56,222,232
2255.byte 102,15,56,222,240
2256 pxor %xmm14,%xmm15
2257 movaps %xmm13,%xmm14
2258.byte 102,15,56,222,248
2259
2260 movdqa %xmm9,%xmm0
2261 paddd %xmm9,%xmm9
2262.byte 102,15,56,222,209
2263 pxor %xmm15,%xmm13
2264 psrad $31,%xmm0
2265.byte 102,15,56,222,217
2266 paddq %xmm15,%xmm15
2267 pand %xmm8,%xmm0
2268.byte 102,15,56,222,225
2269.byte 102,15,56,222,233
2270 pxor %xmm0,%xmm15
/* Reload rk0 and rk1 for the next iteration (or for the tail code). */
2271 movups (%r11),%xmm0
2272.byte 102,15,56,222,241
2273.byte 102,15,56,222,249
2274 movups 16(%r11),%xmm1
2275
/*
 * Final round with a memory operand: 102,15,56,223,ModRM,SIB,disp8 is
 * "aesdeclast disp8(%rsp),%xmmN" for %xmm2..%xmm7 at 0,16,...,80(%rsp).
 * The operand is tweak ^ rk_last, so the last key addition and the
 * output tweak XOR happen in the same instruction.  The first form
 * encodes an explicit zero disp8, so it is one byte longer than a
 * plain (%rsp) operand would be.
 */
2276 pxor %xmm15,%xmm14
2277.byte 102,15,56,223,84,36,0
2278 psrad $31,%xmm9
2279 paddq %xmm15,%xmm15
2280.byte 102,15,56,223,92,36,16
2281.byte 102,15,56,223,100,36,32
2282 pand %xmm8,%xmm9
/* Reset the round-loop offset for the next pass. */
2283 movq %r10,%rax
2284.byte 102,15,56,223,108,36,48
2285.byte 102,15,56,223,116,36,64
2286.byte 102,15,56,223,124,36,80
2287 pxor %xmm9,%xmm15
2288
/* Store six output blocks and loop while at least 96 bytes remain. */
2289 leaq 96(%rsi),%rsi
2290 movups %xmm2,-96(%rsi)
2291 movups %xmm3,-80(%rsi)
2292 movups %xmm4,-64(%rsi)
2293 movups %xmm5,-48(%rsi)
2294 movups %xmm6,-32(%rsi)
2295 movups %xmm7,-16(%rsi)
2296 subq $96,%rdx
2297 jnc .Lxts_dec_grandloop
2298
/*
 * Leaving the main loop: recover the round counter as
 * (112 - %r10) >> 4 and point %rcx back at the start of the schedule.
 */
2299 movl $16+96,%eax
2300 subl %r10d,%eax
2301 movq %r11,%rcx
2302 shrl $4,%eax
2303
/*
 * Tail: 0..80 bytes of whole blocks are left once 96 is added back to
 * %rdx.  %r10d keeps the round counter and %r11 the key pointer for the
 * code that runs after the helpers.
 * The tweaks still carry rk0 and each pxor with %xmm0 strips it.  Unlike
 * the encrypt side, one tweak more than the block count is always
 * cleaned, because the stealing path needs two consecutive tweaks
 * (%xmm10 and %xmm11).  pxor does not touch the flags, so the je after
 * it still tests the preceding cmpq.
 */
2304.Lxts_dec_short:
2305
2306 movl %eax,%r10d
2307 pxor %xmm0,%xmm10
2308 pxor %xmm0,%xmm11
2309 addq $96,%rdx
2310 jz .Lxts_dec_done
2311
2312 pxor %xmm0,%xmm12
2313 cmpq $0x20,%rdx
2314 jb .Lxts_dec_one
2315 pxor %xmm0,%xmm13
2316 je .Lxts_dec_two
2317
2318 pxor %xmm0,%xmm14
2319 cmpq $0x40,%rdx
2320 jb .Lxts_dec_three
2321 je .Lxts_dec_four
2322
/*
 * Five blocks: tweak-XOR, call the six-block helper, tweak-XOR again
 * and store.  %xmm7 is not set up here, and its result is not stored.
 */
2323 movdqu (%rdi),%xmm2
2324 movdqu 16(%rdi),%xmm3
2325 movdqu 32(%rdi),%xmm4
2326 pxor %xmm10,%xmm2
2327 movdqu 48(%rdi),%xmm5
2328 pxor %xmm11,%xmm3
2329 movdqu 64(%rdi),%xmm6
2330 leaq 80(%rdi),%rdi
2331 pxor %xmm12,%xmm4
2332 pxor %xmm13,%xmm5
2333 pxor %xmm14,%xmm6
2334
2335 call _aesni_decrypt6
2336
2337 xorps %xmm10,%xmm2
2338 xorps %xmm11,%xmm3
2339 xorps %xmm12,%xmm4
2340 movdqu %xmm2,(%rsi)
2341 xorps %xmm13,%xmm5
2342 movdqu %xmm3,16(%rsi)
2343 xorps %xmm14,%xmm6
2344 movdqu %xmm4,32(%rsi)
/*
 * Interleaved with the stores: build a carry mask for one more tweak.
 * pcmpgtd against zero yields the sign mask of %xmm15's dwords and
 * pshufd $0x13 rearranges it into %xmm11.
 */
2345 pxor %xmm14,%xmm14
2346 movdqu %xmm5,48(%rsi)
2347 pcmpgtd %xmm15,%xmm14
2348 movdqu %xmm6,64(%rsi)
2349 leaq 80(%rsi),%rsi
2350 pshufd $0x13,%xmm14,%xmm11
2351 andq $15,%r9
2352 jz .Lxts_dec_ret
2353
/*
 * Leftover bytes exist: %xmm10 = current tweak, %xmm11 = the following
 * one (doubled %xmm15 XOR masked carry).
 * NOTE(review): this relies on %xmm8 still holding the .Lxts_magic
 * constant after _aesni_decrypt6, which is defined outside this span --
 * confirm.
 */
2354 movdqa %xmm15,%xmm10
2355 paddq %xmm15,%xmm15
2356 pand %xmm8,%xmm11
2357 pxor %xmm15,%xmm11
2358 jmp .Lxts_dec_done2
2359
/*
 * One block: inline single-block decryption.  Afterwards %xmm10 and
 * %xmm11 hold the next two tweaks.
 */
2360.align 16
2361.Lxts_dec_one:
2362 movups (%rdi),%xmm2
2363 leaq 16(%rdi),%rdi
2364 xorps %xmm10,%xmm2
2365 movups (%rcx),%xmm0
2366 movups 16(%rcx),%xmm1
2367 leaq 32(%rcx),%rcx
2368 xorps %xmm0,%xmm2
2369.Loop_dec1_12:
2370.byte 102,15,56,222,209
2371 decl %eax
2372 movups (%rcx),%xmm1
2373 leaq 16(%rcx),%rcx
2374 jnz .Loop_dec1_12
2375.byte 102,15,56,223,209
2376 xorps %xmm10,%xmm2
2377 movdqa %xmm11,%xmm10
2378 movups %xmm2,(%rsi)
2379 movdqa %xmm12,%xmm11
2380 leaq 16(%rsi),%rsi
2381 jmp .Lxts_dec_done
2382
/* Two blocks via _aesni_decrypt2; next two tweaks -> %xmm10, %xmm11. */
2383.align 16
2384.Lxts_dec_two:
2385 movups (%rdi),%xmm2
2386 movups 16(%rdi),%xmm3
2387 leaq 32(%rdi),%rdi
2388 xorps %xmm10,%xmm2
2389 xorps %xmm11,%xmm3
2390
2391 call _aesni_decrypt2
2392
2393 xorps %xmm10,%xmm2
2394 movdqa %xmm12,%xmm10
2395 xorps %xmm11,%xmm3
2396 movdqa %xmm13,%xmm11
2397 movups %xmm2,(%rsi)
2398 movups %xmm3,16(%rsi)
2399 leaq 32(%rsi),%rsi
2400 jmp .Lxts_dec_done
2401
/* Three blocks via _aesni_decrypt3; next two tweaks -> %xmm10, %xmm11. */
2402.align 16
2403.Lxts_dec_three:
2404 movups (%rdi),%xmm2
2405 movups 16(%rdi),%xmm3
2406 movups 32(%rdi),%xmm4
2407 leaq 48(%rdi),%rdi
2408 xorps %xmm10,%xmm2
2409 xorps %xmm11,%xmm3
2410 xorps %xmm12,%xmm4
2411
2412 call _aesni_decrypt3
2413
2414 xorps %xmm10,%xmm2
2415 movdqa %xmm13,%xmm10
2416 xorps %xmm11,%xmm3
2417 movdqa %xmm14,%xmm11
2418 xorps %xmm12,%xmm4
2419 movups %xmm2,(%rsi)
2420 movups %xmm3,16(%rsi)
2421 movups %xmm4,32(%rsi)
2422 leaq 48(%rsi),%rsi
2423 jmp .Lxts_dec_done
2424
/* Four blocks via _aesni_decrypt4; next two tweaks -> %xmm10, %xmm11. */
2425.align 16
2426.Lxts_dec_four:
2427 movups (%rdi),%xmm2
2428 movups 16(%rdi),%xmm3
2429 movups 32(%rdi),%xmm4
2430 xorps %xmm10,%xmm2
2431 movups 48(%rdi),%xmm5
2432 leaq 64(%rdi),%rdi
2433 xorps %xmm11,%xmm3
2434 xorps %xmm12,%xmm4
2435 xorps %xmm13,%xmm5
2436
2437 call _aesni_decrypt4
2438
2439 pxor %xmm10,%xmm2
2440 movdqa %xmm14,%xmm10
2441 pxor %xmm11,%xmm3
2442 movdqa %xmm15,%xmm11
2443 pxor %xmm12,%xmm4
2444 movdqu %xmm2,(%rsi)
2445 pxor %xmm13,%xmm5
2446 movdqu %xmm3,16(%rsi)
2447 movdqu %xmm4,32(%rsi)
2448 movdqu %xmm5,48(%rsi)
2449 leaq 64(%rsi),%rsi
2450 jmp .Lxts_dec_done
2451
/*
 * All bulk blocks are written.  %r9 & 15 is the number of leftover
 * bytes; zero means done.
 */
2452.align 16
2453.Lxts_dec_done:
2454 andq $15,%r9
2455 jz .Lxts_dec_ret
/*
 * Stealing, step 1: decrypt the withheld full block at (%rdi) with
 * %xmm11 (the tweak one step after %xmm10) and store it at (%rsi).
 */
2456.Lxts_dec_done2:
2457 movq %r9,%rdx
2458 movq %r11,%rcx
2459 movl %r10d,%eax
2460
2461 movups (%rdi),%xmm2
2462 xorps %xmm11,%xmm2
2463 movups (%rcx),%xmm0
2464 movups 16(%rcx),%xmm1
2465 leaq 32(%rcx),%rcx
2466 xorps %xmm0,%xmm2
2467.Loop_dec1_13:
2468.byte 102,15,56,222,209
2469 decl %eax
2470 movups (%rcx),%xmm1
2471 leaq 16(%rcx),%rcx
2472 jnz .Loop_dec1_13
2473.byte 102,15,56,223,209
2474 xorps %xmm11,%xmm2
2475 movups %xmm2,(%rsi)
2476
/*
 * Step 2, for each leftover byte: move the byte just decrypted at
 * (%rsi) out to the partial output block at 16(%rsi), and put the
 * leftover input byte from 16(%rdi) in its place.
 */
2477.Lxts_dec_steal:
2478 movzbl 16(%rdi),%eax
2479 movzbl (%rsi),%ecx
2480 leaq 1(%rdi),%rdi
2481 movb %al,(%rsi)
2482 movb %cl,16(%rsi)
2483 leaq 1(%rsi),%rsi
2484 subq $1,%rdx
2485 jnz .Lxts_dec_steal
2486
/*
 * Step 3: rewind %rsi, restore key pointer and round counter, and
 * decrypt the patched block at (%rsi) in place with tweak %xmm10.
 */
2487 subq %r9,%rsi
2488 movq %r11,%rcx
2489 movl %r10d,%eax
2490
2491 movups (%rsi),%xmm2
2492 xorps %xmm10,%xmm2
2493 movups (%rcx),%xmm0
2494 movups 16(%rcx),%xmm1
2495 leaq 32(%rcx),%rcx
2496 xorps %xmm0,%xmm2
2497.Loop_dec1_14:
2498.byte 102,15,56,222,209
2499 decl %eax
2500 movups (%rcx),%xmm1
2501 leaq 16(%rcx),%rcx
2502 jnz .Loop_dec1_14
2503.byte 102,15,56,223,209
2504 xorps %xmm10,%xmm2
2505 movups %xmm2,(%rsi)
2506
/*
 * Exit: clear %xmm0..%xmm15 and the seven 16-byte stack slots at
 * 0..96(%rsp) (the whole 112-byte frame), then restore %rsp and %rbp.
 */
2507.Lxts_dec_ret:
2508 xorps %xmm0,%xmm0
2509 pxor %xmm1,%xmm1
2510 pxor %xmm2,%xmm2
2511 pxor %xmm3,%xmm3
2512 pxor %xmm4,%xmm4
2513 pxor %xmm5,%xmm5
2514 pxor %xmm6,%xmm6
2515 pxor %xmm7,%xmm7
2516 movaps %xmm0,0(%rsp)
2517 pxor %xmm8,%xmm8
2518 movaps %xmm0,16(%rsp)
2519 pxor %xmm9,%xmm9
2520 movaps %xmm0,32(%rsp)
2521 pxor %xmm10,%xmm10
2522 movaps %xmm0,48(%rsp)
2523 pxor %xmm11,%xmm11
2524 movaps %xmm0,64(%rsp)
2525 pxor %xmm12,%xmm12
2526 movaps %xmm0,80(%rsp)
2527 pxor %xmm13,%xmm13
2528 movaps %xmm0,96(%rsp)
2529 pxor %xmm14,%xmm14
2530 pxor %xmm15,%xmm15
2531 leaq (%rbp),%rsp
2532 popq %rbp
2533.Lxts_dec_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
2534 .byte 0xf3,0xc3
2535.size aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_cbc_encrypt -- only the entry and the block-at-a-time encrypt
 * loop are documented here; the function continues below.
 *
 * Register use visible in this part: %rdi source, %rsi destination,
 * %rdx byte count (zero returns immediately), %rcx key schedule with
 * the round counter at 240(%rcx), %r8 pointer to the 16-byte chaining
 * value, %r9d zero selects the .Lcbc_decrypt path.
 * Opcode bytes 102,15,56,220,209 / 221,209 are
 * aesenc / aesenclast %xmm1,%xmm2.
 */
2536.globl aesni_cbc_encrypt
2537.type aesni_cbc_encrypt,@function
2538.align 16
2539aesni_cbc_encrypt:
2540 testq %rdx,%rdx
2541 jz .Lcbc_ret
2542
/* %r10d / %r11 keep the round counter and key pointer across blocks. */
2543 movl 240(%rcx),%r10d
2544 movq %rcx,%r11
2545 testl %r9d,%r9d
2546 jz .Lcbc_decrypt
2547
/* Encrypt path: %xmm2 = chaining value; short input goes to the tail. */
2548 movups (%r8),%xmm2
2549 movl %r10d,%eax
2550 cmpq $16,%rdx
2551 jb .Lcbc_enc_tail
2552 subq $16,%rdx
2553 jmp .Lcbc_enc_loop
/*
 * One block per iteration: %xmm3 = input ^ rk0, folded into the running
 * %xmm2, which is then run through the remaining rounds and stored.
 * %eax and %rcx are restored from %r10d / %r11 after every block.
 */
2554.align 16
2555.Lcbc_enc_loop:
2556 movups (%rdi),%xmm3
2557 leaq 16(%rdi),%rdi
2558
2559 movups (%rcx),%xmm0
2560 movups 16(%rcx),%xmm1
2561 xorps %xmm0,%xmm3
2562 leaq 32(%rcx),%rcx
2563 xorps %xmm3,%xmm2
2564.Loop_enc1_15:
2565.byte 102,15,56,220,209
2566 decl %eax
2567 movups (%rcx),%xmm1
2568 leaq 16(%rcx),%rcx
2569 jnz .Loop_enc1_15
2570.byte 102,15,56,221,209
2571 movl %r10d,%eax
2572 movq %r11,%rcx
2573 movups %xmm2,0(%rsi)
2574 leaq 16(%rsi),%rsi
2575 subq $16,%rdx
2576 jnc .Lcbc_enc_loop
/* Leftover bytes (1..15) are handled at .Lcbc_enc_tail. */
2577 addq $16,%rdx
2578 jnz .Lcbc_enc_tail
/* Done: write the last output block back to (%r8), clear %xmm0..%xmm3. */
2579 pxor %xmm0,%xmm0
2580 pxor %xmm1,%xmm1
2581 movups %xmm2,(%r8)
2582 pxor %xmm2,%xmm2
2583 pxor %xmm3,%xmm3
2584 jmp .Lcbc_ret
2585
2586.Lcbc_enc_tail: 2587 movq %rdx,%rcx 2588 xchgq %rdi,%rsi 2589.long 0x9066A4F3 2590 movl $16,%ecx 2591 subq %rdx,%rcx 2592 xorl %eax,%eax 2593.long 0x9066AAF3 2594 leaq -16(%rdi),%rdi 2595 movl %r10d,%eax 2596 movq %rdi,%rsi 2597 movq %r11,%rcx 2598 xorq %rdx,%rdx 2599 jmp .Lcbc_enc_loop 2600 2601.align 16 2602.Lcbc_decrypt: 2603 cmpq $16,%rdx 2604 jne .Lcbc_decrypt_bulk 2605 2606 2607 2608 movdqu (%rdi),%xmm2 2609 movdqu (%r8),%xmm3 2610 movdqa %xmm2,%xmm4 2611 movups (%rcx),%xmm0 2612 movups 16(%rcx),%xmm1 2613 leaq 32(%rcx),%rcx 2614 xorps %xmm0,%xmm2 2615.Loop_dec1_16: 2616.byte 102,15,56,222,209 2617 decl %r10d 2618 movups (%rcx),%xmm1 2619 leaq 16(%rcx),%rcx 2620 jnz .Loop_dec1_16 2621.byte 102,15,56,223,209 2622 pxor %xmm0,%xmm0 2623 pxor %xmm1,%xmm1 2624 movdqu %xmm4,(%r8) 2625 xorps %xmm3,%xmm2 2626 pxor %xmm3,%xmm3 2627 movups %xmm2,(%rsi) 2628 pxor %xmm2,%xmm2 2629 jmp .Lcbc_ret 2630.align 16 2631.Lcbc_decrypt_bulk: 2632 leaq (%rsp),%rax 2633 pushq %rbp 2634 subq $16,%rsp 2635 andq $-16,%rsp 2636 leaq -8(%rax),%rbp 2637 movups (%r8),%xmm10 2638 movl %r10d,%eax 2639 cmpq $0x50,%rdx 2640 jbe .Lcbc_dec_tail 2641 2642 movups (%rcx),%xmm0 2643 movdqu 0(%rdi),%xmm2 2644 movdqu 16(%rdi),%xmm3 2645 movdqa %xmm2,%xmm11 2646 movdqu 32(%rdi),%xmm4 2647 movdqa %xmm3,%xmm12 2648 movdqu 48(%rdi),%xmm5 2649 movdqa %xmm4,%xmm13 2650 movdqu 64(%rdi),%xmm6 2651 movdqa %xmm5,%xmm14 2652 movdqu 80(%rdi),%xmm7 2653 movdqa %xmm6,%xmm15 2654 movl OPENSSL_ia32cap_P+4(%rip),%r9d 2655 cmpq $0x70,%rdx 2656 jbe .Lcbc_dec_six_or_seven 2657 2658 andl $71303168,%r9d 2659 subq $0x50,%rdx 2660 cmpl $4194304,%r9d 2661 je .Lcbc_dec_loop6_enter 2662 subq $0x20,%rdx 2663 leaq 112(%rcx),%rcx 2664 jmp .Lcbc_dec_loop8_enter 2665.align 16 2666.Lcbc_dec_loop8: 2667 movups %xmm9,(%rsi) 2668 leaq 16(%rsi),%rsi 2669.Lcbc_dec_loop8_enter: 2670 movdqu 96(%rdi),%xmm8 2671 pxor %xmm0,%xmm2 2672 movdqu 112(%rdi),%xmm9 2673 pxor %xmm0,%xmm3 2674 movups 16-112(%rcx),%xmm1 2675 pxor %xmm0,%xmm4 2676 
xorq %r11,%r11 2677 cmpq $0x70,%rdx 2678 pxor %xmm0,%xmm5 2679 pxor %xmm0,%xmm6 2680 pxor %xmm0,%xmm7 2681 pxor %xmm0,%xmm8 2682 2683.byte 102,15,56,222,209 2684 pxor %xmm0,%xmm9 2685 movups 32-112(%rcx),%xmm0 2686.byte 102,15,56,222,217 2687.byte 102,15,56,222,225 2688.byte 102,15,56,222,233 2689.byte 102,15,56,222,241 2690.byte 102,15,56,222,249 2691.byte 102,68,15,56,222,193 2692 setnc %r11b 2693 shlq $7,%r11 2694.byte 102,68,15,56,222,201 2695 addq %rdi,%r11 2696 movups 48-112(%rcx),%xmm1 2697.byte 102,15,56,222,208 2698.byte 102,15,56,222,216 2699.byte 102,15,56,222,224 2700.byte 102,15,56,222,232 2701.byte 102,15,56,222,240 2702.byte 102,15,56,222,248 2703.byte 102,68,15,56,222,192 2704.byte 102,68,15,56,222,200 2705 movups 64-112(%rcx),%xmm0 2706 nop 2707.byte 102,15,56,222,209 2708.byte 102,15,56,222,217 2709.byte 102,15,56,222,225 2710.byte 102,15,56,222,233 2711.byte 102,15,56,222,241 2712.byte 102,15,56,222,249 2713.byte 102,68,15,56,222,193 2714.byte 102,68,15,56,222,201 2715 movups 80-112(%rcx),%xmm1 2716 nop 2717.byte 102,15,56,222,208 2718.byte 102,15,56,222,216 2719.byte 102,15,56,222,224 2720.byte 102,15,56,222,232 2721.byte 102,15,56,222,240 2722.byte 102,15,56,222,248 2723.byte 102,68,15,56,222,192 2724.byte 102,68,15,56,222,200 2725 movups 96-112(%rcx),%xmm0 2726 nop 2727.byte 102,15,56,222,209 2728.byte 102,15,56,222,217 2729.byte 102,15,56,222,225 2730.byte 102,15,56,222,233 2731.byte 102,15,56,222,241 2732.byte 102,15,56,222,249 2733.byte 102,68,15,56,222,193 2734.byte 102,68,15,56,222,201 2735 movups 112-112(%rcx),%xmm1 2736 nop 2737.byte 102,15,56,222,208 2738.byte 102,15,56,222,216 2739.byte 102,15,56,222,224 2740.byte 102,15,56,222,232 2741.byte 102,15,56,222,240 2742.byte 102,15,56,222,248 2743.byte 102,68,15,56,222,192 2744.byte 102,68,15,56,222,200 2745 movups 128-112(%rcx),%xmm0 2746 nop 2747.byte 102,15,56,222,209 2748.byte 102,15,56,222,217 2749.byte 102,15,56,222,225 2750.byte 102,15,56,222,233 2751.byte 102,15,56,222,241 2752.byte 
102,15,56,222,249 2753.byte 102,68,15,56,222,193 2754.byte 102,68,15,56,222,201 2755 movups 144-112(%rcx),%xmm1 2756 cmpl $11,%eax 2757.byte 102,15,56,222,208 2758.byte 102,15,56,222,216 2759.byte 102,15,56,222,224 2760.byte 102,15,56,222,232 2761.byte 102,15,56,222,240 2762.byte 102,15,56,222,248 2763.byte 102,68,15,56,222,192 2764.byte 102,68,15,56,222,200 2765 movups 160-112(%rcx),%xmm0 2766 jb .Lcbc_dec_done 2767.byte 102,15,56,222,209 2768.byte 102,15,56,222,217 2769.byte 102,15,56,222,225 2770.byte 102,15,56,222,233 2771.byte 102,15,56,222,241 2772.byte 102,15,56,222,249 2773.byte 102,68,15,56,222,193 2774.byte 102,68,15,56,222,201 2775 movups 176-112(%rcx),%xmm1 2776 nop 2777.byte 102,15,56,222,208 2778.byte 102,15,56,222,216 2779.byte 102,15,56,222,224 2780.byte 102,15,56,222,232 2781.byte 102,15,56,222,240 2782.byte 102,15,56,222,248 2783.byte 102,68,15,56,222,192 2784.byte 102,68,15,56,222,200 2785 movups 192-112(%rcx),%xmm0 2786 je .Lcbc_dec_done 2787.byte 102,15,56,222,209 2788.byte 102,15,56,222,217 2789.byte 102,15,56,222,225 2790.byte 102,15,56,222,233 2791.byte 102,15,56,222,241 2792.byte 102,15,56,222,249 2793.byte 102,68,15,56,222,193 2794.byte 102,68,15,56,222,201 2795 movups 208-112(%rcx),%xmm1 2796 nop 2797.byte 102,15,56,222,208 2798.byte 102,15,56,222,216 2799.byte 102,15,56,222,224 2800.byte 102,15,56,222,232 2801.byte 102,15,56,222,240 2802.byte 102,15,56,222,248 2803.byte 102,68,15,56,222,192 2804.byte 102,68,15,56,222,200 2805 movups 224-112(%rcx),%xmm0 2806 jmp .Lcbc_dec_done 2807.align 16 2808.Lcbc_dec_done: 2809.byte 102,15,56,222,209 2810.byte 102,15,56,222,217 2811 pxor %xmm0,%xmm10 2812 pxor %xmm0,%xmm11 2813.byte 102,15,56,222,225 2814.byte 102,15,56,222,233 2815 pxor %xmm0,%xmm12 2816 pxor %xmm0,%xmm13 2817.byte 102,15,56,222,241 2818.byte 102,15,56,222,249 2819 pxor %xmm0,%xmm14 2820 pxor %xmm0,%xmm15 2821.byte 102,68,15,56,222,193 2822.byte 102,68,15,56,222,201 2823 movdqu 80(%rdi),%xmm1 2824 2825.byte 102,65,15,56,223,210 2826 
movdqu 96(%rdi),%xmm10 2827 pxor %xmm0,%xmm1 2828.byte 102,65,15,56,223,219 2829 pxor %xmm0,%xmm10 2830 movdqu 112(%rdi),%xmm0 2831.byte 102,65,15,56,223,228 2832 leaq 128(%rdi),%rdi 2833 movdqu 0(%r11),%xmm11 2834.byte 102,65,15,56,223,237 2835.byte 102,65,15,56,223,246 2836 movdqu 16(%r11),%xmm12 2837 movdqu 32(%r11),%xmm13 2838.byte 102,65,15,56,223,255 2839.byte 102,68,15,56,223,193 2840 movdqu 48(%r11),%xmm14 2841 movdqu 64(%r11),%xmm15 2842.byte 102,69,15,56,223,202 2843 movdqa %xmm0,%xmm10 2844 movdqu 80(%r11),%xmm1 2845 movups -112(%rcx),%xmm0 2846 2847 movups %xmm2,(%rsi) 2848 movdqa %xmm11,%xmm2 2849 movups %xmm3,16(%rsi) 2850 movdqa %xmm12,%xmm3 2851 movups %xmm4,32(%rsi) 2852 movdqa %xmm13,%xmm4 2853 movups %xmm5,48(%rsi) 2854 movdqa %xmm14,%xmm5 2855 movups %xmm6,64(%rsi) 2856 movdqa %xmm15,%xmm6 2857 movups %xmm7,80(%rsi) 2858 movdqa %xmm1,%xmm7 2859 movups %xmm8,96(%rsi) 2860 leaq 112(%rsi),%rsi 2861 2862 subq $0x80,%rdx 2863 ja .Lcbc_dec_loop8 2864 2865 movaps %xmm9,%xmm2 2866 leaq -112(%rcx),%rcx 2867 addq $0x70,%rdx 2868 jle .Lcbc_dec_clear_tail_collected 2869 movups %xmm9,(%rsi) 2870 leaq 16(%rsi),%rsi 2871 cmpq $0x50,%rdx 2872 jbe .Lcbc_dec_tail 2873 2874 movaps %xmm11,%xmm2 2875.Lcbc_dec_six_or_seven: 2876 cmpq $0x60,%rdx 2877 ja .Lcbc_dec_seven 2878 2879 movaps %xmm7,%xmm8 2880 call _aesni_decrypt6 2881 pxor %xmm10,%xmm2 2882 movaps %xmm8,%xmm10 2883 pxor %xmm11,%xmm3 2884 movdqu %xmm2,(%rsi) 2885 pxor %xmm12,%xmm4 2886 movdqu %xmm3,16(%rsi) 2887 pxor %xmm3,%xmm3 2888 pxor %xmm13,%xmm5 2889 movdqu %xmm4,32(%rsi) 2890 pxor %xmm4,%xmm4 2891 pxor %xmm14,%xmm6 2892 movdqu %xmm5,48(%rsi) 2893 pxor %xmm5,%xmm5 2894 pxor %xmm15,%xmm7 2895 movdqu %xmm6,64(%rsi) 2896 pxor %xmm6,%xmm6 2897 leaq 80(%rsi),%rsi 2898 movdqa %xmm7,%xmm2 2899 pxor %xmm7,%xmm7 2900 jmp .Lcbc_dec_tail_collected 2901 2902.align 16 2903.Lcbc_dec_seven: 2904 movups 96(%rdi),%xmm8 2905 xorps %xmm9,%xmm9 2906 call _aesni_decrypt8 2907 movups 80(%rdi),%xmm9 2908 pxor %xmm10,%xmm2 
2909 movups 96(%rdi),%xmm10 2910 pxor %xmm11,%xmm3 2911 movdqu %xmm2,(%rsi) 2912 pxor %xmm12,%xmm4 2913 movdqu %xmm3,16(%rsi) 2914 pxor %xmm3,%xmm3 2915 pxor %xmm13,%xmm5 2916 movdqu %xmm4,32(%rsi) 2917 pxor %xmm4,%xmm4 2918 pxor %xmm14,%xmm6 2919 movdqu %xmm5,48(%rsi) 2920 pxor %xmm5,%xmm5 2921 pxor %xmm15,%xmm7 2922 movdqu %xmm6,64(%rsi) 2923 pxor %xmm6,%xmm6 2924 pxor %xmm9,%xmm8 2925 movdqu %xmm7,80(%rsi) 2926 pxor %xmm7,%xmm7 2927 leaq 96(%rsi),%rsi 2928 movdqa %xmm8,%xmm2 2929 pxor %xmm8,%xmm8 2930 pxor %xmm9,%xmm9 2931 jmp .Lcbc_dec_tail_collected 2932 2933.align 16 2934.Lcbc_dec_loop6: 2935 movups %xmm7,(%rsi) 2936 leaq 16(%rsi),%rsi 2937 movdqu 0(%rdi),%xmm2 2938 movdqu 16(%rdi),%xmm3 2939 movdqa %xmm2,%xmm11 2940 movdqu 32(%rdi),%xmm4 2941 movdqa %xmm3,%xmm12 2942 movdqu 48(%rdi),%xmm5 2943 movdqa %xmm4,%xmm13 2944 movdqu 64(%rdi),%xmm6 2945 movdqa %xmm5,%xmm14 2946 movdqu 80(%rdi),%xmm7 2947 movdqa %xmm6,%xmm15 2948.Lcbc_dec_loop6_enter: 2949 leaq 96(%rdi),%rdi 2950 movdqa %xmm7,%xmm8 2951 2952 call _aesni_decrypt6 2953 2954 pxor %xmm10,%xmm2 2955 movdqa %xmm8,%xmm10 2956 pxor %xmm11,%xmm3 2957 movdqu %xmm2,(%rsi) 2958 pxor %xmm12,%xmm4 2959 movdqu %xmm3,16(%rsi) 2960 pxor %xmm13,%xmm5 2961 movdqu %xmm4,32(%rsi) 2962 pxor %xmm14,%xmm6 2963 movq %r11,%rcx 2964 movdqu %xmm5,48(%rsi) 2965 pxor %xmm15,%xmm7 2966 movl %r10d,%eax 2967 movdqu %xmm6,64(%rsi) 2968 leaq 80(%rsi),%rsi 2969 subq $0x60,%rdx 2970 ja .Lcbc_dec_loop6 2971 2972 movdqa %xmm7,%xmm2 2973 addq $0x50,%rdx 2974 jle .Lcbc_dec_clear_tail_collected 2975 movups %xmm7,(%rsi) 2976 leaq 16(%rsi),%rsi 2977 2978.Lcbc_dec_tail: 2979 movups (%rdi),%xmm2 2980 subq $0x10,%rdx 2981 jbe .Lcbc_dec_one 2982 2983 movups 16(%rdi),%xmm3 2984 movaps %xmm2,%xmm11 2985 subq $0x10,%rdx 2986 jbe .Lcbc_dec_two 2987 2988 movups 32(%rdi),%xmm4 2989 movaps %xmm3,%xmm12 2990 subq $0x10,%rdx 2991 jbe .Lcbc_dec_three 2992 2993 movups 48(%rdi),%xmm5 2994 movaps %xmm4,%xmm13 2995 subq $0x10,%rdx 2996 jbe .Lcbc_dec_four 2997 
2998 movups 64(%rdi),%xmm6 2999 movaps %xmm5,%xmm14 3000 movaps %xmm6,%xmm15 3001 xorps %xmm7,%xmm7 3002 call _aesni_decrypt6 3003 pxor %xmm10,%xmm2 3004 movaps %xmm15,%xmm10 3005 pxor %xmm11,%xmm3 3006 movdqu %xmm2,(%rsi) 3007 pxor %xmm12,%xmm4 3008 movdqu %xmm3,16(%rsi) 3009 pxor %xmm3,%xmm3 3010 pxor %xmm13,%xmm5 3011 movdqu %xmm4,32(%rsi) 3012 pxor %xmm4,%xmm4 3013 pxor %xmm14,%xmm6 3014 movdqu %xmm5,48(%rsi) 3015 pxor %xmm5,%xmm5 3016 leaq 64(%rsi),%rsi 3017 movdqa %xmm6,%xmm2 3018 pxor %xmm6,%xmm6 3019 pxor %xmm7,%xmm7 3020 subq $0x10,%rdx 3021 jmp .Lcbc_dec_tail_collected 3022 3023.align 16 3024.Lcbc_dec_one: 3025 movaps %xmm2,%xmm11 3026 movups (%rcx),%xmm0 3027 movups 16(%rcx),%xmm1 3028 leaq 32(%rcx),%rcx 3029 xorps %xmm0,%xmm2 3030.Loop_dec1_17: 3031.byte 102,15,56,222,209 3032 decl %eax 3033 movups (%rcx),%xmm1 3034 leaq 16(%rcx),%rcx 3035 jnz .Loop_dec1_17 3036.byte 102,15,56,223,209 3037 xorps %xmm10,%xmm2 3038 movaps %xmm11,%xmm10 3039 jmp .Lcbc_dec_tail_collected 3040.align 16 3041.Lcbc_dec_two: 3042 movaps %xmm3,%xmm12 3043 call _aesni_decrypt2 3044 pxor %xmm10,%xmm2 3045 movaps %xmm12,%xmm10 3046 pxor %xmm11,%xmm3 3047 movdqu %xmm2,(%rsi) 3048 movdqa %xmm3,%xmm2 3049 pxor %xmm3,%xmm3 3050 leaq 16(%rsi),%rsi 3051 jmp .Lcbc_dec_tail_collected 3052.align 16 3053.Lcbc_dec_three: 3054 movaps %xmm4,%xmm13 3055 call _aesni_decrypt3 3056 pxor %xmm10,%xmm2 3057 movaps %xmm13,%xmm10 3058 pxor %xmm11,%xmm3 3059 movdqu %xmm2,(%rsi) 3060 pxor %xmm12,%xmm4 3061 movdqu %xmm3,16(%rsi) 3062 pxor %xmm3,%xmm3 3063 movdqa %xmm4,%xmm2 3064 pxor %xmm4,%xmm4 3065 leaq 32(%rsi),%rsi 3066 jmp .Lcbc_dec_tail_collected 3067.align 16 3068.Lcbc_dec_four: 3069 movaps %xmm5,%xmm14 3070 call _aesni_decrypt4 3071 pxor %xmm10,%xmm2 3072 movaps %xmm14,%xmm10 3073 pxor %xmm11,%xmm3 3074 movdqu %xmm2,(%rsi) 3075 pxor %xmm12,%xmm4 3076 movdqu %xmm3,16(%rsi) 3077 pxor %xmm3,%xmm3 3078 pxor %xmm13,%xmm5 3079 movdqu %xmm4,32(%rsi) 3080 pxor %xmm4,%xmm4 3081 movdqa %xmm5,%xmm2 3082 pxor 
%xmm5,%xmm5	/* operands of the pxor whose mnemonic ends the preceding physical line */
	leaq	48(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected

/*
 * Tail of aesni_cbc_encrypt (the routine starts earlier in the file).
 * .Lcbc_dec_clear_tail_collected zeroes %xmm3..%xmm9 and falls through.
 */
.align	16
.Lcbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
/*
 * %xmm10 is written through %r8 (presumably the caller's IV buffer --
 * confirm against the routine's entry code).  %xmm2 holds the last output
 * block: it is stored whole when (%rdx & 15) == 0, otherwise it goes
 * through the byte-copy path below.
 */
.Lcbc_dec_tail_collected:
	movups	%xmm10,(%r8)
	andq	$15,%rdx
	jnz	.Lcbc_dec_tail_partial
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lcbc_dec_ret
.align	16
.Lcbc_dec_tail_partial:
	movaps	%xmm2,(%rsp)		/* spill the block to the stack slot at (%rsp) */
	pxor	%xmm2,%xmm2
	movq	$16,%rcx
	movq	%rsi,%rdi		/* copy destination = output pointer */
	subq	%rdx,%rcx		/* %rcx = 16 - (%rdx & 15) bytes to copy */
	leaq	(%rsp),%rsi		/* copy source = spilled block */
.long	0x9066A4F3			/* bytes F3 A4 66 90: rep movsb ; 2-byte nop */
	movdqa	%xmm2,(%rsp)		/* overwrite the spilled block with zeros */

.Lcbc_dec_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	leaq	(%rbp),%rsp
	popq	%rbp
.Lcbc_ret:
	.byte	0xf3,0xc3		/* rep ret */
.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt

/*
 * aesni_set_decrypt_key
 *
 * In:   %rdi, %esi, %rdx are passed unchanged to __aesni_set_encrypt_key
 *       (input key bytes, key size in bits, key schedule to fill).
 * Out:  %eax = 0 on success, otherwise the non-zero status returned by
 *       __aesni_set_encrypt_key (the schedule is then left as that call
 *       produced it).
 *
 * After the encryption schedule has been built, it is converted in place:
 *   1. the first and last 16-byte round keys are swapped;
 *   2. the remaining keys are swapped pairwise from both ends, each one
 *      passed through AESIMC;
 *   3. the key the two pointers meet on is passed through AESIMC.
 * Relies on __aesni_set_encrypt_key leaving %rdx untouched and leaving the
 * value it stores as the round count (9, 11 or 13) in %esi.
 * %xmm0 and %xmm1 are zeroed before returning on the success path.
 */
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.byte	0x48,0x83,0xEC,0x08		/* subq $8,%rsp (undone at .Ldec_key_ret) */
	call	__aesni_set_encrypt_key
	shll	$4,%esi			/* %esi = stored round count * 16 */
	testl	%eax,%eax
	jnz	.Ldec_key_ret		/* key setup failed: return its status */
	leaq	16(%rdx,%rsi,1),%rdi	/* %rdi -> last round key */

	movups	(%rdx),%xmm0		/* swap first and last round keys */
	movups	(%rdi),%xmm1
	movups	%xmm0,(%rdi)
	movups	%xmm1,(%rdx)
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi

.Ldec_key_inverse:
	movups	(%rdx),%xmm0
	movups	(%rdi),%xmm1
.byte	102,15,56,219,192		/* aesimc %xmm0,%xmm0 */
.byte	102,15,56,219,201		/* aesimc %xmm1,%xmm1 */
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi
	movups	%xmm0,16(%rdi)		/* low key -> slot the high key came from */
	movups	%xmm1,-16(%rdx)		/* high key -> slot the low key came from */
	cmpq	%rdx,%rdi
	ja	.Ldec_key_inverse	/* unsigned: continue while %rdi is above %rdx */

	movups	(%rdx),%xmm0		/* remaining middle key */
.byte	102,15,56,219,192		/* aesimc %xmm0,%xmm0 */
	pxor	%xmm1,%xmm1
	movups	%xmm0,(%rdi)
	pxor	%xmm0,%xmm0
.Ldec_key_ret:
	addq	$8,%rsp
	.byte	0xf3,0xc3		/* rep ret */
.LSEH_end_set_decrypt_key:
.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key

/*
 * aesni_set_encrypt_key / __aesni_set_encrypt_key
 *
 * In:   %rdi = input key bytes
 *       %esi = key size in bits; 128, 192 and 256 are accepted
 *       %rdx = key schedule to fill
 * Out:  %rax = 0 on success,
 *              -1 if %rdi or %rdx is zero,
 *              -2 for any other value of %esi.
 *
 * Round keys are written as consecutive 16-byte entries starting at
 * (%rdx); %rax is used as the running store pointer.  %esi is overwritten
 * with 9, 11 or 13 (for 128/192/256-bit keys) and that value is stored as
 * a 32-bit word at 240(%rdx) on every success path.
 *
 * Two implementations exist per key size.  %r10d is loaded with
 * OPENSSL_ia32cap_P+4 masked by 268437504 (0x10000800); when the result
 * equals 268435456 (0x10000000) the "_alt" variant, built from PSHUFB and
 * AESENCLAST, is used; otherwise the AESKEYGENASSIST variant runs.
 * NOTE(review): the meaning of the two mask bits is defined by
 * OPENSSL_ia32cap_P -- confirm there.
 *
 * Clobbers %rax, %esi, %r10d and flags; %xmm0..%xmm5 are zeroed on exit.
 * The AES-NI/SSSE3 instructions are emitted as raw .byte sequences; the
 * comment on each one gives the decoded instruction.
 */
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte	0x48,0x83,0xEC,0x08		/* subq $8,%rsp (undone at .Lenc_key_ret) */
	movq	$-1,%rax
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movl	$268437504,%r10d	/* 0x10000800 capability mask */
	movups	(%rdi),%xmm0		/* first 16 input key bytes */
	xorps	%xmm4,%xmm4
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax		/* %rax -> second round-key slot */
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits

/* 128-bit key: the AESKEYGENASSIST immediates are 1,2,4,...,128,27,54. */
.L10rounds:
	movl	$9,%esi
	cmpl	$268435456,%r10d	/* 0x10000000: take the _alt variant */
	je	.L10rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1		/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2		/* aeskeygenassist $2,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4		/* aeskeygenassist $4,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8		/* aeskeygenassist $8,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16		/* aeskeygenassist $16,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32		/* aeskeygenassist $32,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64		/* aeskeygenassist $64,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128		/* aeskeygenassist $128,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27		/* aeskeygenassist $27,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54		/* aeskeygenassist $54,%xmm0,%xmm1 */
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)		/* final round key */
	movl	%esi,80(%rax)		/* %rax = key+160 here, so this is 240(key) */
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5	/* PSHUFB control mask */
	movl	$8,%r10d			/* loop count */
	movdqa	.Lkey_rcon1(%rip),%xmm4		/* 1,1,1,1; shifted left once per pass */
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196		/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3		/* fold the previous key: %xmm2 ^= itself */
	pslldq	$4,%xmm2		/* shifted left by 4, 8 and 12 bytes */
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	movdqa	.Lkey_rcon1b(%rip),%xmm4	/* 0x1b in each dword, for the last two keys */

.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196		/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196		/* aesenclast %xmm4,%xmm0 */

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)		/* %rax = key+144 here, so this is 240(key) */
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

/* 192-bit key. */
.align	16
.L12rounds:
	movq	16(%rdi),%xmm2		/* input key bytes 16..23 (8 bytes) */
	movl	$11,%esi
	cmpl	$268435456,%r10d	/* 0x10000000: take the _alt variant */
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,202,1		/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2		/* aeskeygenassist $2,%xmm2,%xmm1 */
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4		/* aeskeygenassist $4,%xmm2,%xmm1 */
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8		/* aeskeygenassist $8,%xmm2,%xmm1 */
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16		/* aeskeygenassist $16,%xmm2,%xmm1 */
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32		/* aeskeygenassist $32,%xmm2,%xmm1 */
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64		/* aeskeygenassist $64,%xmm2,%xmm1 */
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128		/* aeskeygenassist $128,%xmm2,%xmm1 */
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)		/* final round key */
	movl	%esi,48(%rax)		/* %rax = key+192 here, so this is 240(key) */
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:
	movdqa	.Lkey_rotate192(%rip),%xmm5	/* PSHUFB control mask */
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d			/* loop count; 24 bytes emitted per pass */
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:
	movq	%xmm2,0(%rax)		/* low 8 bytes of %xmm2 */
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213			/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212		/* aesenclast %xmm4,%xmm2 */
	pslld	$1,%xmm4
	leaq	24(%rax),%rax

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$0xff,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)		/* %rax = key+208 here, so this is 240(key) */
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

/* 256-bit key. */
.align	16
.L14rounds:
	movups	16(%rdi),%xmm2		/* input key bytes 16..31 */
	movl	$13,%esi
	leaq	16(%rax),%rax		/* %rax -> third round-key slot */
	cmpl	$268435456,%r10d	/* 0x10000000: take the _alt variant */
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1		/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1		/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2		/* aeskeygenassist $2,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2		/* aeskeygenassist $2,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4		/* aeskeygenassist $4,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4		/* aeskeygenassist $4,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8		/* aeskeygenassist $8,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8		/* aeskeygenassist $8,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16		/* aeskeygenassist $16,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16		/* aeskeygenassist $16,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32		/* aeskeygenassist $32,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32		/* aeskeygenassist $32,%xmm0,%xmm1 */
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64		/* aeskeygenassist $64,%xmm2,%xmm1 */
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)		/* final round key */
	movl	%esi,16(%rax)		/* %rax = key+224 here, so this is 240(key) */
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5	/* PSHUFB control mask */
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d			/* loop count; exit is taken mid-body */
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213			/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212		/* aesenclast %xmm4,%xmm2 */

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256		/* last pass emits only the even key */

	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		/* aesenclast %xmm3,%xmm2 (with %xmm3 = 0) */

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)		/* %rax = key+224 here, so this is 240(key) */
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax
/* Common exit: clear the registers that held key material, then return. */
.Lenc_key_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
	.byte	0xf3,0xc3		/* rep ret */
.LSEH_end_set_encrypt_key:

/*
 * Local helpers reached by `call` from the AESKEYGENASSIST paths above.
 * On entry %xmm1 holds the AESKEYGENASSIST result and %rax the store
 * pointer.  The plain entry points first store the current key and advance
 * %rax; the "_cold" entry points skip that store.
 */
.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	/* replicate dword 3 of the assist result */
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3		/* rep ret */

.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5		/* keep %xmm2 for the next 192b call */
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		/* replicate dword 1 of the assist result */
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3		/* rep ret */

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax		/* two 16-byte entries were written */
	jmp	.Lkey_expansion_192b_warm

.align	16
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	/* replicate dword 3 of the assist result */
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3		/* rep ret */

.align	16
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	/* replicate dword 2 of the assist result */
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3		/* rep ret */
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

/*
 * Constant tables, 64-byte aligned.  .Lkey_rotate, .Lkey_rotate192,
 * .Lkey_rcon1 and .Lkey_rcon1b are loaded by the _alt key-schedule paths
 * above; the other labels are not referenced in this part of the file.
 */
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lxts_magic:
.long	0x87,0,1,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:
.long	1,1,1,1
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64