/*
 * AES-NI primitives for x86-64, GNU as (AT&T syntax), run through the C
 * preprocessor.  Register usage below follows the System V AMD64 argument
 * order (rdi, rsi, rdx, rcx, r8, r9).
 *
 * AES-NI / SSSE3 instructions are emitted as raw `.byte` sequences; each one
 * carries a comment with the instruction it encodes:
 *   102,15,56,220,modrm   aesenc        102,15,56,221,modrm   aesenclast
 *   102,15,56,222,modrm   aesdec        102,15,56,223,modrm   aesdeclast
 *   102,15,56,0,modrm     pshufb        0xf3,0xc3             rep ret
 * A 68 (REX.R) / 65 (REX.B) / 69 (REX.R+B) byte after 102 selects xmm8-15.
 *
 * The key structure is read as a sequence of 16-byte round keys starting at
 * offset 0, with a 32-bit round counter at offset 240.
 */
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

/*
 * aes_hw_encrypt -- encrypt one 16-byte block.
 * NOTE(review): presumably void aes_hw_encrypt(const uint8_t *in,
 * uint8_t *out, const AES_KEY *key) -- confirm against the C declaration.
 * In:    rdi = input block, rsi = output block, rdx = key structure
 * Clobb: eax, rdx, xmm0-xmm2 (all three xmm registers are zeroed on exit),
 *        flags
 */
.globl	aes_hw_encrypt
.hidden aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax			/* eax = round counter */
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2			/* whitening with round key 0 */
.Loop_enc1_1:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax
	movups	(%rdx),%xmm1			/* movups/leaq leave ZF from decl intact */
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
	.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0			/* scrub key material */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3			/* rep ret */
.size	aes_hw_encrypt,.-aes_hw_encrypt

/*
 * aes_hw_decrypt -- decrypt one 16-byte block.
 * Same register contract as aes_hw_encrypt, using aesdec/aesdeclast.
 */
.globl	aes_hw_decrypt
.hidden aes_hw_decrypt
.type	aes_hw_decrypt,@function
.align	16
aes_hw_decrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax			/* eax = round counter */
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_dec1_2:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
	.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3			/* rep ret */
.size	aes_hw_decrypt, .-aes_hw_decrypt

/*
 * _aesni_encryptN / _aesni_decryptN -- file-local helpers that process N
 * blocks in parallel (N = 2, 3, 4, 6, 8).
 * In:    rcx = key structure, eax = round counter (as stored at 240(key)),
 *        xmm2.. = the N blocks (xmm2-xmm3, ... up to xmm2-xmm9)
 * Out:   the same xmm registers hold the processed blocks
 * Clobb: rax, rcx, xmm0, xmm1, flags
 *
 * Loop shape shared by all of them: rcx is advanced to key+32+16*eax and rax
 * becomes a negative byte offset (16-16*eax) that is stepped by 32 -- two
 * round keys per iteration, alternating xmm1/xmm0 -- until it reaches zero.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop2:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2			/* ZF from addq; AES ops and movups keep flags */

	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_encrypt2,.-_aesni_encrypt2

.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
	.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
	.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_decrypt2,.-_aesni_decrypt2

.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_encrypt3,.-_aesni_encrypt3

.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
	.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
	.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_decrypt3,.-_aesni_decrypt3

.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte padding */
	addq	$16,%rax

.Lenc_loop4:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_encrypt4,.-_aesni_encrypt4

.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte padding */
	addq	$16,%rax

.Ldec_loop4:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
	.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
	.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_decrypt4,.-_aesni_decrypt4

/*
 * Six-block variants (xmm2-xmm7).  The first round for xmm2-xmm4 is
 * interleaved with the whitening of xmm5-xmm7, so the loop is entered at
 * .L*_loop6_enter, past the three aesenc/aesdec already done.
 * .Lenc_loop6 is also a `call` target from the CTR code later in this file.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.Lenc_loop6_enter:
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
	.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
	.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
	.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
	.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
	.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
	.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_encrypt6,.-_aesni_encrypt6

.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.Ldec_loop6_enter:
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
	.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
	.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
	.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
	.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
	.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
	.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
	.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
	.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_decrypt6,.-_aesni_decrypt6

/*
 * Eight-block variants (xmm2-xmm9).  The first round for xmm2/xmm3 is
 * interleaved with whitening, so the loop is entered at .L*_loop8_inner.
 * .Lenc_loop8_enter is also a `call` target from the CTR code later in this
 * file.
 */
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.Lenc_loop8_inner:
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
	.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
	.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
	.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
	.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
	.byte	102,68,15,56,220,192		/* aesenc %xmm0,%xmm8 */
	.byte	102,68,15,56,220,200		/* aesenc %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
	.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
	.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
	.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
	.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
	.byte	102,68,15,56,221,192		/* aesenclast %xmm0,%xmm8 */
	.byte	102,68,15,56,221,200		/* aesenclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_encrypt8,.-_aesni_encrypt8

.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.Ldec_loop8_inner:
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
	.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
	.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
	.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
	.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
	.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
	.byte	102,68,15,56,222,192		/* aesdec %xmm0,%xmm8 */
	.byte	102,68,15,56,222,200		/* aesdec %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
	.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
	.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
	.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
	.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
	.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
	.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
	.byte	102,68,15,56,223,192		/* aesdeclast %xmm0,%xmm8 */
	.byte	102,68,15,56,223,200		/* aesdeclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3			/* rep ret */
.size	_aesni_decrypt8,.-_aesni_decrypt8

/*
 * aes_hw_ecb_encrypt -- ECB-mode bulk encrypt or decrypt.
 * In:    rdi = input, rsi = output, rdx = length in bytes (rounded down to a
 *        multiple of 16; nothing is done if that is zero), rcx = key
 *        structure, r8d = direction (non-zero: encrypt, zero: decrypt)
 * Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm9, flags
 *
 * r11/r10d keep the key pointer and round counter because the _aesni_*
 * helpers destroy rcx/eax.  Input is consumed 8 blocks at a time while at
 * least 128 bytes remain; a 1..7 block tail is dispatched on the remaining
 * byte count.  The 5- and 7-block tails call the 6- and 8-block helpers with
 * a zeroed spare register whose result is never stored.
 * The decrypt paths zero the data registers after storing; the encrypt paths
 * do not.  xmm0/xmm1 are zeroed on every exit.
 */
.globl	aes_hw_ecb_encrypt
.hidden aes_hw_ecb_encrypt
.type	aes_hw_ecb_encrypt,@function
.align	16
aes_hw_ecb_encrypt:
	andq	$-16,%rdx			/* whole blocks only */
	jz	.Lecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11			/* r11 = key (survives helper calls) */
	movl	%eax,%r10d			/* r10d = round counter (ditto) */
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	/* store the previous 8 results while loading the next 8 inputs */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8			/* another full 128 bytes available */

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx			/* undo the borrow: rdx = tail bytes */
	jz	.Lecb_ret

.Lecb_enc_tail:
	/* rdx is 16..112 here; each cmpq feeds both a jb and a je */
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9			/* dummy eighth block */
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
	.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7			/* dummy sixth block */
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	/* store the previous 8 results while loading the next 8 inputs */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	/* store the last 8 plaintext blocks and scrub each register */
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx			/* undo the borrow: rdx = tail bytes */
	jz	.Lecb_ret

.Lecb_dec_tail:
	/* rdx is 16..112 here; each cmpq feeds both a jb and a je */
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9			/* dummy eighth block */
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
	.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
	.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7			/* dummy sixth block */
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0			/* scrub round-key registers */
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3			/* rep ret */
.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt

/*
 * aes_hw_ccm64_encrypt_blocks -- per block, run two AES encryptions in
 * lock-step: one on the counter block (xmm2) to produce the keystream and one
 * on the running MAC (xmm3) after XORing in the input block.
 * In:    rdi = input, rsi = output, rdx = number of 16-byte blocks,
 *        rcx = key structure, r8 = 16-byte counter block (read only here),
 *        r9 = 16-byte MAC state (read, then written back on exit)
 * Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm3, xmm6-xmm9, flags
 *        (xmm0-3, xmm6, xmm8 are zeroed on exit; xmm7 and xmm9 keep the
 *        .Lbswap_mask / .Lincrement64 constants)
 *
 * The block count is decremented and tested only after a block has been
 * processed, so rdx must be at least 1 on entry.
 * xmm6 holds the counter after a pshufb with .Lbswap_mask and is advanced
 * with paddq by .Lincrement64; each new value is copied to xmm2 and shuffled
 * with the same mask again before use.  NOTE(review): both constants are
 * defined outside this section -- presumably a full byte reversal and a
 * 64-bit +1; confirm at their definition.
 */
.globl	aes_hw_ccm64_encrypt_blocks
.hidden aes_hw_ccm64_encrypt_blocks
.type	aes_hw_ccm64_encrypt_blocks,@function
.align	16
aes_hw_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax
	movdqu	(%r8),%xmm6
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	shll	$4,%eax
	movl	$16,%r10d
	leaq	0(%rcx),%r11			/* r11 = key base */
	movdqu	(%r9),%xmm3			/* xmm3 = MAC state */
	movdqa	%xmm6,%xmm2			/* xmm2 = first counter block, as given */
	leaq	32(%rcx,%rax,1),%rcx		/* rcx = key + 32 + 16*rounds */
	.byte	102,15,56,0,247			/* pshufb %xmm7,%xmm6 */
	subq	%rax,%r10			/* r10 = 16 - 16*rounds (loop start offset) */
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	(%rdi),%xmm8			/* xmm8 = input block */

	xorps	%xmm0,%xmm2
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0
	xorps	%xmm0,%xmm3			/* MAC ^= input ^ round key 0 */
	movups	32(%r11),%xmm0

.Lccm64_enc2_loop:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_enc2_loop
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	paddq	%xmm9,%xmm6			/* advance counter */
	decq	%rdx				/* sets ZF for the jnz at the bottom */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */

	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8			/* output = input ^ keystream */
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
	.byte	102,15,56,0,215			/* pshufb %xmm7,%xmm2 */
	leaq	16(%rsi),%rsi
	jnz	.Lccm64_enc_outer		/* nothing since decq touched flags */

	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)			/* write MAC state back */
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3			/* rep ret */
.size	aes_hw_ccm64_encrypt_blocks,.-aes_hw_ccm64_encrypt_blocks

/*
 * aes_hw_ccm64_decrypt_blocks -- inverse of aes_hw_ccm64_encrypt_blocks.
 * Same register contract.  The MAC is computed over the output (decrypted)
 * blocks, so the keystream for block i must exist before the MAC can absorb
 * block i: the first counter block is encrypted alone, then each loop
 * iteration pairs "MAC over block i" with "keystream for block i+1", and the
 * last block's MAC update is finished alone at .Lccm64_dec_break.
 * rdx must be at least 1 on entry (tested only after the first block).
 */
.globl	aes_hw_ccm64_decrypt_blocks
.hidden aes_hw_ccm64_decrypt_blocks
.type	aes_hw_ccm64_decrypt_blocks,@function
.align	16
aes_hw_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax
	movups	(%r8),%xmm6
	movdqu	(%r9),%xmm3			/* xmm3 = MAC state */
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm6,%xmm2
	movl	%eax,%r10d
	movq	%rcx,%r11			/* r11 = key base */
	.byte	102,15,56,0,247			/* pshufb %xmm7,%xmm6 */
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
	.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	shll	$4,%r10d
	movl	$16,%eax
	movups	(%rdi),%xmm8			/* xmm8 = first input block */
	paddq	%xmm9,%xmm6
	leaq	16(%rdi),%rdi
	subq	%r10,%rax			/* rax = 16 - 16*rounds */
	leaq	32(%r11,%r10,1),%rcx		/* rcx = key + 32 + 16*rounds */
	movq	%rax,%r10
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8			/* output = input ^ keystream */
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
	.byte	102,15,56,0,215			/* pshufb %xmm7,%xmm2 */

	subq	$1,%rdx
	jz	.Lccm64_dec_break

	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	xorps	%xmm0,%xmm2
	xorps	%xmm8,%xmm3			/* MAC ^= output ^ round key 0 */
	movups	32(%r11),%xmm0
	jmp	.Lccm64_dec2_loop
.align	16
.Lccm64_dec2_loop:
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
	.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8			/* next input block */
	paddq	%xmm9,%xmm6
	.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
	.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:

	/* last block: finish the MAC with a single-lane encryption */
	movl	240(%r11),%eax
	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3
.Loop_enc1_6:
	.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
	.byte	102,15,56,221,217		/* aesenclast %xmm1,%xmm3 */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)			/* write MAC state back */
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3			/* rep ret */
.size	aes_hw_ccm64_decrypt_blocks,.-aes_hw_ccm64_decrypt_blocks
/* NOTE(review): aes_hw_ctr32_encrypt_blocks continues past this point; the text below still carries the fused listing numbers and is kept byte-for-byte. */
1001.globl aes_hw_ctr32_encrypt_blocks 1002.hidden aes_hw_ctr32_encrypt_blocks 1003.type aes_hw_ctr32_encrypt_blocks,@function 1004.align 16 1005aes_hw_ctr32_encrypt_blocks: 1006.cfi_startproc 1007 cmpq $1,%rdx 1008 jne .Lctr32_bulk 1009 1010 1011 1012 movups (%r8),%xmm2 1013 movups (%rdi),%xmm3 1014 movl 240(%rcx),%edx 1015 movups (%rcx),%xmm0 1016 movups 16(%rcx),%xmm1 1017 leaq 32(%rcx),%rcx 1018 xorps %xmm0,%xmm2 1019.Loop_enc1_7: 1020.byte 102,15,56,220,209 1021 decl %edx 1022 movups (%rcx),%xmm1 1023 leaq 16(%rcx),%rcx 1024 jnz .Loop_enc1_7 1025.byte 102,15,56,221,209 1026 pxor %xmm0,%xmm0 1027 pxor %xmm1,%xmm1 1028 xorps %xmm3,%xmm2 1029 pxor
%xmm3,%xmm3 1030 movups %xmm2,(%rsi) 1031 xorps %xmm2,%xmm2 1032 jmp .Lctr32_epilogue 1033 1034.align 16 1035.Lctr32_bulk: 1036 leaq (%rsp),%r11 1037.cfi_def_cfa_register %r11 1038 pushq %rbp 1039.cfi_offset %rbp,-16 1040 subq $128,%rsp 1041 andq $-16,%rsp 1042 1043 1044 1045 1046 movdqu (%r8),%xmm2 1047 movdqu (%rcx),%xmm0 1048 movl 12(%r8),%r8d 1049 pxor %xmm0,%xmm2 1050 movl 12(%rcx),%ebp 1051 movdqa %xmm2,0(%rsp) 1052 bswapl %r8d 1053 movdqa %xmm2,%xmm3 1054 movdqa %xmm2,%xmm4 1055 movdqa %xmm2,%xmm5 1056 movdqa %xmm2,64(%rsp) 1057 movdqa %xmm2,80(%rsp) 1058 movdqa %xmm2,96(%rsp) 1059 movq %rdx,%r10 1060 movdqa %xmm2,112(%rsp) 1061 1062 leaq 1(%r8),%rax 1063 leaq 2(%r8),%rdx 1064 bswapl %eax 1065 bswapl %edx 1066 xorl %ebp,%eax 1067 xorl %ebp,%edx 1068.byte 102,15,58,34,216,3 1069 leaq 3(%r8),%rax 1070 movdqa %xmm3,16(%rsp) 1071.byte 102,15,58,34,226,3 1072 bswapl %eax 1073 movq %r10,%rdx 1074 leaq 4(%r8),%r10 1075 movdqa %xmm4,32(%rsp) 1076 xorl %ebp,%eax 1077 bswapl %r10d 1078.byte 102,15,58,34,232,3 1079 xorl %ebp,%r10d 1080 movdqa %xmm5,48(%rsp) 1081 leaq 5(%r8),%r9 1082 movl %r10d,64+12(%rsp) 1083 bswapl %r9d 1084 leaq 6(%r8),%r10 1085 movl 240(%rcx),%eax 1086 xorl %ebp,%r9d 1087 bswapl %r10d 1088 movl %r9d,80+12(%rsp) 1089 xorl %ebp,%r10d 1090 leaq 7(%r8),%r9 1091 movl %r10d,96+12(%rsp) 1092 bswapl %r9d 1093 leaq OPENSSL_ia32cap_P(%rip),%r10 1094 movl 4(%r10),%r10d 1095 xorl %ebp,%r9d 1096 andl $71303168,%r10d 1097 movl %r9d,112+12(%rsp) 1098 1099 movups 16(%rcx),%xmm1 1100 1101 movdqa 64(%rsp),%xmm6 1102 movdqa 80(%rsp),%xmm7 1103 1104 cmpq $8,%rdx 1105 jb .Lctr32_tail 1106 1107 subq $6,%rdx 1108 cmpl $4194304,%r10d 1109 je .Lctr32_6x 1110 1111 leaq 128(%rcx),%rcx 1112 subq $2,%rdx 1113 jmp .Lctr32_loop8 1114 1115.align 16 1116.Lctr32_6x: 1117 shll $4,%eax 1118 movl $48,%r10d 1119 bswapl %ebp 1120 leaq 32(%rcx,%rax,1),%rcx 1121 subq %rax,%r10 1122 jmp .Lctr32_loop6 1123 1124.align 16 1125.Lctr32_loop6: 1126 addl $6,%r8d 1127 movups -48(%rcx,%r10,1),%xmm0 
1128.byte 102,15,56,220,209 1129 movl %r8d,%eax 1130 xorl %ebp,%eax 1131.byte 102,15,56,220,217 1132.byte 0x0f,0x38,0xf1,0x44,0x24,12 1133 leal 1(%r8),%eax 1134.byte 102,15,56,220,225 1135 xorl %ebp,%eax 1136.byte 0x0f,0x38,0xf1,0x44,0x24,28 1137.byte 102,15,56,220,233 1138 leal 2(%r8),%eax 1139 xorl %ebp,%eax 1140.byte 102,15,56,220,241 1141.byte 0x0f,0x38,0xf1,0x44,0x24,44 1142 leal 3(%r8),%eax 1143.byte 102,15,56,220,249 1144 movups -32(%rcx,%r10,1),%xmm1 1145 xorl %ebp,%eax 1146 1147.byte 102,15,56,220,208 1148.byte 0x0f,0x38,0xf1,0x44,0x24,60 1149 leal 4(%r8),%eax 1150.byte 102,15,56,220,216 1151 xorl %ebp,%eax 1152.byte 0x0f,0x38,0xf1,0x44,0x24,76 1153.byte 102,15,56,220,224 1154 leal 5(%r8),%eax 1155 xorl %ebp,%eax 1156.byte 102,15,56,220,232 1157.byte 0x0f,0x38,0xf1,0x44,0x24,92 1158 movq %r10,%rax 1159.byte 102,15,56,220,240 1160.byte 102,15,56,220,248 1161 movups -16(%rcx,%r10,1),%xmm0 1162 1163 call .Lenc_loop6 1164 1165 movdqu (%rdi),%xmm8 1166 movdqu 16(%rdi),%xmm9 1167 movdqu 32(%rdi),%xmm10 1168 movdqu 48(%rdi),%xmm11 1169 movdqu 64(%rdi),%xmm12 1170 movdqu 80(%rdi),%xmm13 1171 leaq 96(%rdi),%rdi 1172 movups -64(%rcx,%r10,1),%xmm1 1173 pxor %xmm2,%xmm8 1174 movaps 0(%rsp),%xmm2 1175 pxor %xmm3,%xmm9 1176 movaps 16(%rsp),%xmm3 1177 pxor %xmm4,%xmm10 1178 movaps 32(%rsp),%xmm4 1179 pxor %xmm5,%xmm11 1180 movaps 48(%rsp),%xmm5 1181 pxor %xmm6,%xmm12 1182 movaps 64(%rsp),%xmm6 1183 pxor %xmm7,%xmm13 1184 movaps 80(%rsp),%xmm7 1185 movdqu %xmm8,(%rsi) 1186 movdqu %xmm9,16(%rsi) 1187 movdqu %xmm10,32(%rsi) 1188 movdqu %xmm11,48(%rsi) 1189 movdqu %xmm12,64(%rsi) 1190 movdqu %xmm13,80(%rsi) 1191 leaq 96(%rsi),%rsi 1192 1193 subq $6,%rdx 1194 jnc .Lctr32_loop6 1195 1196 addq $6,%rdx 1197 jz .Lctr32_done 1198 1199 leal -48(%r10),%eax 1200 leaq -80(%rcx,%r10,1),%rcx 1201 negl %eax 1202 shrl $4,%eax 1203 jmp .Lctr32_tail 1204 1205.align 32 1206.Lctr32_loop8: 1207 addl $8,%r8d 1208 movdqa 96(%rsp),%xmm8 1209.byte 102,15,56,220,209 1210 movl %r8d,%r9d 1211 movdqa 
112(%rsp),%xmm9 1212.byte 102,15,56,220,217 1213 bswapl %r9d 1214 movups 32-128(%rcx),%xmm0 1215.byte 102,15,56,220,225 1216 xorl %ebp,%r9d 1217 nop 1218.byte 102,15,56,220,233 1219 movl %r9d,0+12(%rsp) 1220 leaq 1(%r8),%r9 1221.byte 102,15,56,220,241 1222.byte 102,15,56,220,249 1223.byte 102,68,15,56,220,193 1224.byte 102,68,15,56,220,201 1225 movups 48-128(%rcx),%xmm1 1226 bswapl %r9d 1227.byte 102,15,56,220,208 1228.byte 102,15,56,220,216 1229 xorl %ebp,%r9d 1230.byte 0x66,0x90 1231.byte 102,15,56,220,224 1232.byte 102,15,56,220,232 1233 movl %r9d,16+12(%rsp) 1234 leaq 2(%r8),%r9 1235.byte 102,15,56,220,240 1236.byte 102,15,56,220,248 1237.byte 102,68,15,56,220,192 1238.byte 102,68,15,56,220,200 1239 movups 64-128(%rcx),%xmm0 1240 bswapl %r9d 1241.byte 102,15,56,220,209 1242.byte 102,15,56,220,217 1243 xorl %ebp,%r9d 1244.byte 0x66,0x90 1245.byte 102,15,56,220,225 1246.byte 102,15,56,220,233 1247 movl %r9d,32+12(%rsp) 1248 leaq 3(%r8),%r9 1249.byte 102,15,56,220,241 1250.byte 102,15,56,220,249 1251.byte 102,68,15,56,220,193 1252.byte 102,68,15,56,220,201 1253 movups 80-128(%rcx),%xmm1 1254 bswapl %r9d 1255.byte 102,15,56,220,208 1256.byte 102,15,56,220,216 1257 xorl %ebp,%r9d 1258.byte 0x66,0x90 1259.byte 102,15,56,220,224 1260.byte 102,15,56,220,232 1261 movl %r9d,48+12(%rsp) 1262 leaq 4(%r8),%r9 1263.byte 102,15,56,220,240 1264.byte 102,15,56,220,248 1265.byte 102,68,15,56,220,192 1266.byte 102,68,15,56,220,200 1267 movups 96-128(%rcx),%xmm0 1268 bswapl %r9d 1269.byte 102,15,56,220,209 1270.byte 102,15,56,220,217 1271 xorl %ebp,%r9d 1272.byte 0x66,0x90 1273.byte 102,15,56,220,225 1274.byte 102,15,56,220,233 1275 movl %r9d,64+12(%rsp) 1276 leaq 5(%r8),%r9 1277.byte 102,15,56,220,241 1278.byte 102,15,56,220,249 1279.byte 102,68,15,56,220,193 1280.byte 102,68,15,56,220,201 1281 movups 112-128(%rcx),%xmm1 1282 bswapl %r9d 1283.byte 102,15,56,220,208 1284.byte 102,15,56,220,216 1285 xorl %ebp,%r9d 1286.byte 0x66,0x90 1287.byte 102,15,56,220,224 1288.byte 
102,15,56,220,232 1289 movl %r9d,80+12(%rsp) 1290 leaq 6(%r8),%r9 1291.byte 102,15,56,220,240 1292.byte 102,15,56,220,248 1293.byte 102,68,15,56,220,192 1294.byte 102,68,15,56,220,200 1295 movups 128-128(%rcx),%xmm0 1296 bswapl %r9d 1297.byte 102,15,56,220,209 1298.byte 102,15,56,220,217 1299 xorl %ebp,%r9d 1300.byte 0x66,0x90 1301.byte 102,15,56,220,225 1302.byte 102,15,56,220,233 1303 movl %r9d,96+12(%rsp) 1304 leaq 7(%r8),%r9 1305.byte 102,15,56,220,241 1306.byte 102,15,56,220,249 1307.byte 102,68,15,56,220,193 1308.byte 102,68,15,56,220,201 1309 movups 144-128(%rcx),%xmm1 1310 bswapl %r9d 1311.byte 102,15,56,220,208 1312.byte 102,15,56,220,216 1313.byte 102,15,56,220,224 1314 xorl %ebp,%r9d 1315 movdqu 0(%rdi),%xmm10 1316.byte 102,15,56,220,232 1317 movl %r9d,112+12(%rsp) 1318 cmpl $11,%eax 1319.byte 102,15,56,220,240 1320.byte 102,15,56,220,248 1321.byte 102,68,15,56,220,192 1322.byte 102,68,15,56,220,200 1323 movups 160-128(%rcx),%xmm0 1324 1325 jb .Lctr32_enc_done 1326 1327.byte 102,15,56,220,209 1328.byte 102,15,56,220,217 1329.byte 102,15,56,220,225 1330.byte 102,15,56,220,233 1331.byte 102,15,56,220,241 1332.byte 102,15,56,220,249 1333.byte 102,68,15,56,220,193 1334.byte 102,68,15,56,220,201 1335 movups 176-128(%rcx),%xmm1 1336 1337.byte 102,15,56,220,208 1338.byte 102,15,56,220,216 1339.byte 102,15,56,220,224 1340.byte 102,15,56,220,232 1341.byte 102,15,56,220,240 1342.byte 102,15,56,220,248 1343.byte 102,68,15,56,220,192 1344.byte 102,68,15,56,220,200 1345 movups 192-128(%rcx),%xmm0 1346 je .Lctr32_enc_done 1347 1348.byte 102,15,56,220,209 1349.byte 102,15,56,220,217 1350.byte 102,15,56,220,225 1351.byte 102,15,56,220,233 1352.byte 102,15,56,220,241 1353.byte 102,15,56,220,249 1354.byte 102,68,15,56,220,193 1355.byte 102,68,15,56,220,201 1356 movups 208-128(%rcx),%xmm1 1357 1358.byte 102,15,56,220,208 1359.byte 102,15,56,220,216 1360.byte 102,15,56,220,224 1361.byte 102,15,56,220,232 1362.byte 102,15,56,220,240 1363.byte 102,15,56,220,248 1364.byte 
102,68,15,56,220,192 1365.byte 102,68,15,56,220,200 1366 movups 224-128(%rcx),%xmm0 1367 jmp .Lctr32_enc_done 1368 1369.align 16 1370.Lctr32_enc_done: 1371 movdqu 16(%rdi),%xmm11 1372 pxor %xmm0,%xmm10 1373 movdqu 32(%rdi),%xmm12 1374 pxor %xmm0,%xmm11 1375 movdqu 48(%rdi),%xmm13 1376 pxor %xmm0,%xmm12 1377 movdqu 64(%rdi),%xmm14 1378 pxor %xmm0,%xmm13 1379 movdqu 80(%rdi),%xmm15 1380 pxor %xmm0,%xmm14 1381 pxor %xmm0,%xmm15 1382.byte 102,15,56,220,209 1383.byte 102,15,56,220,217 1384.byte 102,15,56,220,225 1385.byte 102,15,56,220,233 1386.byte 102,15,56,220,241 1387.byte 102,15,56,220,249 1388.byte 102,68,15,56,220,193 1389.byte 102,68,15,56,220,201 1390 movdqu 96(%rdi),%xmm1 1391 leaq 128(%rdi),%rdi 1392 1393.byte 102,65,15,56,221,210 1394 pxor %xmm0,%xmm1 1395 movdqu 112-128(%rdi),%xmm10 1396.byte 102,65,15,56,221,219 1397 pxor %xmm0,%xmm10 1398 movdqa 0(%rsp),%xmm11 1399.byte 102,65,15,56,221,228 1400.byte 102,65,15,56,221,237 1401 movdqa 16(%rsp),%xmm12 1402 movdqa 32(%rsp),%xmm13 1403.byte 102,65,15,56,221,246 1404.byte 102,65,15,56,221,255 1405 movdqa 48(%rsp),%xmm14 1406 movdqa 64(%rsp),%xmm15 1407.byte 102,68,15,56,221,193 1408 movdqa 80(%rsp),%xmm0 1409 movups 16-128(%rcx),%xmm1 1410.byte 102,69,15,56,221,202 1411 1412 movups %xmm2,(%rsi) 1413 movdqa %xmm11,%xmm2 1414 movups %xmm3,16(%rsi) 1415 movdqa %xmm12,%xmm3 1416 movups %xmm4,32(%rsi) 1417 movdqa %xmm13,%xmm4 1418 movups %xmm5,48(%rsi) 1419 movdqa %xmm14,%xmm5 1420 movups %xmm6,64(%rsi) 1421 movdqa %xmm15,%xmm6 1422 movups %xmm7,80(%rsi) 1423 movdqa %xmm0,%xmm7 1424 movups %xmm8,96(%rsi) 1425 movups %xmm9,112(%rsi) 1426 leaq 128(%rsi),%rsi 1427 1428 subq $8,%rdx 1429 jnc .Lctr32_loop8 1430 1431 addq $8,%rdx 1432 jz .Lctr32_done 1433 leaq -128(%rcx),%rcx 1434 1435.Lctr32_tail: 1436 1437 1438 leaq 16(%rcx),%rcx 1439 cmpq $4,%rdx 1440 jb .Lctr32_loop3 1441 je .Lctr32_loop4 1442 1443 1444 shll $4,%eax 1445 movdqa 96(%rsp),%xmm8 1446 pxor %xmm9,%xmm9 1447 1448 movups 16(%rcx),%xmm0 1449.byte 
102,15,56,220,209 1450.byte 102,15,56,220,217 1451 leaq 32-16(%rcx,%rax,1),%rcx 1452 negq %rax 1453.byte 102,15,56,220,225 1454 addq $16,%rax 1455 movups (%rdi),%xmm10 1456.byte 102,15,56,220,233 1457.byte 102,15,56,220,241 1458 movups 16(%rdi),%xmm11 1459 movups 32(%rdi),%xmm12 1460.byte 102,15,56,220,249 1461.byte 102,68,15,56,220,193 1462 1463 call .Lenc_loop8_enter 1464 1465 movdqu 48(%rdi),%xmm13 1466 pxor %xmm10,%xmm2 1467 movdqu 64(%rdi),%xmm10 1468 pxor %xmm11,%xmm3 1469 movdqu %xmm2,(%rsi) 1470 pxor %xmm12,%xmm4 1471 movdqu %xmm3,16(%rsi) 1472 pxor %xmm13,%xmm5 1473 movdqu %xmm4,32(%rsi) 1474 pxor %xmm10,%xmm6 1475 movdqu %xmm5,48(%rsi) 1476 movdqu %xmm6,64(%rsi) 1477 cmpq $6,%rdx 1478 jb .Lctr32_done 1479 1480 movups 80(%rdi),%xmm11 1481 xorps %xmm11,%xmm7 1482 movups %xmm7,80(%rsi) 1483 je .Lctr32_done 1484 1485 movups 96(%rdi),%xmm12 1486 xorps %xmm12,%xmm8 1487 movups %xmm8,96(%rsi) 1488 jmp .Lctr32_done 1489 1490.align 32 1491.Lctr32_loop4: 1492.byte 102,15,56,220,209 1493 leaq 16(%rcx),%rcx 1494 decl %eax 1495.byte 102,15,56,220,217 1496.byte 102,15,56,220,225 1497.byte 102,15,56,220,233 1498 movups (%rcx),%xmm1 1499 jnz .Lctr32_loop4 1500.byte 102,15,56,221,209 1501.byte 102,15,56,221,217 1502 movups (%rdi),%xmm10 1503 movups 16(%rdi),%xmm11 1504.byte 102,15,56,221,225 1505.byte 102,15,56,221,233 1506 movups 32(%rdi),%xmm12 1507 movups 48(%rdi),%xmm13 1508 1509 xorps %xmm10,%xmm2 1510 movups %xmm2,(%rsi) 1511 xorps %xmm11,%xmm3 1512 movups %xmm3,16(%rsi) 1513 pxor %xmm12,%xmm4 1514 movdqu %xmm4,32(%rsi) 1515 pxor %xmm13,%xmm5 1516 movdqu %xmm5,48(%rsi) 1517 jmp .Lctr32_done 1518 1519.align 32 1520.Lctr32_loop3: 1521.byte 102,15,56,220,209 1522 leaq 16(%rcx),%rcx 1523 decl %eax 1524.byte 102,15,56,220,217 1525.byte 102,15,56,220,225 1526 movups (%rcx),%xmm1 1527 jnz .Lctr32_loop3 1528.byte 102,15,56,221,209 1529.byte 102,15,56,221,217 1530.byte 102,15,56,221,225 1531 1532 movups (%rdi),%xmm10 1533 xorps %xmm10,%xmm2 1534 movups %xmm2,(%rsi) 1535 cmpq 
$2,%rdx	/* operand of the cmpq that ends the previous line (tail of aes_hw_ctr32_encrypt_blocks) */
	jb .Lctr32_done

	movups 16(%rdi),%xmm11
	xorps %xmm11,%xmm3
	movups %xmm3,16(%rsi)
	je .Lctr32_done

	movups 32(%rdi),%xmm12
	xorps %xmm12,%xmm4
	movups %xmm4,32(%rsi)

/* Common exit of aes_hw_ctr32_encrypt_blocks: clear ebp and xmm0-xmm15,
 * overwrite the 128 bytes at 0..127(%rsp) with zeros, then restore rbp
 * and rsp from the frame anchor kept in r11. */
.Lctr32_done:
	xorps %xmm0,%xmm0
	xorl %ebp,%ebp
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	movaps %xmm0,0(%rsp)
	pxor %xmm8,%xmm8
	movaps %xmm0,16(%rsp)
	pxor %xmm9,%xmm9
	movaps %xmm0,32(%rsp)
	pxor %xmm10,%xmm10
	movaps %xmm0,48(%rsp)
	pxor %xmm11,%xmm11
	movaps %xmm0,64(%rsp)
	pxor %xmm12,%xmm12
	movaps %xmm0,80(%rsp)
	pxor %xmm13,%xmm13
	movaps %xmm0,96(%rsp)
	pxor %xmm14,%xmm14
	movaps %xmm0,112(%rsp)
	pxor %xmm15,%xmm15
	movq -8(%r11),%rbp
.cfi_restore %rbp
	leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lctr32_epilogue:
	.byte 0xf3,0xc3	/* rep ret */
.cfi_endproc
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks

/*
 * aes_hw_xts_encrypt
 *
 * Encrypts a buffer with a per-block tweak, six blocks at a time, with a
 * byte-swapping tail (labels ".Lxts_enc_steal") for a trailing partial block.
 * NOTE(review): presumably the C prototype is
 *   void aes_hw_xts_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                           const AES_KEY *key1, const AES_KEY *key2,
 *                           const uint8_t iv[16]);
 * confirm against the C header.
 *
 * Registers on entry, as used by the code below:
 *   rdi  input pointer           rsi  output pointer
 *   rdx  length in bytes         rcx  key schedule used for the data blocks
 *   r8   key schedule used to encrypt the initial tweak
 *   r9   pointer to the 16-byte initial tweak value
 * The 32-bit value at offset 240 of a key schedule is the aesenc loop
 * count; that many aesenc steps are followed by one aesenclast.
 *
 * Frame: r11 = rsp at entry, rbp saved at -8(%r11), 112 bytes of 16-byte
 * aligned scratch at 0..111(%rsp). rbp holds the data key pointer.
 *
 * Hand-encoded AES-NI instructions (66 0F 38 xx /r):
 *   .byte 102,15,56,220,M   aesenc      %xmmS,%xmmD
 *   .byte 102,15,56,221,M   aesenclast  %xmmS,%xmmD
 *   M = 0xC0 | D<<3 | S, e.g. 209 = 0xD1 -> aesenc %xmm1,%xmm2,
 *   208 = 0xD0 -> aesenc %xmm0,%xmm2, 249 = 0xF9 -> aesenc %xmm1,%xmm7.
 *   .byte 102,15,56,221,84,36,0  is  aesenclast 0(%rsp),%xmm2; the
 *   92/100/108/116/124 variants target xmm3..xmm7 with disp 16..80.
 *   .byte 0xf3,0xc3          rep ret
 */
.globl aes_hw_xts_encrypt
.hidden aes_hw_xts_encrypt
.type aes_hw_xts_encrypt,@function
.align 16
aes_hw_xts_encrypt:
.cfi_startproc
	leaq (%rsp),%r11
.cfi_def_cfa_register %r11
	pushq %rbp
.cfi_offset %rbp,-16
	subq $112,%rsp
	andq $-16,%rsp
	/* xmm2 = 16 bytes at (%r9), encrypted in place with the r8 key schedule. */
	movups (%r9),%xmm2
	movl 240(%r8),%eax
	movl 240(%rcx),%r10d
	movups (%r8),%xmm0
	movups 16(%r8),%xmm1
	leaq 32(%r8),%r8
	xorps %xmm0,%xmm2
.Loop_enc1_8:
.byte 102,15,56,220,209
	decl %eax
	movups (%r8),%xmm1
	leaq 16(%r8),%r8
	jnz .Loop_enc1_8
.byte 102,15,56,221,209
	/* xmm0 = round key 0 of the data key, rbp = data key pointer,
	 * eax = loop count, r10 = 16 * loop count, r9 = original length,
	 * rdx = length rounded down to a multiple of 16. */
	movups (%rcx),%xmm0
	movq %rcx,%rbp
	movl %r10d,%eax
	shll $4,%r10d
	movq %rdx,%r9
	andq $-16,%rdx

	/* xmm1 = last round key; XORed with round key 0 a few lines below. */
	movups 16(%rcx,%r10,1),%xmm1

	/* Derive the tweaks for the first six blocks from xmm2. Each step
	 * copies the running value (xmm15) out XORed with round key 0,
	 * doubles both 64-bit halves with paddq, and folds in
	 * (.Lxts_magic AND mask), where the mask comes from psrad $31 on the
	 * dword-shuffled copy kept in xmm9.
	 * Result: xmm10..xmm14 = tweaks 1..5 XOR round key 0, xmm15 = tweak 6.
	 * NOTE(review): .Lxts_magic is defined outside this chunk; presumably
	 * the GF(2^128) reduction constant used by XTS - confirm. */
	movdqa .Lxts_magic(%rip),%xmm8
	movdqa %xmm2,%xmm15
	pshufd $0x5f,%xmm2,%xmm9
	pxor %xmm0,%xmm1
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm10
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm10
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm11
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm11
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm12
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm12
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm13
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm13
	pxor %xmm14,%xmm15
	movdqa %xmm15,%xmm14
	psrad $31,%xmm9
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pxor %xmm0,%xmm14
	pxor %xmm9,%xmm15
	/* 96(%rsp) = round key 0 XOR last round key. */
	movaps %xmm1,96(%rsp)

	/* Fewer than 96 bytes of whole blocks: go straight to the tail code. */
	subq $96,%rdx
	jc .Lxts_enc_short

	/* rcx = key + 32 + 16*count, rax = r10 = 112 - 16*count: rax is the
	 * index for the round-key loop and counts up to zero. r8 = address
	 * of .Lxts_magic. */
	movl $16+96,%eax
	leaq 32(%rbp,%r10,1),%rcx
	subq %r10,%rax
	movups 16(%rbp),%xmm1
	movq %rax,%r10
	leaq .Lxts_magic(%rip),%r8
	jmp .Lxts_enc_grandloop

/* Main loop: 96 bytes (six blocks) per iteration. Loads and tweak XORs are
 * interleaved with the first AES rounds. Each tweak is also XORed with
 * xmm9 (round key 0 XOR last round key) and parked at 0..80(%rsp) so the
 * final aesenclast can apply last round key and tweak from memory. */
.align 32
.Lxts_enc_grandloop:
	movdqu 0(%rdi),%xmm2
	movdqa %xmm0,%xmm8
	movdqu 16(%rdi),%xmm3
	pxor %xmm10,%xmm2
	movdqu 32(%rdi),%xmm4
	pxor %xmm11,%xmm3
.byte 102,15,56,220,209
	movdqu 48(%rdi),%xmm5
	pxor %xmm12,%xmm4
.byte 102,15,56,220,217
	movdqu 64(%rdi),%xmm6
	pxor %xmm13,%xmm5
.byte 102,15,56,220,225
	movdqu 80(%rdi),%xmm7
	pxor %xmm15,%xmm8
	movdqa 96(%rsp),%xmm9
	pxor %xmm14,%xmm6
.byte 102,15,56,220,233
	movups 32(%rbp),%xmm0
	leaq 96(%rdi),%rdi
	pxor %xmm8,%xmm7

	pxor %xmm9,%xmm10
.byte 102,15,56,220,241
	pxor %xmm9,%xmm11
	movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
	movups 48(%rbp),%xmm1
	pxor %xmm9,%xmm12

.byte 102,15,56,220,208
	pxor %xmm9,%xmm13
	movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216
	pxor %xmm9,%xmm14
	movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	pxor %xmm9,%xmm8
	movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups 64(%rbp),%xmm0
	movdqa %xmm8,80(%rsp)
	pshufd $0x5f,%xmm15,%xmm9
	jmp .Lxts_enc_loop6
/* Two rounds per pass over all six blocks; runs until rax reaches zero. */
.align 32
.Lxts_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups -64(%rcx,%rax,1),%xmm1
	addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups -80(%rcx,%rax,1),%xmm0
	jnz .Lxts_enc_loop6

	/* Remaining rounds, interleaved with computing the next six tweaks.
	 * xmm10..xmm14 are rebuilt as (round key 0 XOR tweak) and xmm15
	 * keeps advancing, using the same doubling step as before the loop. */
	movdqa (%r8),%xmm8
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
.byte 102,15,56,220,209
	paddq %xmm15,%xmm15
	psrad $31,%xmm14
.byte 102,15,56,220,217
	pand %xmm8,%xmm14
	movups (%rbp),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
	pxor %xmm14,%xmm15
	movaps %xmm10,%xmm11
.byte 102,15,56,220,249
	movups -64(%rcx),%xmm1

	movdqa %xmm9,%xmm14
.byte 102,15,56,220,208
	paddd %xmm9,%xmm9
	pxor %xmm15,%xmm10
.byte 102,15,56,220,216
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	pand %xmm8,%xmm14
	movaps %xmm11,%xmm12
.byte 102,15,56,220,240
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
.byte 102,15,56,220,248
	movups -48(%rcx),%xmm0

	paddd %xmm9,%xmm9
.byte 102,15,56,220,209
	pxor %xmm15,%xmm11
	psrad $31,%xmm14
.byte 102,15,56,220,217
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movdqa %xmm13,48(%rsp)	/* late store of the fourth block's final-round operand */
	pxor %xmm14,%xmm15
.byte 102,15,56,220,241
	movaps %xmm12,%xmm13
	movdqa %xmm9,%xmm14
.byte 102,15,56,220,249
	movups -32(%rcx),%xmm1

	paddd %xmm9,%xmm9
.byte 102,15,56,220,208
	pxor %xmm15,%xmm12
	psrad $31,%xmm14
.byte 102,15,56,220,216
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
	pxor %xmm14,%xmm15
	movaps %xmm13,%xmm14
.byte 102,15,56,220,248

	movdqa %xmm9,%xmm0
	paddd %xmm9,%xmm9
.byte 102,15,56,220,209
	pxor %xmm15,%xmm13
	psrad $31,%xmm0
.byte 102,15,56,220,217
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	pxor %xmm0,%xmm15
	movups (%rbp),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups 16(%rbp),%xmm1

	/* Final round: aesenclast with the parked operands at 0..80(%rsp). */
	pxor %xmm15,%xmm14
.byte 102,15,56,221,84,36,0
	psrad $31,%xmm9
	paddq %xmm15,%xmm15
.byte 102,15,56,221,92,36,16
.byte 102,15,56,221,100,36,32
	pand %xmm8,%xmm9
	movq %r10,%rax	/* reset the round-key index for the next iteration */
.byte 102,15,56,221,108,36,48
.byte 102,15,56,221,116,36,64
.byte 102,15,56,221,124,36,80
	pxor %xmm9,%xmm15

	leaq 96(%rsi),%rsi
	movups %xmm2,-96(%rsi)
	movups %xmm3,-80(%rsi)
	movups %xmm4,-64(%rsi)
	movups %xmm5,-48(%rsi)
	movups %xmm6,-32(%rsi)
	movups %xmm7,-16(%rsi)
	subq $96,%rdx
	jnc .Lxts_enc_grandloop

	/* Recover eax = loop count (112 - r10, divided by 16) and rcx = key. */
	movl $16+96,%eax
	subl %r10d,%eax
	movq %rbp,%rcx
	shrl $4,%eax

/* Tail: 0..5 whole blocks remain. r10d keeps the loop count. The pxor with
 * xmm0 (round key 0) removes the round key that was pre-applied to each
 * tweak, one tweak at a time, only as far as the dispatch needs. */
.Lxts_enc_short:

	movl %eax,%r10d
	pxor %xmm0,%xmm10
	addq $96,%rdx
	jz .Lxts_enc_done

	pxor %xmm0,%xmm11
	cmpq $0x20,%rdx
	jb .Lxts_enc_one
	pxor %xmm0,%xmm12
	je .Lxts_enc_two

	pxor %xmm0,%xmm13
	cmpq $0x40,%rdx
	jb .Lxts_enc_three
	pxor %xmm0,%xmm14
	je .Lxts_enc_four

	/* Five blocks: run the six-block helper with xmm7 zeroed; only five
	 * results are stored. xmm10 = next unused tweak for the steal path. */
	movdqu (%rdi),%xmm2
	movdqu 16(%rdi),%xmm3
	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	leaq 80(%rdi),%rdi
	pxor %xmm12,%xmm4
	pxor %xmm13,%xmm5
	pxor %xmm14,%xmm6
	pxor %xmm7,%xmm7

	call _aesni_encrypt6

	xorps %xmm10,%xmm2
	movdqa %xmm15,%xmm10
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	xorps %xmm14,%xmm6
	movdqu %xmm4,32(%rsi)
	movdqu %xmm5,48(%rsi)
	movdqu %xmm6,64(%rsi)
	leaq 80(%rsi),%rsi
	jmp .Lxts_enc_done

/* One block: inline single-block encryption, tweak XORed before and after. */
.align 16
.Lxts_enc_one:
	movups (%rdi),%xmm2
	leaq 16(%rdi),%rdi
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_9:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_9
.byte 102,15,56,221,209
	xorps %xmm10,%xmm2
	movdqa %xmm11,%xmm10
	movups %xmm2,(%rsi)
	leaq 16(%rsi),%rsi
	jmp .Lxts_enc_done

/* Two blocks. */
.align 16
.Lxts_enc_two:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	leaq 32(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3

	call _aesni_encrypt2

	xorps %xmm10,%xmm2
	movdqa %xmm12,%xmm10
	xorps %xmm11,%xmm3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	leaq 32(%rsi),%rsi
	jmp .Lxts_enc_done

/* Three blocks. */
.align 16
.Lxts_enc_three:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	leaq 48(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4

	call _aesni_encrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm13,%xmm10
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	leaq 48(%rsi),%rsi
	jmp .Lxts_enc_done

/* Four blocks. */
.align 16
.Lxts_enc_four:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	xorps %xmm10,%xmm2
	movups 48(%rdi),%xmm5
	leaq 64(%rdi),%rdi
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	xorps %xmm13,%xmm5

	call _aesni_encrypt4

	pxor %xmm10,%xmm2
	movdqa %xmm14,%xmm10
	pxor %xmm11,%xmm3
	pxor %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	pxor %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	movdqu %xmm4,32(%rsi)
	movdqu %xmm5,48(%rsi)
	leaq 64(%rsi),%rsi
	jmp .Lxts_enc_done

/* r9 & 15 = number of trailing bytes beyond the last whole block. If
 * nonzero, swap bytes between the trailing input and the last output block
 * and re-encrypt that block with the tweak left in xmm10. */
.align 16
.Lxts_enc_done:
	andq $15,%r9
	jz .Lxts_enc_ret
	movq %r9,%rdx

.Lxts_enc_steal:
	movzbl (%rdi),%eax	/* next trailing input byte */
	movzbl -16(%rsi),%ecx	/* byte of the last output block it replaces */
	leaq 1(%rdi),%rdi
	movb %al,-16(%rsi)
	movb %cl,0(%rsi)	/* displaced byte becomes part of the short final output */
	leaq 1(%rsi),%rsi
	subq $1,%rdx
	jnz .Lxts_enc_steal

	subq %r9,%rsi
	movq %rbp,%rcx
	movl %r10d,%eax

	movups -16(%rsi),%xmm2
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_10
.byte 102,15,56,221,209
	xorps %xmm10,%xmm2
	movups %xmm2,-16(%rsi)

/* Exit: clear xmm0-xmm15 and the 112-byte stack scratch, restore rbp/rsp. */
.Lxts_enc_ret:
	xorps %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	movaps %xmm0,0(%rsp)
	pxor %xmm8,%xmm8
	movaps %xmm0,16(%rsp)
	pxor %xmm9,%xmm9
	movaps %xmm0,32(%rsp)
	pxor %xmm10,%xmm10
	movaps %xmm0,48(%rsp)
	pxor %xmm11,%xmm11
	movaps %xmm0,64(%rsp)
	pxor %xmm12,%xmm12
	movaps %xmm0,80(%rsp)
	pxor %xmm13,%xmm13
	movaps %xmm0,96(%rsp)
	pxor %xmm14,%xmm14
	pxor %xmm15,%xmm15
	movq -8(%r11),%rbp
.cfi_restore %rbp
	leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lxts_enc_epilogue:
	.byte 0xf3,0xc3	/* rep ret */
.cfi_endproc
.size aes_hw_xts_encrypt,.-aes_hw_xts_encrypt

/*
 * aes_hw_xts_decrypt
 *
 * Decrypting counterpart of aes_hw_xts_encrypt with the same register
 * interface (rdi in, rsi out, rdx byte length, rcx data key, r8 tweak key,
 * r9 pointer to the initial tweak) and the same frame layout.
 * NOTE(review): prototype presumably mirrors aes_hw_xts_encrypt - confirm.
 *
 * Differences visible in the code:
 *   - the initial tweak is still produced with aesenc/aesenclast on the r8
 *     key; the data blocks use aesdec/aesdeclast
 *     (.byte 102,15,56,222,M = aesdec, 223 = aesdeclast; M = 0xC0|D<<3|S;
 *      the 7-byte 223,...,36,disp forms are aesdeclast disp(%rsp),%xmmD);
 *   - when the length is not a multiple of 16, one extra whole block is
 *     held back from the bulk path and handled together with the trailing
 *     bytes at .Lxts_dec_done2, where two tweaks are used: xmm11 for the
 *     held-back block first, then xmm10 for the block rebuilt by the byte
 *     swap.
 */
.globl aes_hw_xts_decrypt
.hidden aes_hw_xts_decrypt
.type aes_hw_xts_decrypt,@function
.align 16
aes_hw_xts_decrypt:
.cfi_startproc
	leaq (%rsp),%r11
.cfi_def_cfa_register %r11
	pushq %rbp
.cfi_offset %rbp,-16
	subq $112,%rsp
	andq $-16,%rsp
	/* xmm2 = 16 bytes at (%r9), encrypted with the r8 key schedule. */
	movups (%r9),%xmm2
	movl 240(%r8),%eax
	movl 240(%rcx),%r10d
	movups (%r8),%xmm0
	movups 16(%r8),%xmm1
	leaq 32(%r8),%r8
	xorps %xmm0,%xmm2
.Loop_enc1_11:
.byte 102,15,56,220,209
	decl %eax
	movups (%r8),%xmm1
	leaq 16(%r8),%r8
	jnz .Loop_enc1_11
.byte 102,15,56,221,209
	/* rdx -= 16 if the length has a partial trailing block, else -= 0. */
	xorl %eax,%eax
	testq $15,%rdx
	setnz %al
	shlq $4,%rax
	subq %rax,%rdx

	movups (%rcx),%xmm0
	movq %rcx,%rbp
	movl %r10d,%eax
	shll $4,%r10d
	movq %rdx,%r9
	andq $-16,%rdx

	movups 16(%rcx,%r10,1),%xmm1

	/* Tweak derivation, identical in structure to the encrypt side:
	 * xmm10..xmm14 = tweaks 1..5 XOR round key 0, xmm15 = tweak 6.
	 * NOTE(review): .Lxts_magic is defined outside this chunk - confirm
	 * its value. */
	movdqa .Lxts_magic(%rip),%xmm8
	movdqa %xmm2,%xmm15
	pshufd $0x5f,%xmm2,%xmm9
	pxor %xmm0,%xmm1
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm10
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm10
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm11
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm11
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm12
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm12
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
	movdqa %xmm15,%xmm13
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
	pxor %xmm0,%xmm13
	pxor %xmm14,%xmm15
	movdqa %xmm15,%xmm14
	psrad $31,%xmm9
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pxor %xmm0,%xmm14
	pxor %xmm9,%xmm15
	/* 96(%rsp) = round key 0 XOR last round key. */
	movaps %xmm1,96(%rsp)

	subq $96,%rdx
	jc .Lxts_dec_short

	/* Same loop setup as the encrypt side: rcx past the key schedule,
	 * rax = r10 = negative-going round-key index, r8 = &.Lxts_magic. */
	movl $16+96,%eax
	leaq 32(%rbp,%r10,1),%rcx
	subq %r10,%rax
	movups 16(%rbp),%xmm1
	movq %rax,%r10
	leaq .Lxts_magic(%rip),%r8
	jmp .Lxts_dec_grandloop

/* Main loop: six blocks per iteration; final-round operands are parked at
 * 0..80(%rsp) as in the encrypt loop. */
.align 32
.Lxts_dec_grandloop:
	movdqu 0(%rdi),%xmm2
	movdqa %xmm0,%xmm8
	movdqu 16(%rdi),%xmm3
	pxor %xmm10,%xmm2
	movdqu 32(%rdi),%xmm4
	pxor %xmm11,%xmm3
.byte 102,15,56,222,209
	movdqu 48(%rdi),%xmm5
	pxor %xmm12,%xmm4
.byte 102,15,56,222,217
	movdqu 64(%rdi),%xmm6
	pxor %xmm13,%xmm5
.byte 102,15,56,222,225
	movdqu 80(%rdi),%xmm7
	pxor %xmm15,%xmm8
	movdqa 96(%rsp),%xmm9
	pxor %xmm14,%xmm6
.byte 102,15,56,222,233
	movups 32(%rbp),%xmm0
	leaq 96(%rdi),%rdi
	pxor %xmm8,%xmm7

	pxor %xmm9,%xmm10
.byte 102,15,56,222,241
	pxor %xmm9,%xmm11
	movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
	movups 48(%rbp),%xmm1
	pxor %xmm9,%xmm12

.byte 102,15,56,222,208
	pxor %xmm9,%xmm13
	movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,216
	pxor %xmm9,%xmm14
	movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	pxor %xmm9,%xmm8
	movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
.byte 102,15,56,222,248
	movups 64(%rbp),%xmm0
	movdqa %xmm8,80(%rsp)
	pshufd $0x5f,%xmm15,%xmm9
	jmp .Lxts_dec_loop6
/* Two rounds per pass over all six blocks; runs until rax reaches zero. */
.align 32
.Lxts_dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
	movups -64(%rcx,%rax,1),%xmm1
	addq $32,%rax

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
	movups -80(%rcx,%rax,1),%xmm0
	jnz .Lxts_dec_loop6

	/* Remaining rounds interleaved with the next six tweaks. */
	movdqa (%r8),%xmm8
	movdqa %xmm9,%xmm14
	paddd %xmm9,%xmm9
.byte 102,15,56,222,209
	paddq %xmm15,%xmm15
	psrad $31,%xmm14
.byte 102,15,56,222,217
	pand %xmm8,%xmm14
	movups (%rbp),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
	pxor %xmm14,%xmm15
	movaps %xmm10,%xmm11
.byte 102,15,56,222,249
	movups -64(%rcx),%xmm1

	movdqa %xmm9,%xmm14
.byte 102,15,56,222,208
	paddd %xmm9,%xmm9
	pxor %xmm15,%xmm10
.byte 102,15,56,222,216
	psrad $31,%xmm14
	paddq %xmm15,%xmm15
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	pand %xmm8,%xmm14
	movaps %xmm11,%xmm12
.byte 102,15,56,222,240
	pxor %xmm14,%xmm15
	movdqa %xmm9,%xmm14
.byte 102,15,56,222,248
	movups -48(%rcx),%xmm0

	paddd %xmm9,%xmm9
.byte 102,15,56,222,209
	pxor %xmm15,%xmm11
	psrad $31,%xmm14
.byte 102,15,56,222,217
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movdqa %xmm13,48(%rsp)	/* late store of the fourth block's final-round operand */
	pxor %xmm14,%xmm15
.byte 102,15,56,222,241
	movaps %xmm12,%xmm13
	movdqa %xmm9,%xmm14
.byte 102,15,56,222,249
	movups -32(%rcx),%xmm1

	paddd %xmm9,%xmm9
.byte 102,15,56,222,208
	pxor %xmm15,%xmm12
	psrad $31,%xmm14
.byte 102,15,56,222,216
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
	pxor %xmm14,%xmm15
	movaps %xmm13,%xmm14
.byte 102,15,56,222,248

	movdqa %xmm9,%xmm0
	paddd %xmm9,%xmm9
.byte 102,15,56,222,209
	pxor %xmm15,%xmm13
	psrad $31,%xmm0
.byte 102,15,56,222,217
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	pxor %xmm0,%xmm15
	movups (%rbp),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
	movups 16(%rbp),%xmm1

	/* Final round: aesdeclast with the parked operands at 0..80(%rsp). */
	pxor %xmm15,%xmm14
.byte 102,15,56,223,84,36,0
	psrad $31,%xmm9
	paddq %xmm15,%xmm15
.byte 102,15,56,223,92,36,16
.byte 102,15,56,223,100,36,32
	pand %xmm8,%xmm9
	movq %r10,%rax	/* reset the round-key index for the next iteration */
.byte 102,15,56,223,108,36,48
.byte 102,15,56,223,116,36,64
.byte 102,15,56,223,124,36,80
	pxor %xmm9,%xmm15

	leaq 96(%rsi),%rsi
	movups %xmm2,-96(%rsi)
	movups %xmm3,-80(%rsi)
	movups %xmm4,-64(%rsi)
	movups %xmm5,-48(%rsi)
	movups %xmm6,-32(%rsi)
	movups %xmm7,-16(%rsi)
	subq $96,%rdx
	jnc .Lxts_dec_grandloop

	/* Recover eax = loop count and rcx = key pointer. */
	movl $16+96,%eax
	subl %r10d,%eax
	movq %rbp,%rcx
	shrl $4,%eax

/* Tail: 0..5 whole blocks remain. Unlike the encrypt side, the round key
 * is stripped one tweak ahead (xmm11 before the first branch, and so on)
 * because each case hands two tweaks (xmm10, xmm11) to the steal path. */
.Lxts_dec_short:

	movl %eax,%r10d
	pxor %xmm0,%xmm10
	pxor %xmm0,%xmm11
	addq $96,%rdx
	jz .Lxts_dec_done

	pxor %xmm0,%xmm12
	cmpq $0x20,%rdx
	jb .Lxts_dec_one
	pxor %xmm0,%xmm13
	je .Lxts_dec_two

	pxor %xmm0,%xmm14
	cmpq $0x40,%rdx
	jb .Lxts_dec_three
	je .Lxts_dec_four

	/* Five blocks via the six-block helper; five results are stored. */
	movdqu (%rdi),%xmm2
	movdqu 16(%rdi),%xmm3
	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	leaq 80(%rdi),%rdi
	pxor %xmm12,%xmm4
	pxor %xmm13,%xmm5
	pxor %xmm14,%xmm6

	call _aesni_decrypt6

	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	xorps %xmm14,%xmm6
	movdqu %xmm4,32(%rsi)
	pxor %xmm14,%xmm14
	movdqu %xmm5,48(%rsi)
	pcmpgtd %xmm15,%xmm14	/* per-dword sign masks of xmm15 (xmm14 was zero) */
	movdqu %xmm6,64(%rsi)
	leaq 80(%rsi),%rsi
	pshufd $0x13,%xmm14,%xmm11
	andq $15,%r9
	jz .Lxts_dec_ret

	/* Trailing bytes exist: xmm10 = current tweak, xmm11 = the tweak
	 * after it (one more doubling step). */
	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm11
	pxor %xmm15,%xmm11
	jmp .Lxts_dec_done2

/* One block, decrypted inline. */
.align 16
.Lxts_dec_one:
	movups (%rdi),%xmm2
	leaq 16(%rdi),%rdi
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_12
.byte 102,15,56,223,209
	xorps %xmm10,%xmm2
	movdqa %xmm11,%xmm10
	movups %xmm2,(%rsi)
	movdqa %xmm12,%xmm11
	leaq 16(%rsi),%rsi
	jmp .Lxts_dec_done

/* Two blocks. */
.align 16
.Lxts_dec_two:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	leaq 32(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3

	call _aesni_decrypt2

	xorps %xmm10,%xmm2
	movdqa %xmm12,%xmm10
	xorps %xmm11,%xmm3
	movdqa %xmm13,%xmm11
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	leaq 32(%rsi),%rsi
	jmp .Lxts_dec_done

/* Three blocks. */
.align 16
.Lxts_dec_three:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	leaq 48(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4

	call _aesni_decrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm13,%xmm10
	xorps %xmm11,%xmm3
	movdqa %xmm14,%xmm11
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	leaq 48(%rsi),%rsi
	jmp .Lxts_dec_done

/* Four blocks. */
.align 16
.Lxts_dec_four:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	xorps %xmm10,%xmm2
	movups 48(%rdi),%xmm5
	leaq 64(%rdi),%rdi
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	xorps %xmm13,%xmm5

	call _aesni_decrypt4

	pxor %xmm10,%xmm2
	movdqa %xmm14,%xmm10
	pxor %xmm11,%xmm3
	movdqa %xmm15,%xmm11
	pxor %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	pxor %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	movdqu %xmm4,32(%rsi)
	movdqu %xmm5,48(%rsi)
	leaq 64(%rsi),%rsi
	jmp .Lxts_dec_done

/* r9 & 15 = trailing byte count. If nonzero: decrypt the held-back whole
 * block at (%rdi) with tweak xmm11, swap bytes with the trailing input in
 * .Lxts_dec_steal, then decrypt the rebuilt block with tweak xmm10. */
.align 16
.Lxts_dec_done:
	andq $15,%r9
	jz .Lxts_dec_ret
.Lxts_dec_done2:
	movq %r9,%rdx
	movq %rbp,%rcx
	movl %r10d,%eax

	movups (%rdi),%xmm2
	xorps %xmm11,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_13:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_13
.byte 102,15,56,223,209
	xorps %xmm11,%xmm2
	movups %xmm2,(%rsi)

.Lxts_dec_steal:
	movzbl 16(%rdi),%eax	/* next trailing input byte */
	movzbl (%rsi),%ecx	/* byte of the just-written block it replaces */
	leaq 1(%rdi),%rdi
	movb %al,(%rsi)
	movb %cl,16(%rsi)	/* displaced byte goes to the short final output */
	leaq 1(%rsi),%rsi
	subq $1,%rdx
	jnz .Lxts_dec_steal

	subq %r9,%rsi
	movq %rbp,%rcx
	movl %r10d,%eax

	movups (%rsi),%xmm2
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_14:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_14
.byte 102,15,56,223,209
	xorps %xmm10,%xmm2
	movups %xmm2,(%rsi)

/* Exit: clear xmm0-xmm15 and the 112-byte stack scratch, restore rbp/rsp. */
.Lxts_dec_ret:
	xorps %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	movaps %xmm0,0(%rsp)
	pxor %xmm8,%xmm8
	movaps %xmm0,16(%rsp)
	pxor %xmm9,%xmm9
	movaps %xmm0,32(%rsp)
	pxor %xmm10,%xmm10
	movaps %xmm0,48(%rsp)
	pxor %xmm11,%xmm11
	movaps %xmm0,64(%rsp)
	pxor %xmm12,%xmm12
	movaps %xmm0,80(%rsp)
	pxor %xmm13,%xmm13
	movaps %xmm0,96(%rsp)
	pxor %xmm14,%xmm14
	pxor %xmm15,%xmm15
	movq -8(%r11),%rbp
.cfi_restore %rbp
	leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lxts_dec_epilogue:
	.byte 0xf3,0xc3	/* rep ret */
.cfi_endproc
.size aes_hw_xts_decrypt,.-aes_hw_xts_decrypt

/*
 * aes_hw_ocb_encrypt (entry and first pushes; the function continues on the
 * following lines).
 *
 * Registers on entry, as used by the code:
 *   rdi  input pointer           rsi  output pointer
 *   rdx  number of 16-byte blocks
 *   rcx  key schedule            r8   starting block counter
 *   r9   pointer to the 16-byte running offset (read, then written back)
 *   8(%rsp) and 16(%rsp) at entry: two further pointer arguments, loaded
 *   into rbx (table indexed by 16 * bit-scan of the counter) and rbp
 *   (16-byte accumulator, read and written back).
 * NOTE(review): presumably the OCB L_ table and checksum - confirm against
 * the C prototype.
 * rax = rsp at entry so the stack arguments can be read after the pushes.
 */
.globl aes_hw_ocb_encrypt
.hidden aes_hw_ocb_encrypt
.type aes_hw_ocb_encrypt,@function
.align 32
aes_hw_ocb_encrypt:
.cfi_startproc
	leaq (%rsp),%rax
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
/*
 * Continuation of the aes_hw_ocb_encrypt prologue (the label and the first
 * two pushes are on the preceding lines; rax still holds rsp at entry).
 *
 * Register roles established below:
 *   rbx  = pointer loaded from 8(%rax)   (table indexed by r12/r13/r14)
 *   rbp  = pointer loaded from 16(%rax)  (16-byte accumulator, xmm8)
 *   r11  = key schedule, rcx = key + 32 + 16*count,
 *   rax  = r10 = 48 - 16*count (round-key index, counts up to zero),
 *          where count is the 32-bit value at 240(key)
 *   xmm9 = round key 0 XOR last round key
 *   xmm15 = running offset loaded from (%r9), XOR last round key
 *   xmm10 = first table entry, xmm8 = accumulator
 *   rdi/rsi = input/output, rdx = blocks remaining, r8 = block counter
 * NOTE(review): rbx/rbp presumably are the OCB L_ table and checksum -
 * confirm against the C prototype.
 *
 * Hand-encoded AES-NI instructions (66 0F 38 xx /r):
 *   .byte 102,15,56,220,M      aesenc      %xmmS,%xmmD   M = 0xC0|D<<3|S
 *   .byte 102,15,56,221,M      aesenclast  %xmmS,%xmmD
 *   .byte 102,65,15,56,221,M   aesenclast  %xmm(S+8),%xmmD
 *       e.g. 210 = 0xD2 -> aesenclast %xmm10,%xmm2
 *   .byte 0xf3,0xc3            rep ret
 */
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	movq 8(%rax),%rbx
	movq 8+8(%rax),%rbp

	movl 240(%rcx),%r10d
	movq %rcx,%r11
	shll $4,%r10d
	movups (%rcx),%xmm9
	movups 16(%rcx,%r10,1),%xmm1

	movdqu (%r9),%xmm15
	pxor %xmm1,%xmm9
	pxor %xmm1,%xmm15

	movl $16+32,%eax
	leaq 32(%r11,%r10,1),%rcx
	movups 16(%r11),%xmm1
	subq %r10,%rax
	movq %rax,%r10

	movdqu (%rbx),%xmm10
	movdqu (%rbp),%xmm8

	/* If the block counter is even, process a single block first so the
	 * bulk code always starts from an odd counter. */
	testq $1,%r8
	jnz .Locb_enc_odd

	bsfq %r8,%r12
	addq $1,%r8
	shlq $4,%r12
	movdqu (%rbx,%r12,1),%xmm7
	movdqu (%rdi),%xmm2
	leaq 16(%rdi),%rdi

	call __ocb_encrypt1

	movdqa %xmm7,%xmm15
	movups %xmm2,(%rsi)
	leaq 16(%rsi),%rsi
	subq $1,%rdx
	jz .Locb_enc_done

/* r12/r13/r14 = 16 * bsf(counter+1 / +3 / +5): byte offsets into the rbx
 * table for the 2nd, 4th and 6th block of the next group. r8 += 6. */
.Locb_enc_odd:
	leaq 1(%r8),%r12
	leaq 3(%r8),%r13
	leaq 5(%r8),%r14
	leaq 6(%r8),%r8
	bsfq %r12,%r12
	bsfq %r13,%r13
	bsfq %r14,%r14
	shlq $4,%r12
	shlq $4,%r13
	shlq $4,%r14

	subq $6,%rdx
	jc .Locb_enc_short
	jmp .Locb_enc_grandloop

/* Main loop: six blocks per iteration through __ocb_encrypt6. */
.align 32
.Locb_enc_grandloop:
	movdqu 0(%rdi),%xmm2
	movdqu 16(%rdi),%xmm3
	movdqu 32(%rdi),%xmm4
	movdqu 48(%rdi),%xmm5
	movdqu 64(%rdi),%xmm6
	movdqu 80(%rdi),%xmm7
	leaq 96(%rdi),%rdi

	call __ocb_encrypt6

	movups %xmm2,0(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	leaq 96(%rsi),%rsi
	subq $6,%rdx
	jnc .Locb_enc_grandloop

/* Tail: 0..5 blocks remain. Blocks are loaded incrementally while the
 * count is compared, then one of the helpers is called with the unused
 * block registers zeroed. */
.Locb_enc_short:
	addq $6,%rdx
	jz .Locb_enc_done

	movdqu 0(%rdi),%xmm2
	cmpq $2,%rdx
	jb .Locb_enc_one
	movdqu 16(%rdi),%xmm3
	je .Locb_enc_two

	movdqu 32(%rdi),%xmm4
	cmpq $4,%rdx
	jb .Locb_enc_three
	movdqu 48(%rdi),%xmm5
	je .Locb_enc_four

	/* Five blocks: six-block helper with xmm7 = 0; the fifth offset
	 * (xmm14) becomes the running offset and xmm7 is not stored. */
	movdqu 64(%rdi),%xmm6
	pxor %xmm7,%xmm7

	call __ocb_encrypt6

	movdqa %xmm14,%xmm15
	movups %xmm2,0(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)

	jmp .Locb_enc_done

/* One block: xmm7 = first table entry (xmm10), as expected by the helper. */
.align 16
.Locb_enc_one:
	movdqa %xmm10,%xmm7

	call __ocb_encrypt1

	movdqa %xmm7,%xmm15
	movups %xmm2,0(%rsi)
	jmp .Locb_enc_done

/* Two blocks: four-block helper with xmm4 = xmm5 = 0; offset from xmm11. */
.align 16
.Locb_enc_two:
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5

	call __ocb_encrypt4

	movdqa %xmm11,%xmm15
	movups %xmm2,0(%rsi)
	movups %xmm3,16(%rsi)

	jmp .Locb_enc_done

/* Three blocks: four-block helper with xmm5 = 0; offset from xmm12. */
.align 16
.Locb_enc_three:
	pxor %xmm5,%xmm5

	call __ocb_encrypt4

	movdqa %xmm12,%xmm15
	movups %xmm2,0(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)

	jmp .Locb_enc_done

/* Four blocks; offset from xmm13. */
.align 16
.Locb_enc_four:
	call __ocb_encrypt4

	movdqa %xmm13,%xmm15
	movups %xmm2,0(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)

/* Write back the accumulator (xmm8) and the running offset (xmm15 XOR
 * xmm0), clear xmm0-xmm15, restore the callee-saved registers and return.
 * NOTE(review): xmm0 is expected to hold the last round key here, as left
 * by the helper loops - confirm for the path with no helper call. */
.Locb_enc_done:
	pxor %xmm0,%xmm15
	movdqu %xmm8,(%rbp)
	movdqu %xmm15,(%r9)

	xorps %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	pxor %xmm8,%xmm8
	pxor %xmm9,%xmm9
	pxor %xmm10,%xmm10
	pxor %xmm11,%xmm11
	pxor %xmm12,%xmm12
	pxor %xmm13,%xmm13
	pxor %xmm14,%xmm14
	pxor %xmm15,%xmm15
	leaq 40(%rsp),%rax	/* rax = rsp + 5 pushes = rsp at entry */
.cfi_def_cfa %rax,8
	movq -40(%rax),%r14
.cfi_restore %r14
	movq -32(%rax),%r13
.cfi_restore %r13
	movq -24(%rax),%r12
.cfi_restore %r12
	movq -16(%rax),%rbp
.cfi_restore %rbp
	movq -8(%rax),%rbx
.cfi_restore %rbx
	leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Locb_enc_epilogue:
	.byte 0xf3,0xc3	/* rep ret */
.cfi_endproc
.size aes_hw_ocb_encrypt,.-aes_hw_ocb_encrypt

/*
 * __ocb_encrypt6 - local helper, encrypts six blocks held in xmm2..xmm7.
 *
 * In:  xmm2..xmm7 input blocks; xmm8 accumulator; xmm9 = round key 0 XOR
 *      last round key; xmm10 = first table entry; xmm15 = running offset
 *      XOR last round key; rbx table base; r12/r13/r14 table byte offsets;
 *      r8 block counter; r11 key; rcx/rax round-key pointer and index;
 *      xmm1 = round key at 16(%r11); r10 = saved index.
 * Out: xmm2..xmm7 encrypted; xmm8 has every input block XORed in;
 *      xmm10..xmm15 hold the six per-block offsets (XOR last round key),
 *      so xmm15 is the new running offset; r8 += 6; r12/r13/r14 recomputed
 *      for the next group; xmm10 reloaded from (%rbx); rax = r10;
 *      xmm1 = round key at 16(%r11).
 */
.type __ocb_encrypt6,@function
.align 32
__ocb_encrypt6:
	/* Chain the offsets: each is the previous one XOR a table entry. The
	 * input blocks are folded into xmm8 before being masked. */
	pxor %xmm9,%xmm15
	movdqu (%rbx,%r12,1),%xmm11
	movdqa %xmm10,%xmm12
	movdqu (%rbx,%r13,1),%xmm13
	movdqa %xmm10,%xmm14
	pxor %xmm15,%xmm10
	movdqu (%rbx,%r14,1),%xmm15
	pxor %xmm10,%xmm11
	pxor %xmm2,%xmm8
	pxor %xmm10,%xmm2
	pxor %xmm11,%xmm12
	pxor %xmm3,%xmm8
	pxor %xmm11,%xmm3
	pxor %xmm12,%xmm13
	pxor %xmm4,%xmm8
	pxor %xmm12,%xmm4
	pxor %xmm13,%xmm14
	pxor %xmm5,%xmm8
	pxor %xmm13,%xmm5
	pxor %xmm14,%xmm15
	pxor %xmm6,%xmm8
	pxor %xmm14,%xmm6
	pxor %xmm7,%xmm8
	pxor %xmm15,%xmm7
	movups 32(%r11),%xmm0

	/* Table offsets for the next group of six, overlapped with the
	 * first rounds. */
	leaq 1(%r8),%r12
	leaq 3(%r8),%r13
	leaq 5(%r8),%r14
	addq $6,%r8
	pxor %xmm9,%xmm10
	bsfq %r12,%r12
	bsfq %r13,%r13
	bsfq %r14,%r14

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	pxor %xmm9,%xmm11
	pxor %xmm9,%xmm12
.byte 102,15,56,220,241
	pxor %xmm9,%xmm13
	pxor %xmm9,%xmm14
.byte 102,15,56,220,249
	movups 48(%r11),%xmm1
	pxor %xmm9,%xmm15

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups 64(%r11),%xmm0
	shlq $4,%r12
	shlq $4,%r13
	jmp .Locb_enc_loop6

/* Two rounds per pass over all six blocks; runs until rax reaches zero. */
.align 32
.Locb_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups -16(%rcx,%rax,1),%xmm0
	jnz .Locb_enc_loop6

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups 16(%r11),%xmm1
	shlq $4,%r14

	/* Final round: aesenclast with xmm10..xmm15 (offset XOR last round
	 * key) applies the last round key and the output mask together. */
.byte 102,65,15,56,221,210
	movdqu (%rbx),%xmm10
	movq %r10,%rax
.byte 102,65,15,56,221,219
.byte 102,65,15,56,221,228
.byte 102,65,15,56,221,237
.byte 102,65,15,56,221,246
.byte 102,65,15,56,221,255
	.byte 0xf3,0xc3	/* rep ret */
.size __ocb_encrypt6,.-__ocb_encrypt6

/*
 * __ocb_encrypt4 - local helper, encrypts four blocks held in xmm2..xmm5.
 *
 * Same register conventions as the six-block helper, but it uses only the
 * r12/r13 table offsets, produces offsets in xmm10..xmm13, and changes
 * neither r8 nor r12..r14. xmm10 is not reloaded from the table.
 * On return rax = r10 and xmm1 = round key at 16(%r11).
 */
.type __ocb_encrypt4,@function
.align 32
__ocb_encrypt4:
	pxor %xmm9,%xmm15
	movdqu (%rbx,%r12,1),%xmm11
	movdqa %xmm10,%xmm12
	movdqu (%rbx,%r13,1),%xmm13
	pxor %xmm15,%xmm10
	pxor %xmm10,%xmm11
	pxor %xmm2,%xmm8
	pxor %xmm10,%xmm2
	pxor %xmm11,%xmm12
	pxor %xmm3,%xmm8
	pxor %xmm11,%xmm3
	pxor %xmm12,%xmm13
	pxor %xmm4,%xmm8
	pxor %xmm12,%xmm4
	pxor %xmm5,%xmm8
	pxor %xmm13,%xmm5
	movups 32(%r11),%xmm0

	/* Turn the offsets into final-round operands (XOR xmm9). */
	pxor %xmm9,%xmm10
	pxor %xmm9,%xmm11
	pxor %xmm9,%xmm12
	pxor %xmm9,%xmm13

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 48(%r11),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups 64(%r11),%xmm0
	jmp .Locb_enc_loop4

.align 32
.Locb_enc_loop4:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups -16(%rcx,%rax,1),%xmm0
	jnz .Locb_enc_loop4

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 16(%r11),%xmm1
	movq %r10,%rax

.byte 102,65,15,56,221,210
.byte 102,65,15,56,221,219
.byte 102,65,15,56,221,228
.byte 102,65,15,56,221,237
	.byte 0xf3,0xc3	/* rep ret */
.size __ocb_encrypt4,.-__ocb_encrypt4

/*
 * __ocb_encrypt1 - local helper, encrypts the single block in xmm2.
 *
 * In:  xmm2 input block; xmm7 = table entry for this block; xmm15 = running
 *      offset XOR last round key; xmm8, xmm9, r11, rcx, rax, r10, xmm1 as
 *      for the multi-block helpers.
 * Out: xmm2 encrypted; xmm8 has the input block XORed in; xmm7 = new
 *      offset XOR last round key (the caller copies it to xmm15);
 *      rax = r10; xmm1 = round key at 16(%r11).
 */
.type __ocb_encrypt1,@function
.align 32
__ocb_encrypt1:
	pxor %xmm15,%xmm7
	pxor %xmm9,%xmm7
	pxor %xmm2,%xmm8
	pxor %xmm7,%xmm2
	movups 32(%r11),%xmm0

.byte 102,15,56,220,209
	movups 48(%r11),%xmm1
	pxor %xmm9,%xmm7

.byte 102,15,56,220,208
	movups 64(%r11),%xmm0
	jmp .Locb_enc_loop1

.align 32
.Locb_enc_loop1:
.byte 102,15,56,220,209
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax

.byte 102,15,56,220,208
	movups -16(%rcx,%rax,1),%xmm0
	jnz .Locb_enc_loop1

.byte 102,15,56,220,209
	movups 16(%r11),%xmm1
	movq %r10,%rax

.byte 102,15,56,221,215	/* aesenclast %xmm7,%xmm2 */
	.byte 0xf3,0xc3	/* rep ret */
.size __ocb_encrypt1,.-__ocb_encrypt1

/*
 * aes_hw_ocb_decrypt - entry and setup; the rest of the function follows on
 * the subsequent lines. The prologue and register setup below are
 * instruction-for-instruction the same as in aes_hw_ocb_encrypt.
 */
.globl aes_hw_ocb_decrypt
.hidden aes_hw_ocb_decrypt
.type aes_hw_ocb_decrypt,@function
.align 32
aes_hw_ocb_decrypt:
.cfi_startproc
	leaq (%rsp),%rax
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	movq 8(%rax),%rbx
	movq 8+8(%rax),%rbp

	movl 240(%rcx),%r10d
	movq %rcx,%r11
	shll $4,%r10d
	movups (%rcx),%xmm9
	movups 16(%rcx,%r10,1),%xmm1

	movdqu (%r9),%xmm15
	pxor %xmm1,%xmm9
	pxor %xmm1,%xmm15

	movl $16+32,%eax
	leaq 32(%r11,%r10,1),%rcx
	movups 16(%r11),%xmm1
	subq %r10,%rax
	movq %rax,%r10

	movdqu (%rbx),%xmm10
	movdqu (%rbp),%xmm8

	testq $1,%r8
	jnz .Locb_dec_odd

3029 bsfq %r8,%r12 3030 addq $1,%r8 3031 shlq $4,%r12 3032 movdqu (%rbx,%r12,1),%xmm7 3033 movdqu (%rdi),%xmm2 3034 leaq 16(%rdi),%rdi 3035 3036 call __ocb_decrypt1 3037 3038 movdqa %xmm7,%xmm15 3039 movups %xmm2,(%rsi) 3040 xorps %xmm2,%xmm8 3041 leaq 16(%rsi),%rsi 3042 subq $1,%rdx 3043 jz .Locb_dec_done 3044 3045.Locb_dec_odd: 3046 leaq 1(%r8),%r12 3047 leaq 3(%r8),%r13 3048 leaq 5(%r8),%r14 3049 leaq 6(%r8),%r8 3050 bsfq %r12,%r12 3051 bsfq %r13,%r13 3052 bsfq %r14,%r14 3053 shlq $4,%r12 3054 shlq $4,%r13 3055 shlq $4,%r14 3056 3057 subq $6,%rdx 3058 jc .Locb_dec_short 3059 jmp .Locb_dec_grandloop 3060 3061.align 32 3062.Locb_dec_grandloop: 3063 movdqu 0(%rdi),%xmm2 3064 movdqu 16(%rdi),%xmm3 3065 movdqu 32(%rdi),%xmm4 3066 movdqu 48(%rdi),%xmm5 3067 movdqu 64(%rdi),%xmm6 3068 movdqu 80(%rdi),%xmm7 3069 leaq 96(%rdi),%rdi 3070 3071 call __ocb_decrypt6 3072 3073 movups %xmm2,0(%rsi) 3074 pxor %xmm2,%xmm8 3075 movups %xmm3,16(%rsi) 3076 pxor %xmm3,%xmm8 3077 movups %xmm4,32(%rsi) 3078 pxor %xmm4,%xmm8 3079 movups %xmm5,48(%rsi) 3080 pxor %xmm5,%xmm8 3081 movups %xmm6,64(%rsi) 3082 pxor %xmm6,%xmm8 3083 movups %xmm7,80(%rsi) 3084 pxor %xmm7,%xmm8 3085 leaq 96(%rsi),%rsi 3086 subq $6,%rdx 3087 jnc .Locb_dec_grandloop 3088 3089.Locb_dec_short: 3090 addq $6,%rdx 3091 jz .Locb_dec_done 3092 3093 movdqu 0(%rdi),%xmm2 3094 cmpq $2,%rdx 3095 jb .Locb_dec_one 3096 movdqu 16(%rdi),%xmm3 3097 je .Locb_dec_two 3098 3099 movdqu 32(%rdi),%xmm4 3100 cmpq $4,%rdx 3101 jb .Locb_dec_three 3102 movdqu 48(%rdi),%xmm5 3103 je .Locb_dec_four 3104 3105 movdqu 64(%rdi),%xmm6 3106 pxor %xmm7,%xmm7 3107 3108 call __ocb_decrypt6 3109 3110 movdqa %xmm14,%xmm15 3111 movups %xmm2,0(%rsi) 3112 pxor %xmm2,%xmm8 3113 movups %xmm3,16(%rsi) 3114 pxor %xmm3,%xmm8 3115 movups %xmm4,32(%rsi) 3116 pxor %xmm4,%xmm8 3117 movups %xmm5,48(%rsi) 3118 pxor %xmm5,%xmm8 3119 movups %xmm6,64(%rsi) 3120 pxor %xmm6,%xmm8 3121 3122 jmp .Locb_dec_done 3123 3124.align 16 3125.Locb_dec_one: 3126 movdqa %xmm10,%xmm7 
3127 3128 call __ocb_decrypt1 3129 3130 movdqa %xmm7,%xmm15 3131 movups %xmm2,0(%rsi) 3132 xorps %xmm2,%xmm8 3133 jmp .Locb_dec_done 3134 3135.align 16 3136.Locb_dec_two: 3137 pxor %xmm4,%xmm4 3138 pxor %xmm5,%xmm5 3139 3140 call __ocb_decrypt4 3141 3142 movdqa %xmm11,%xmm15 3143 movups %xmm2,0(%rsi) 3144 xorps %xmm2,%xmm8 3145 movups %xmm3,16(%rsi) 3146 xorps %xmm3,%xmm8 3147 3148 jmp .Locb_dec_done 3149 3150.align 16 3151.Locb_dec_three: 3152 pxor %xmm5,%xmm5 3153 3154 call __ocb_decrypt4 3155 3156 movdqa %xmm12,%xmm15 3157 movups %xmm2,0(%rsi) 3158 xorps %xmm2,%xmm8 3159 movups %xmm3,16(%rsi) 3160 xorps %xmm3,%xmm8 3161 movups %xmm4,32(%rsi) 3162 xorps %xmm4,%xmm8 3163 3164 jmp .Locb_dec_done 3165 3166.align 16 3167.Locb_dec_four: 3168 call __ocb_decrypt4 3169 3170 movdqa %xmm13,%xmm15 3171 movups %xmm2,0(%rsi) 3172 pxor %xmm2,%xmm8 3173 movups %xmm3,16(%rsi) 3174 pxor %xmm3,%xmm8 3175 movups %xmm4,32(%rsi) 3176 pxor %xmm4,%xmm8 3177 movups %xmm5,48(%rsi) 3178 pxor %xmm5,%xmm8 3179 3180.Locb_dec_done: 3181 pxor %xmm0,%xmm15 3182 movdqu %xmm8,(%rbp) 3183 movdqu %xmm15,(%r9) 3184 3185 xorps %xmm0,%xmm0 3186 pxor %xmm1,%xmm1 3187 pxor %xmm2,%xmm2 3188 pxor %xmm3,%xmm3 3189 pxor %xmm4,%xmm4 3190 pxor %xmm5,%xmm5 3191 pxor %xmm6,%xmm6 3192 pxor %xmm7,%xmm7 3193 pxor %xmm8,%xmm8 3194 pxor %xmm9,%xmm9 3195 pxor %xmm10,%xmm10 3196 pxor %xmm11,%xmm11 3197 pxor %xmm12,%xmm12 3198 pxor %xmm13,%xmm13 3199 pxor %xmm14,%xmm14 3200 pxor %xmm15,%xmm15 3201 leaq 40(%rsp),%rax 3202.cfi_def_cfa %rax,8 3203 movq -40(%rax),%r14 3204.cfi_restore %r14 3205 movq -32(%rax),%r13 3206.cfi_restore %r13 3207 movq -24(%rax),%r12 3208.cfi_restore %r12 3209 movq -16(%rax),%rbp 3210.cfi_restore %rbp 3211 movq -8(%rax),%rbx 3212.cfi_restore %rbx 3213 leaq (%rax),%rsp 3214.cfi_def_cfa_register %rsp 3215.Locb_dec_epilogue: 3216 .byte 0xf3,0xc3 3217.cfi_endproc 3218.size aes_hw_ocb_decrypt,.-aes_hw_ocb_decrypt 3219 3220.type __ocb_decrypt6,@function 3221.align 32 3222__ocb_decrypt6: 3223 pxor 
%xmm9,%xmm15 3224 movdqu (%rbx,%r12,1),%xmm11 3225 movdqa %xmm10,%xmm12 3226 movdqu (%rbx,%r13,1),%xmm13 3227 movdqa %xmm10,%xmm14 3228 pxor %xmm15,%xmm10 3229 movdqu (%rbx,%r14,1),%xmm15 3230 pxor %xmm10,%xmm11 3231 pxor %xmm10,%xmm2 3232 pxor %xmm11,%xmm12 3233 pxor %xmm11,%xmm3 3234 pxor %xmm12,%xmm13 3235 pxor %xmm12,%xmm4 3236 pxor %xmm13,%xmm14 3237 pxor %xmm13,%xmm5 3238 pxor %xmm14,%xmm15 3239 pxor %xmm14,%xmm6 3240 pxor %xmm15,%xmm7 3241 movups 32(%r11),%xmm0 3242 3243 leaq 1(%r8),%r12 3244 leaq 3(%r8),%r13 3245 leaq 5(%r8),%r14 3246 addq $6,%r8 3247 pxor %xmm9,%xmm10 3248 bsfq %r12,%r12 3249 bsfq %r13,%r13 3250 bsfq %r14,%r14 3251 3252.byte 102,15,56,222,209 3253.byte 102,15,56,222,217 3254.byte 102,15,56,222,225 3255.byte 102,15,56,222,233 3256 pxor %xmm9,%xmm11 3257 pxor %xmm9,%xmm12 3258.byte 102,15,56,222,241 3259 pxor %xmm9,%xmm13 3260 pxor %xmm9,%xmm14 3261.byte 102,15,56,222,249 3262 movups 48(%r11),%xmm1 3263 pxor %xmm9,%xmm15 3264 3265.byte 102,15,56,222,208 3266.byte 102,15,56,222,216 3267.byte 102,15,56,222,224 3268.byte 102,15,56,222,232 3269.byte 102,15,56,222,240 3270.byte 102,15,56,222,248 3271 movups 64(%r11),%xmm0 3272 shlq $4,%r12 3273 shlq $4,%r13 3274 jmp .Locb_dec_loop6 3275 3276.align 32 3277.Locb_dec_loop6: 3278.byte 102,15,56,222,209 3279.byte 102,15,56,222,217 3280.byte 102,15,56,222,225 3281.byte 102,15,56,222,233 3282.byte 102,15,56,222,241 3283.byte 102,15,56,222,249 3284 movups (%rcx,%rax,1),%xmm1 3285 addq $32,%rax 3286 3287.byte 102,15,56,222,208 3288.byte 102,15,56,222,216 3289.byte 102,15,56,222,224 3290.byte 102,15,56,222,232 3291.byte 102,15,56,222,240 3292.byte 102,15,56,222,248 3293 movups -16(%rcx,%rax,1),%xmm0 3294 jnz .Locb_dec_loop6 3295 3296.byte 102,15,56,222,209 3297.byte 102,15,56,222,217 3298.byte 102,15,56,222,225 3299.byte 102,15,56,222,233 3300.byte 102,15,56,222,241 3301.byte 102,15,56,222,249 3302 movups 16(%r11),%xmm1 3303 shlq $4,%r14 3304 3305.byte 102,65,15,56,223,210 3306 movdqu (%rbx),%xmm10 3307 
movq %r10,%rax 3308.byte 102,65,15,56,223,219 3309.byte 102,65,15,56,223,228 3310.byte 102,65,15,56,223,237 3311.byte 102,65,15,56,223,246 3312.byte 102,65,15,56,223,255 3313 .byte 0xf3,0xc3 3314.size __ocb_decrypt6,.-__ocb_decrypt6 3315 3316.type __ocb_decrypt4,@function 3317.align 32 3318__ocb_decrypt4: 3319 pxor %xmm9,%xmm15 3320 movdqu (%rbx,%r12,1),%xmm11 3321 movdqa %xmm10,%xmm12 3322 movdqu (%rbx,%r13,1),%xmm13 3323 pxor %xmm15,%xmm10 3324 pxor %xmm10,%xmm11 3325 pxor %xmm10,%xmm2 3326 pxor %xmm11,%xmm12 3327 pxor %xmm11,%xmm3 3328 pxor %xmm12,%xmm13 3329 pxor %xmm12,%xmm4 3330 pxor %xmm13,%xmm5 3331 movups 32(%r11),%xmm0 3332 3333 pxor %xmm9,%xmm10 3334 pxor %xmm9,%xmm11 3335 pxor %xmm9,%xmm12 3336 pxor %xmm9,%xmm13 3337 3338.byte 102,15,56,222,209 3339.byte 102,15,56,222,217 3340.byte 102,15,56,222,225 3341.byte 102,15,56,222,233 3342 movups 48(%r11),%xmm1 3343 3344.byte 102,15,56,222,208 3345.byte 102,15,56,222,216 3346.byte 102,15,56,222,224 3347.byte 102,15,56,222,232 3348 movups 64(%r11),%xmm0 3349 jmp .Locb_dec_loop4 3350 3351.align 32 3352.Locb_dec_loop4: 3353.byte 102,15,56,222,209 3354.byte 102,15,56,222,217 3355.byte 102,15,56,222,225 3356.byte 102,15,56,222,233 3357 movups (%rcx,%rax,1),%xmm1 3358 addq $32,%rax 3359 3360.byte 102,15,56,222,208 3361.byte 102,15,56,222,216 3362.byte 102,15,56,222,224 3363.byte 102,15,56,222,232 3364 movups -16(%rcx,%rax,1),%xmm0 3365 jnz .Locb_dec_loop4 3366 3367.byte 102,15,56,222,209 3368.byte 102,15,56,222,217 3369.byte 102,15,56,222,225 3370.byte 102,15,56,222,233 3371 movups 16(%r11),%xmm1 3372 movq %r10,%rax 3373 3374.byte 102,65,15,56,223,210 3375.byte 102,65,15,56,223,219 3376.byte 102,65,15,56,223,228 3377.byte 102,65,15,56,223,237 3378 .byte 0xf3,0xc3 3379.size __ocb_decrypt4,.-__ocb_decrypt4 3380 3381.type __ocb_decrypt1,@function 3382.align 32 3383__ocb_decrypt1: 3384 pxor %xmm15,%xmm7 3385 pxor %xmm9,%xmm7 3386 pxor %xmm7,%xmm2 3387 movups 32(%r11),%xmm0 3388 3389.byte 102,15,56,222,209 3390 movups 
48(%r11),%xmm1 3391 pxor %xmm9,%xmm7 3392 3393.byte 102,15,56,222,208 3394 movups 64(%r11),%xmm0 3395 jmp .Locb_dec_loop1 3396 3397.align 32 3398.Locb_dec_loop1: 3399.byte 102,15,56,222,209 3400 movups (%rcx,%rax,1),%xmm1 3401 addq $32,%rax 3402 3403.byte 102,15,56,222,208 3404 movups -16(%rcx,%rax,1),%xmm0 3405 jnz .Locb_dec_loop1 3406 3407.byte 102,15,56,222,209 3408 movups 16(%r11),%xmm1 3409 movq %r10,%rax 3410 3411.byte 102,15,56,223,215 3412 .byte 0xf3,0xc3 3413.size __ocb_decrypt1,.-__ocb_decrypt1 3414.globl aes_hw_cbc_encrypt 3415.hidden aes_hw_cbc_encrypt 3416.type aes_hw_cbc_encrypt,@function 3417.align 16 3418aes_hw_cbc_encrypt: 3419.cfi_startproc 3420 testq %rdx,%rdx 3421 jz .Lcbc_ret 3422 3423 movl 240(%rcx),%r10d 3424 movq %rcx,%r11 3425 testl %r9d,%r9d 3426 jz .Lcbc_decrypt 3427 3428 movups (%r8),%xmm2 3429 movl %r10d,%eax 3430 cmpq $16,%rdx 3431 jb .Lcbc_enc_tail 3432 subq $16,%rdx 3433 jmp .Lcbc_enc_loop 3434.align 16 3435.Lcbc_enc_loop: 3436 movups (%rdi),%xmm3 3437 leaq 16(%rdi),%rdi 3438 3439 movups (%rcx),%xmm0 3440 movups 16(%rcx),%xmm1 3441 xorps %xmm0,%xmm3 3442 leaq 32(%rcx),%rcx 3443 xorps %xmm3,%xmm2 3444.Loop_enc1_15: 3445.byte 102,15,56,220,209 3446 decl %eax 3447 movups (%rcx),%xmm1 3448 leaq 16(%rcx),%rcx 3449 jnz .Loop_enc1_15 3450.byte 102,15,56,221,209 3451 movl %r10d,%eax 3452 movq %r11,%rcx 3453 movups %xmm2,0(%rsi) 3454 leaq 16(%rsi),%rsi 3455 subq $16,%rdx 3456 jnc .Lcbc_enc_loop 3457 addq $16,%rdx 3458 jnz .Lcbc_enc_tail 3459 pxor %xmm0,%xmm0 3460 pxor %xmm1,%xmm1 3461 movups %xmm2,(%r8) 3462 pxor %xmm2,%xmm2 3463 pxor %xmm3,%xmm3 3464 jmp .Lcbc_ret 3465 3466.Lcbc_enc_tail: 3467 movq %rdx,%rcx 3468 xchgq %rdi,%rsi 3469.long 0x9066A4F3 3470 movl $16,%ecx 3471 subq %rdx,%rcx 3472 xorl %eax,%eax 3473.long 0x9066AAF3 3474 leaq -16(%rdi),%rdi 3475 movl %r10d,%eax 3476 movq %rdi,%rsi 3477 movq %r11,%rcx 3478 xorq %rdx,%rdx 3479 jmp .Lcbc_enc_loop 3480 3481.align 16 3482.Lcbc_decrypt: 3483 cmpq $16,%rdx 3484 jne .Lcbc_decrypt_bulk 
3485 3486 3487 3488 movdqu (%rdi),%xmm2 3489 movdqu (%r8),%xmm3 3490 movdqa %xmm2,%xmm4 3491 movups (%rcx),%xmm0 3492 movups 16(%rcx),%xmm1 3493 leaq 32(%rcx),%rcx 3494 xorps %xmm0,%xmm2 3495.Loop_dec1_16: 3496.byte 102,15,56,222,209 3497 decl %r10d 3498 movups (%rcx),%xmm1 3499 leaq 16(%rcx),%rcx 3500 jnz .Loop_dec1_16 3501.byte 102,15,56,223,209 3502 pxor %xmm0,%xmm0 3503 pxor %xmm1,%xmm1 3504 movdqu %xmm4,(%r8) 3505 xorps %xmm3,%xmm2 3506 pxor %xmm3,%xmm3 3507 movups %xmm2,(%rsi) 3508 pxor %xmm2,%xmm2 3509 jmp .Lcbc_ret 3510.align 16 3511.Lcbc_decrypt_bulk: 3512 leaq (%rsp),%r11 3513.cfi_def_cfa_register %r11 3514 pushq %rbp 3515.cfi_offset %rbp,-16 3516 subq $16,%rsp 3517 andq $-16,%rsp 3518 movq %rcx,%rbp 3519 movups (%r8),%xmm10 3520 movl %r10d,%eax 3521 cmpq $0x50,%rdx 3522 jbe .Lcbc_dec_tail 3523 3524 movups (%rcx),%xmm0 3525 movdqu 0(%rdi),%xmm2 3526 movdqu 16(%rdi),%xmm3 3527 movdqa %xmm2,%xmm11 3528 movdqu 32(%rdi),%xmm4 3529 movdqa %xmm3,%xmm12 3530 movdqu 48(%rdi),%xmm5 3531 movdqa %xmm4,%xmm13 3532 movdqu 64(%rdi),%xmm6 3533 movdqa %xmm5,%xmm14 3534 movdqu 80(%rdi),%xmm7 3535 movdqa %xmm6,%xmm15 3536 leaq OPENSSL_ia32cap_P(%rip),%r9 3537 movl 4(%r9),%r9d 3538 cmpq $0x70,%rdx 3539 jbe .Lcbc_dec_six_or_seven 3540 3541 andl $71303168,%r9d 3542 subq $0x50,%rdx 3543 cmpl $4194304,%r9d 3544 je .Lcbc_dec_loop6_enter 3545 subq $0x20,%rdx 3546 leaq 112(%rcx),%rcx 3547 jmp .Lcbc_dec_loop8_enter 3548.align 16 3549.Lcbc_dec_loop8: 3550 movups %xmm9,(%rsi) 3551 leaq 16(%rsi),%rsi 3552.Lcbc_dec_loop8_enter: 3553 movdqu 96(%rdi),%xmm8 3554 pxor %xmm0,%xmm2 3555 movdqu 112(%rdi),%xmm9 3556 pxor %xmm0,%xmm3 3557 movups 16-112(%rcx),%xmm1 3558 pxor %xmm0,%xmm4 3559 movq $-1,%rbp 3560 cmpq $0x70,%rdx 3561 pxor %xmm0,%xmm5 3562 pxor %xmm0,%xmm6 3563 pxor %xmm0,%xmm7 3564 pxor %xmm0,%xmm8 3565 3566.byte 102,15,56,222,209 3567 pxor %xmm0,%xmm9 3568 movups 32-112(%rcx),%xmm0 3569.byte 102,15,56,222,217 3570.byte 102,15,56,222,225 3571.byte 102,15,56,222,233 3572.byte 
102,15,56,222,241 3573.byte 102,15,56,222,249 3574.byte 102,68,15,56,222,193 3575 adcq $0,%rbp 3576 andq $128,%rbp 3577.byte 102,68,15,56,222,201 3578 addq %rdi,%rbp 3579 movups 48-112(%rcx),%xmm1 3580.byte 102,15,56,222,208 3581.byte 102,15,56,222,216 3582.byte 102,15,56,222,224 3583.byte 102,15,56,222,232 3584.byte 102,15,56,222,240 3585.byte 102,15,56,222,248 3586.byte 102,68,15,56,222,192 3587.byte 102,68,15,56,222,200 3588 movups 64-112(%rcx),%xmm0 3589 nop 3590.byte 102,15,56,222,209 3591.byte 102,15,56,222,217 3592.byte 102,15,56,222,225 3593.byte 102,15,56,222,233 3594.byte 102,15,56,222,241 3595.byte 102,15,56,222,249 3596.byte 102,68,15,56,222,193 3597.byte 102,68,15,56,222,201 3598 movups 80-112(%rcx),%xmm1 3599 nop 3600.byte 102,15,56,222,208 3601.byte 102,15,56,222,216 3602.byte 102,15,56,222,224 3603.byte 102,15,56,222,232 3604.byte 102,15,56,222,240 3605.byte 102,15,56,222,248 3606.byte 102,68,15,56,222,192 3607.byte 102,68,15,56,222,200 3608 movups 96-112(%rcx),%xmm0 3609 nop 3610.byte 102,15,56,222,209 3611.byte 102,15,56,222,217 3612.byte 102,15,56,222,225 3613.byte 102,15,56,222,233 3614.byte 102,15,56,222,241 3615.byte 102,15,56,222,249 3616.byte 102,68,15,56,222,193 3617.byte 102,68,15,56,222,201 3618 movups 112-112(%rcx),%xmm1 3619 nop 3620.byte 102,15,56,222,208 3621.byte 102,15,56,222,216 3622.byte 102,15,56,222,224 3623.byte 102,15,56,222,232 3624.byte 102,15,56,222,240 3625.byte 102,15,56,222,248 3626.byte 102,68,15,56,222,192 3627.byte 102,68,15,56,222,200 3628 movups 128-112(%rcx),%xmm0 3629 nop 3630.byte 102,15,56,222,209 3631.byte 102,15,56,222,217 3632.byte 102,15,56,222,225 3633.byte 102,15,56,222,233 3634.byte 102,15,56,222,241 3635.byte 102,15,56,222,249 3636.byte 102,68,15,56,222,193 3637.byte 102,68,15,56,222,201 3638 movups 144-112(%rcx),%xmm1 3639 cmpl $11,%eax 3640.byte 102,15,56,222,208 3641.byte 102,15,56,222,216 3642.byte 102,15,56,222,224 3643.byte 102,15,56,222,232 3644.byte 102,15,56,222,240 3645.byte 102,15,56,222,248 
3646.byte 102,68,15,56,222,192 3647.byte 102,68,15,56,222,200 3648 movups 160-112(%rcx),%xmm0 3649 jb .Lcbc_dec_done 3650.byte 102,15,56,222,209 3651.byte 102,15,56,222,217 3652.byte 102,15,56,222,225 3653.byte 102,15,56,222,233 3654.byte 102,15,56,222,241 3655.byte 102,15,56,222,249 3656.byte 102,68,15,56,222,193 3657.byte 102,68,15,56,222,201 3658 movups 176-112(%rcx),%xmm1 3659 nop 3660.byte 102,15,56,222,208 3661.byte 102,15,56,222,216 3662.byte 102,15,56,222,224 3663.byte 102,15,56,222,232 3664.byte 102,15,56,222,240 3665.byte 102,15,56,222,248 3666.byte 102,68,15,56,222,192 3667.byte 102,68,15,56,222,200 3668 movups 192-112(%rcx),%xmm0 3669 je .Lcbc_dec_done 3670.byte 102,15,56,222,209 3671.byte 102,15,56,222,217 3672.byte 102,15,56,222,225 3673.byte 102,15,56,222,233 3674.byte 102,15,56,222,241 3675.byte 102,15,56,222,249 3676.byte 102,68,15,56,222,193 3677.byte 102,68,15,56,222,201 3678 movups 208-112(%rcx),%xmm1 3679 nop 3680.byte 102,15,56,222,208 3681.byte 102,15,56,222,216 3682.byte 102,15,56,222,224 3683.byte 102,15,56,222,232 3684.byte 102,15,56,222,240 3685.byte 102,15,56,222,248 3686.byte 102,68,15,56,222,192 3687.byte 102,68,15,56,222,200 3688 movups 224-112(%rcx),%xmm0 3689 jmp .Lcbc_dec_done 3690.align 16 3691.Lcbc_dec_done: 3692.byte 102,15,56,222,209 3693.byte 102,15,56,222,217 3694 pxor %xmm0,%xmm10 3695 pxor %xmm0,%xmm11 3696.byte 102,15,56,222,225 3697.byte 102,15,56,222,233 3698 pxor %xmm0,%xmm12 3699 pxor %xmm0,%xmm13 3700.byte 102,15,56,222,241 3701.byte 102,15,56,222,249 3702 pxor %xmm0,%xmm14 3703 pxor %xmm0,%xmm15 3704.byte 102,68,15,56,222,193 3705.byte 102,68,15,56,222,201 3706 movdqu 80(%rdi),%xmm1 3707 3708.byte 102,65,15,56,223,210 3709 movdqu 96(%rdi),%xmm10 3710 pxor %xmm0,%xmm1 3711.byte 102,65,15,56,223,219 3712 pxor %xmm0,%xmm10 3713 movdqu 112(%rdi),%xmm0 3714.byte 102,65,15,56,223,228 3715 leaq 128(%rdi),%rdi 3716 movdqu 0(%rbp),%xmm11 3717.byte 102,65,15,56,223,237 3718.byte 102,65,15,56,223,246 3719 movdqu 16(%rbp),%xmm12 
3720 movdqu 32(%rbp),%xmm13 3721.byte 102,65,15,56,223,255 3722.byte 102,68,15,56,223,193 3723 movdqu 48(%rbp),%xmm14 3724 movdqu 64(%rbp),%xmm15 3725.byte 102,69,15,56,223,202 3726 movdqa %xmm0,%xmm10 3727 movdqu 80(%rbp),%xmm1 3728 movups -112(%rcx),%xmm0 3729 3730 movups %xmm2,(%rsi) 3731 movdqa %xmm11,%xmm2 3732 movups %xmm3,16(%rsi) 3733 movdqa %xmm12,%xmm3 3734 movups %xmm4,32(%rsi) 3735 movdqa %xmm13,%xmm4 3736 movups %xmm5,48(%rsi) 3737 movdqa %xmm14,%xmm5 3738 movups %xmm6,64(%rsi) 3739 movdqa %xmm15,%xmm6 3740 movups %xmm7,80(%rsi) 3741 movdqa %xmm1,%xmm7 3742 movups %xmm8,96(%rsi) 3743 leaq 112(%rsi),%rsi 3744 3745 subq $0x80,%rdx 3746 ja .Lcbc_dec_loop8 3747 3748 movaps %xmm9,%xmm2 3749 leaq -112(%rcx),%rcx 3750 addq $0x70,%rdx 3751 jle .Lcbc_dec_clear_tail_collected 3752 movups %xmm9,(%rsi) 3753 leaq 16(%rsi),%rsi 3754 cmpq $0x50,%rdx 3755 jbe .Lcbc_dec_tail 3756 3757 movaps %xmm11,%xmm2 3758.Lcbc_dec_six_or_seven: 3759 cmpq $0x60,%rdx 3760 ja .Lcbc_dec_seven 3761 3762 movaps %xmm7,%xmm8 3763 call _aesni_decrypt6 3764 pxor %xmm10,%xmm2 3765 movaps %xmm8,%xmm10 3766 pxor %xmm11,%xmm3 3767 movdqu %xmm2,(%rsi) 3768 pxor %xmm12,%xmm4 3769 movdqu %xmm3,16(%rsi) 3770 pxor %xmm3,%xmm3 3771 pxor %xmm13,%xmm5 3772 movdqu %xmm4,32(%rsi) 3773 pxor %xmm4,%xmm4 3774 pxor %xmm14,%xmm6 3775 movdqu %xmm5,48(%rsi) 3776 pxor %xmm5,%xmm5 3777 pxor %xmm15,%xmm7 3778 movdqu %xmm6,64(%rsi) 3779 pxor %xmm6,%xmm6 3780 leaq 80(%rsi),%rsi 3781 movdqa %xmm7,%xmm2 3782 pxor %xmm7,%xmm7 3783 jmp .Lcbc_dec_tail_collected 3784 3785.align 16 3786.Lcbc_dec_seven: 3787 movups 96(%rdi),%xmm8 3788 xorps %xmm9,%xmm9 3789 call _aesni_decrypt8 3790 movups 80(%rdi),%xmm9 3791 pxor %xmm10,%xmm2 3792 movups 96(%rdi),%xmm10 3793 pxor %xmm11,%xmm3 3794 movdqu %xmm2,(%rsi) 3795 pxor %xmm12,%xmm4 3796 movdqu %xmm3,16(%rsi) 3797 pxor %xmm3,%xmm3 3798 pxor %xmm13,%xmm5 3799 movdqu %xmm4,32(%rsi) 3800 pxor %xmm4,%xmm4 3801 pxor %xmm14,%xmm6 3802 movdqu %xmm5,48(%rsi) 3803 pxor %xmm5,%xmm5 3804 pxor 
%xmm15,%xmm7 3805 movdqu %xmm6,64(%rsi) 3806 pxor %xmm6,%xmm6 3807 pxor %xmm9,%xmm8 3808 movdqu %xmm7,80(%rsi) 3809 pxor %xmm7,%xmm7 3810 leaq 96(%rsi),%rsi 3811 movdqa %xmm8,%xmm2 3812 pxor %xmm8,%xmm8 3813 pxor %xmm9,%xmm9 3814 jmp .Lcbc_dec_tail_collected 3815 3816.align 16 3817.Lcbc_dec_loop6: 3818 movups %xmm7,(%rsi) 3819 leaq 16(%rsi),%rsi 3820 movdqu 0(%rdi),%xmm2 3821 movdqu 16(%rdi),%xmm3 3822 movdqa %xmm2,%xmm11 3823 movdqu 32(%rdi),%xmm4 3824 movdqa %xmm3,%xmm12 3825 movdqu 48(%rdi),%xmm5 3826 movdqa %xmm4,%xmm13 3827 movdqu 64(%rdi),%xmm6 3828 movdqa %xmm5,%xmm14 3829 movdqu 80(%rdi),%xmm7 3830 movdqa %xmm6,%xmm15 3831.Lcbc_dec_loop6_enter: 3832 leaq 96(%rdi),%rdi 3833 movdqa %xmm7,%xmm8 3834 3835 call _aesni_decrypt6 3836 3837 pxor %xmm10,%xmm2 3838 movdqa %xmm8,%xmm10 3839 pxor %xmm11,%xmm3 3840 movdqu %xmm2,(%rsi) 3841 pxor %xmm12,%xmm4 3842 movdqu %xmm3,16(%rsi) 3843 pxor %xmm13,%xmm5 3844 movdqu %xmm4,32(%rsi) 3845 pxor %xmm14,%xmm6 3846 movq %rbp,%rcx 3847 movdqu %xmm5,48(%rsi) 3848 pxor %xmm15,%xmm7 3849 movl %r10d,%eax 3850 movdqu %xmm6,64(%rsi) 3851 leaq 80(%rsi),%rsi 3852 subq $0x60,%rdx 3853 ja .Lcbc_dec_loop6 3854 3855 movdqa %xmm7,%xmm2 3856 addq $0x50,%rdx 3857 jle .Lcbc_dec_clear_tail_collected 3858 movups %xmm7,(%rsi) 3859 leaq 16(%rsi),%rsi 3860 3861.Lcbc_dec_tail: 3862 movups (%rdi),%xmm2 3863 subq $0x10,%rdx 3864 jbe .Lcbc_dec_one 3865 3866 movups 16(%rdi),%xmm3 3867 movaps %xmm2,%xmm11 3868 subq $0x10,%rdx 3869 jbe .Lcbc_dec_two 3870 3871 movups 32(%rdi),%xmm4 3872 movaps %xmm3,%xmm12 3873 subq $0x10,%rdx 3874 jbe .Lcbc_dec_three 3875 3876 movups 48(%rdi),%xmm5 3877 movaps %xmm4,%xmm13 3878 subq $0x10,%rdx 3879 jbe .Lcbc_dec_four 3880 3881 movups 64(%rdi),%xmm6 3882 movaps %xmm5,%xmm14 3883 movaps %xmm6,%xmm15 3884 xorps %xmm7,%xmm7 3885 call _aesni_decrypt6 3886 pxor %xmm10,%xmm2 3887 movaps %xmm15,%xmm10 3888 pxor %xmm11,%xmm3 3889 movdqu %xmm2,(%rsi) 3890 pxor %xmm12,%xmm4 3891 movdqu %xmm3,16(%rsi) 3892 pxor %xmm3,%xmm3 3893 pxor 
%xmm13,%xmm5 3894 movdqu %xmm4,32(%rsi) 3895 pxor %xmm4,%xmm4 3896 pxor %xmm14,%xmm6 3897 movdqu %xmm5,48(%rsi) 3898 pxor %xmm5,%xmm5 3899 leaq 64(%rsi),%rsi 3900 movdqa %xmm6,%xmm2 3901 pxor %xmm6,%xmm6 3902 pxor %xmm7,%xmm7 3903 subq $0x10,%rdx 3904 jmp .Lcbc_dec_tail_collected 3905 3906.align 16 3907.Lcbc_dec_one: 3908 movaps %xmm2,%xmm11 3909 movups (%rcx),%xmm0 3910 movups 16(%rcx),%xmm1 3911 leaq 32(%rcx),%rcx 3912 xorps %xmm0,%xmm2 3913.Loop_dec1_17: 3914.byte 102,15,56,222,209 3915 decl %eax 3916 movups (%rcx),%xmm1 3917 leaq 16(%rcx),%rcx 3918 jnz .Loop_dec1_17 3919.byte 102,15,56,223,209 3920 xorps %xmm10,%xmm2 3921 movaps %xmm11,%xmm10 3922 jmp .Lcbc_dec_tail_collected 3923.align 16 3924.Lcbc_dec_two: 3925 movaps %xmm3,%xmm12 3926 call _aesni_decrypt2 3927 pxor %xmm10,%xmm2 3928 movaps %xmm12,%xmm10 3929 pxor %xmm11,%xmm3 3930 movdqu %xmm2,(%rsi) 3931 movdqa %xmm3,%xmm2 3932 pxor %xmm3,%xmm3 3933 leaq 16(%rsi),%rsi 3934 jmp .Lcbc_dec_tail_collected 3935.align 16 3936.Lcbc_dec_three: 3937 movaps %xmm4,%xmm13 3938 call _aesni_decrypt3 3939 pxor %xmm10,%xmm2 3940 movaps %xmm13,%xmm10 3941 pxor %xmm11,%xmm3 3942 movdqu %xmm2,(%rsi) 3943 pxor %xmm12,%xmm4 3944 movdqu %xmm3,16(%rsi) 3945 pxor %xmm3,%xmm3 3946 movdqa %xmm4,%xmm2 3947 pxor %xmm4,%xmm4 3948 leaq 32(%rsi),%rsi 3949 jmp .Lcbc_dec_tail_collected 3950.align 16 3951.Lcbc_dec_four: 3952 movaps %xmm5,%xmm14 3953 call _aesni_decrypt4 3954 pxor %xmm10,%xmm2 3955 movaps %xmm14,%xmm10 3956 pxor %xmm11,%xmm3 3957 movdqu %xmm2,(%rsi) 3958 pxor %xmm12,%xmm4 3959 movdqu %xmm3,16(%rsi) 3960 pxor %xmm3,%xmm3 3961 pxor %xmm13,%xmm5 3962 movdqu %xmm4,32(%rsi) 3963 pxor %xmm4,%xmm4 3964 movdqa %xmm5,%xmm2 3965 pxor %xmm5,%xmm5 3966 leaq 48(%rsi),%rsi 3967 jmp .Lcbc_dec_tail_collected 3968 3969.align 16 3970.Lcbc_dec_clear_tail_collected: 3971 pxor %xmm3,%xmm3 3972 pxor %xmm4,%xmm4 3973 pxor %xmm5,%xmm5 3974 pxor %xmm6,%xmm6 3975 pxor %xmm7,%xmm7 3976 pxor %xmm8,%xmm8 3977 pxor %xmm9,%xmm9 
3978.Lcbc_dec_tail_collected: 3979 movups %xmm10,(%r8) 3980 andq $15,%rdx 3981 jnz .Lcbc_dec_tail_partial 3982 movups %xmm2,(%rsi) 3983 pxor %xmm2,%xmm2 3984 jmp .Lcbc_dec_ret 3985.align 16 3986.Lcbc_dec_tail_partial: 3987 movaps %xmm2,(%rsp) 3988 pxor %xmm2,%xmm2 3989 movq $16,%rcx 3990 movq %rsi,%rdi 3991 subq %rdx,%rcx 3992 leaq (%rsp),%rsi 3993.long 0x9066A4F3 3994 movdqa %xmm2,(%rsp) 3995 3996.Lcbc_dec_ret: 3997 xorps %xmm0,%xmm0 3998 pxor %xmm1,%xmm1 3999 movq -8(%r11),%rbp 4000.cfi_restore %rbp 4001 leaq (%r11),%rsp 4002.cfi_def_cfa_register %rsp 4003.Lcbc_ret: 4004 .byte 0xf3,0xc3 4005.cfi_endproc 4006.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt 4007.globl aes_hw_set_decrypt_key 4008.hidden aes_hw_set_decrypt_key 4009.type aes_hw_set_decrypt_key,@function 4010.align 16 4011aes_hw_set_decrypt_key: 4012.cfi_startproc 4013.byte 0x48,0x83,0xEC,0x08 4014.cfi_adjust_cfa_offset 8 4015 call __aesni_set_encrypt_key 4016 shll $4,%esi 4017 testl %eax,%eax 4018 jnz .Ldec_key_ret 4019 leaq 16(%rdx,%rsi,1),%rdi 4020 4021 movups (%rdx),%xmm0 4022 movups (%rdi),%xmm1 4023 movups %xmm0,(%rdi) 4024 movups %xmm1,(%rdx) 4025 leaq 16(%rdx),%rdx 4026 leaq -16(%rdi),%rdi 4027 4028.Ldec_key_inverse: 4029 movups (%rdx),%xmm0 4030 movups (%rdi),%xmm1 4031.byte 102,15,56,219,192 4032.byte 102,15,56,219,201 4033 leaq 16(%rdx),%rdx 4034 leaq -16(%rdi),%rdi 4035 movups %xmm0,16(%rdi) 4036 movups %xmm1,-16(%rdx) 4037 cmpq %rdx,%rdi 4038 ja .Ldec_key_inverse 4039 4040 movups (%rdx),%xmm0 4041.byte 102,15,56,219,192 4042 pxor %xmm1,%xmm1 4043 movups %xmm0,(%rdi) 4044 pxor %xmm0,%xmm0 4045.Ldec_key_ret: 4046 addq $8,%rsp 4047.cfi_adjust_cfa_offset -8 4048 .byte 0xf3,0xc3 4049.cfi_endproc 4050.LSEH_end_set_decrypt_key: 4051.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key 4052.globl aes_hw_set_encrypt_key 4053.hidden aes_hw_set_encrypt_key 4054.type aes_hw_set_encrypt_key,@function 4055.align 16 4056aes_hw_set_encrypt_key: 4057__aesni_set_encrypt_key: 4058.cfi_startproc 4059.byte 
0x48,0x83,0xEC,0x08 4060.cfi_adjust_cfa_offset 8 4061 movq $-1,%rax 4062 testq %rdi,%rdi 4063 jz .Lenc_key_ret 4064 testq %rdx,%rdx 4065 jz .Lenc_key_ret 4066 4067 movups (%rdi),%xmm0 4068 xorps %xmm4,%xmm4 4069 leaq OPENSSL_ia32cap_P(%rip),%r10 4070 movl 4(%r10),%r10d 4071 andl $268437504,%r10d 4072 leaq 16(%rdx),%rax 4073 cmpl $256,%esi 4074 je .L14rounds 4075 cmpl $192,%esi 4076 je .L12rounds 4077 cmpl $128,%esi 4078 jne .Lbad_keybits 4079 4080.L10rounds: 4081 movl $9,%esi 4082 cmpl $268435456,%r10d 4083 je .L10rounds_alt 4084 4085 movups %xmm0,(%rdx) 4086.byte 102,15,58,223,200,1 4087 call .Lkey_expansion_128_cold 4088.byte 102,15,58,223,200,2 4089 call .Lkey_expansion_128 4090.byte 102,15,58,223,200,4 4091 call .Lkey_expansion_128 4092.byte 102,15,58,223,200,8 4093 call .Lkey_expansion_128 4094.byte 102,15,58,223,200,16 4095 call .Lkey_expansion_128 4096.byte 102,15,58,223,200,32 4097 call .Lkey_expansion_128 4098.byte 102,15,58,223,200,64 4099 call .Lkey_expansion_128 4100.byte 102,15,58,223,200,128 4101 call .Lkey_expansion_128 4102.byte 102,15,58,223,200,27 4103 call .Lkey_expansion_128 4104.byte 102,15,58,223,200,54 4105 call .Lkey_expansion_128 4106 movups %xmm0,(%rax) 4107 movl %esi,80(%rax) 4108 xorl %eax,%eax 4109 jmp .Lenc_key_ret 4110 4111.align 16 4112.L10rounds_alt: 4113 movdqa .Lkey_rotate(%rip),%xmm5 4114 movl $8,%r10d 4115 movdqa .Lkey_rcon1(%rip),%xmm4 4116 movdqa %xmm0,%xmm2 4117 movdqu %xmm0,(%rdx) 4118 jmp .Loop_key128 4119 4120.align 16 4121.Loop_key128: 4122.byte 102,15,56,0,197 4123.byte 102,15,56,221,196 4124 pslld $1,%xmm4 4125 leaq 16(%rax),%rax 4126 4127 movdqa %xmm2,%xmm3 4128 pslldq $4,%xmm2 4129 pxor %xmm2,%xmm3 4130 pslldq $4,%xmm2 4131 pxor %xmm2,%xmm3 4132 pslldq $4,%xmm2 4133 pxor %xmm3,%xmm2 4134 4135 pxor %xmm2,%xmm0 4136 movdqu %xmm0,-16(%rax) 4137 movdqa %xmm0,%xmm2 4138 4139 decl %r10d 4140 jnz .Loop_key128 4141 4142 movdqa .Lkey_rcon1b(%rip),%xmm4 4143 4144.byte 102,15,56,0,197 4145.byte 102,15,56,221,196 4146 pslld 
$1,%xmm4 4147 4148 movdqa %xmm2,%xmm3 4149 pslldq $4,%xmm2 4150 pxor %xmm2,%xmm3 4151 pslldq $4,%xmm2 4152 pxor %xmm2,%xmm3 4153 pslldq $4,%xmm2 4154 pxor %xmm3,%xmm2 4155 4156 pxor %xmm2,%xmm0 4157 movdqu %xmm0,(%rax) 4158 4159 movdqa %xmm0,%xmm2 4160.byte 102,15,56,0,197 4161.byte 102,15,56,221,196 4162 4163 movdqa %xmm2,%xmm3 4164 pslldq $4,%xmm2 4165 pxor %xmm2,%xmm3 4166 pslldq $4,%xmm2 4167 pxor %xmm2,%xmm3 4168 pslldq $4,%xmm2 4169 pxor %xmm3,%xmm2 4170 4171 pxor %xmm2,%xmm0 4172 movdqu %xmm0,16(%rax) 4173 4174 movl %esi,96(%rax) 4175 xorl %eax,%eax 4176 jmp .Lenc_key_ret 4177 4178.align 16 4179.L12rounds: 4180 movq 16(%rdi),%xmm2 4181 movl $11,%esi 4182 cmpl $268435456,%r10d 4183 je .L12rounds_alt 4184 4185 movups %xmm0,(%rdx) 4186.byte 102,15,58,223,202,1 4187 call .Lkey_expansion_192a_cold 4188.byte 102,15,58,223,202,2 4189 call .Lkey_expansion_192b 4190.byte 102,15,58,223,202,4 4191 call .Lkey_expansion_192a 4192.byte 102,15,58,223,202,8 4193 call .Lkey_expansion_192b 4194.byte 102,15,58,223,202,16 4195 call .Lkey_expansion_192a 4196.byte 102,15,58,223,202,32 4197 call .Lkey_expansion_192b 4198.byte 102,15,58,223,202,64 4199 call .Lkey_expansion_192a 4200.byte 102,15,58,223,202,128 4201 call .Lkey_expansion_192b 4202 movups %xmm0,(%rax) 4203 movl %esi,48(%rax) 4204 xorq %rax,%rax 4205 jmp .Lenc_key_ret 4206 4207.align 16 4208.L12rounds_alt: 4209 movdqa .Lkey_rotate192(%rip),%xmm5 4210 movdqa .Lkey_rcon1(%rip),%xmm4 4211 movl $8,%r10d 4212 movdqu %xmm0,(%rdx) 4213 jmp .Loop_key192 4214 4215.align 16 4216.Loop_key192: 4217 movq %xmm2,0(%rax) 4218 movdqa %xmm2,%xmm1 4219.byte 102,15,56,0,213 4220.byte 102,15,56,221,212 4221 pslld $1,%xmm4 4222 leaq 24(%rax),%rax 4223 4224 movdqa %xmm0,%xmm3 4225 pslldq $4,%xmm0 4226 pxor %xmm0,%xmm3 4227 pslldq $4,%xmm0 4228 pxor %xmm0,%xmm3 4229 pslldq $4,%xmm0 4230 pxor %xmm3,%xmm0 4231 4232 pshufd $0xff,%xmm0,%xmm3 4233 pxor %xmm1,%xmm3 4234 pslldq $4,%xmm1 4235 pxor %xmm1,%xmm3 4236 4237 pxor %xmm2,%xmm0 4238 pxor 
%xmm3,%xmm2 4239 movdqu %xmm0,-16(%rax) 4240 4241 decl %r10d 4242 jnz .Loop_key192 4243 4244 movl %esi,32(%rax) 4245 xorl %eax,%eax 4246 jmp .Lenc_key_ret 4247 4248.align 16 4249.L14rounds: 4250 movups 16(%rdi),%xmm2 4251 movl $13,%esi 4252 leaq 16(%rax),%rax 4253 cmpl $268435456,%r10d 4254 je .L14rounds_alt 4255 4256 movups %xmm0,(%rdx) 4257 movups %xmm2,16(%rdx) 4258.byte 102,15,58,223,202,1 4259 call .Lkey_expansion_256a_cold 4260.byte 102,15,58,223,200,1 4261 call .Lkey_expansion_256b 4262.byte 102,15,58,223,202,2 4263 call .Lkey_expansion_256a 4264.byte 102,15,58,223,200,2 4265 call .Lkey_expansion_256b 4266.byte 102,15,58,223,202,4 4267 call .Lkey_expansion_256a 4268.byte 102,15,58,223,200,4 4269 call .Lkey_expansion_256b 4270.byte 102,15,58,223,202,8 4271 call .Lkey_expansion_256a 4272.byte 102,15,58,223,200,8 4273 call .Lkey_expansion_256b 4274.byte 102,15,58,223,202,16 4275 call .Lkey_expansion_256a 4276.byte 102,15,58,223,200,16 4277 call .Lkey_expansion_256b 4278.byte 102,15,58,223,202,32 4279 call .Lkey_expansion_256a 4280.byte 102,15,58,223,200,32 4281 call .Lkey_expansion_256b 4282.byte 102,15,58,223,202,64 4283 call .Lkey_expansion_256a 4284 movups %xmm0,(%rax) 4285 movl %esi,16(%rax) 4286 xorq %rax,%rax 4287 jmp .Lenc_key_ret 4288 4289.align 16 4290.L14rounds_alt: 4291 movdqa .Lkey_rotate(%rip),%xmm5 4292 movdqa .Lkey_rcon1(%rip),%xmm4 4293 movl $7,%r10d 4294 movdqu %xmm0,0(%rdx) 4295 movdqa %xmm2,%xmm1 4296 movdqu %xmm2,16(%rdx) 4297 jmp .Loop_key256 4298 4299.align 16 4300.Loop_key256: 4301.byte 102,15,56,0,213 4302.byte 102,15,56,221,212 4303 4304 movdqa %xmm0,%xmm3 4305 pslldq $4,%xmm0 4306 pxor %xmm0,%xmm3 4307 pslldq $4,%xmm0 4308 pxor %xmm0,%xmm3 4309 pslldq $4,%xmm0 4310 pxor %xmm3,%xmm0 4311 pslld $1,%xmm4 4312 4313 pxor %xmm2,%xmm0 4314 movdqu %xmm0,(%rax) 4315 4316 decl %r10d 4317 jz .Ldone_key256 4318 4319 pshufd $0xff,%xmm0,%xmm2 4320 pxor %xmm3,%xmm3 4321.byte 102,15,56,221,211 4322 4323 movdqa %xmm1,%xmm3 4324 pslldq $4,%xmm1 4325 
pxor %xmm1,%xmm3 4326 pslldq $4,%xmm1 4327 pxor %xmm1,%xmm3 4328 pslldq $4,%xmm1 4329 pxor %xmm3,%xmm1 4330 4331 pxor %xmm1,%xmm2 4332 movdqu %xmm2,16(%rax) 4333 leaq 32(%rax),%rax 4334 movdqa %xmm2,%xmm1 4335 4336 jmp .Loop_key256 4337 4338.Ldone_key256: 4339 movl %esi,16(%rax) 4340 xorl %eax,%eax 4341 jmp .Lenc_key_ret 4342 4343.align 16 4344.Lbad_keybits: 4345 movq $-2,%rax 4346.Lenc_key_ret: 4347 pxor %xmm0,%xmm0 4348 pxor %xmm1,%xmm1 4349 pxor %xmm2,%xmm2 4350 pxor %xmm3,%xmm3 4351 pxor %xmm4,%xmm4 4352 pxor %xmm5,%xmm5 4353 addq $8,%rsp 4354.cfi_adjust_cfa_offset -8 4355 .byte 0xf3,0xc3 4356.cfi_endproc 4357.LSEH_end_set_encrypt_key: 4358 4359.align 16 4360.Lkey_expansion_128: 4361 movups %xmm0,(%rax) 4362 leaq 16(%rax),%rax 4363.Lkey_expansion_128_cold: 4364 shufps $16,%xmm0,%xmm4 4365 xorps %xmm4,%xmm0 4366 shufps $140,%xmm0,%xmm4 4367 xorps %xmm4,%xmm0 4368 shufps $255,%xmm1,%xmm1 4369 xorps %xmm1,%xmm0 4370 .byte 0xf3,0xc3 4371 4372.align 16 4373.Lkey_expansion_192a: 4374 movups %xmm0,(%rax) 4375 leaq 16(%rax),%rax 4376.Lkey_expansion_192a_cold: 4377 movaps %xmm2,%xmm5 4378.Lkey_expansion_192b_warm: 4379 shufps $16,%xmm0,%xmm4 4380 movdqa %xmm2,%xmm3 4381 xorps %xmm4,%xmm0 4382 shufps $140,%xmm0,%xmm4 4383 pslldq $4,%xmm3 4384 xorps %xmm4,%xmm0 4385 pshufd $85,%xmm1,%xmm1 4386 pxor %xmm3,%xmm2 4387 pxor %xmm1,%xmm0 4388 pshufd $255,%xmm0,%xmm3 4389 pxor %xmm3,%xmm2 4390 .byte 0xf3,0xc3 4391 4392.align 16 4393.Lkey_expansion_192b: 4394 movaps %xmm0,%xmm3 4395 shufps $68,%xmm0,%xmm5 4396 movups %xmm5,(%rax) 4397 shufps $78,%xmm2,%xmm3 4398 movups %xmm3,16(%rax) 4399 leaq 32(%rax),%rax 4400 jmp .Lkey_expansion_192b_warm 4401 4402.align 16 4403.Lkey_expansion_256a: 4404 movups %xmm2,(%rax) 4405 leaq 16(%rax),%rax 4406.Lkey_expansion_256a_cold: 4407 shufps $16,%xmm0,%xmm4 4408 xorps %xmm4,%xmm0 4409 shufps $140,%xmm0,%xmm4 4410 xorps %xmm4,%xmm0 4411 shufps $255,%xmm1,%xmm1 4412 xorps %xmm1,%xmm0 4413 .byte 0xf3,0xc3 4414 4415.align 16 
4416.Lkey_expansion_256b: 4417 movups %xmm0,(%rax) 4418 leaq 16(%rax),%rax 4419 4420 shufps $16,%xmm2,%xmm4 4421 xorps %xmm4,%xmm2 4422 shufps $140,%xmm2,%xmm4 4423 xorps %xmm4,%xmm2 4424 shufps $170,%xmm1,%xmm1 4425 xorps %xmm1,%xmm2 4426 .byte 0xf3,0xc3 4427.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key 4428.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 4429.align 64 4430.Lbswap_mask: 4431.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 4432.Lincrement32: 4433.long 6,6,6,0 4434.Lincrement64: 4435.long 1,0,0,0 4436.Lxts_magic: 4437.long 0x87,0,1,0 4438.Lincrement1: 4439.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 4440.Lkey_rotate: 4441.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 4442.Lkey_rotate192: 4443.long 0x04070605,0x04070605,0x04070605,0x04070605 4444.Lkey_rcon1: 4445.long 1,1,1,1 4446.Lkey_rcon1b: 4447.long 0x1b,0x1b,0x1b,0x1b 4448 4449.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 4450.align 64 4451#endif 4452