1 # $FreeBSD$ 2.text 3.globl aesni_encrypt 4.type aesni_encrypt,@function 5.align 16 6aesni_encrypt: 7 movups (%rdi),%xmm2 8 movl 240(%rdx),%eax 9 movups (%rdx),%xmm0 10 movups 16(%rdx),%xmm1 11 leaq 32(%rdx),%rdx 12 xorps %xmm0,%xmm2 13.Loop_enc1_1: 14.byte 102,15,56,220,209 15 decl %eax 16 movups (%rdx),%xmm1 17 leaq 16(%rdx),%rdx 18 jnz .Loop_enc1_1 19.byte 102,15,56,221,209 20 movups %xmm2,(%rsi) 21 .byte 0xf3,0xc3 22.size aesni_encrypt,.-aesni_encrypt 23 24.globl aesni_decrypt 25.type aesni_decrypt,@function 26.align 16 27aesni_decrypt: 28 movups (%rdi),%xmm2 29 movl 240(%rdx),%eax 30 movups (%rdx),%xmm0 31 movups 16(%rdx),%xmm1 32 leaq 32(%rdx),%rdx 33 xorps %xmm0,%xmm2 34.Loop_dec1_2: 35.byte 102,15,56,222,209 36 decl %eax 37 movups (%rdx),%xmm1 38 leaq 16(%rdx),%rdx 39 jnz .Loop_dec1_2 40.byte 102,15,56,223,209 41 movups %xmm2,(%rsi) 42 .byte 0xf3,0xc3 43.size aesni_decrypt, .-aesni_decrypt 44.type _aesni_encrypt3,@function 45.align 16 46_aesni_encrypt3: 47 movups (%rcx),%xmm0 48 shrl $1,%eax 49 movups 16(%rcx),%xmm1 50 leaq 32(%rcx),%rcx 51 xorps %xmm0,%xmm2 52 xorps %xmm0,%xmm3 53 xorps %xmm0,%xmm4 54 movups (%rcx),%xmm0 55 56.Lenc_loop3: 57.byte 102,15,56,220,209 58.byte 102,15,56,220,217 59 decl %eax 60.byte 102,15,56,220,225 61 movups 16(%rcx),%xmm1 62.byte 102,15,56,220,208 63.byte 102,15,56,220,216 64 leaq 32(%rcx),%rcx 65.byte 102,15,56,220,224 66 movups (%rcx),%xmm0 67 jnz .Lenc_loop3 68 69.byte 102,15,56,220,209 70.byte 102,15,56,220,217 71.byte 102,15,56,220,225 72.byte 102,15,56,221,208 73.byte 102,15,56,221,216 74.byte 102,15,56,221,224 75 .byte 0xf3,0xc3 76.size _aesni_encrypt3,.-_aesni_encrypt3 77.type _aesni_decrypt3,@function 78.align 16 79_aesni_decrypt3: 80 movups (%rcx),%xmm0 81 shrl $1,%eax 82 movups 16(%rcx),%xmm1 83 leaq 32(%rcx),%rcx 84 xorps %xmm0,%xmm2 85 xorps %xmm0,%xmm3 86 xorps %xmm0,%xmm4 87 movups (%rcx),%xmm0 88 89.Ldec_loop3: 90.byte 102,15,56,222,209 91.byte 102,15,56,222,217 92 decl %eax 93.byte 102,15,56,222,225 94 
movups 16(%rcx),%xmm1 95.byte 102,15,56,222,208 96.byte 102,15,56,222,216 97 leaq 32(%rcx),%rcx 98.byte 102,15,56,222,224 99 movups (%rcx),%xmm0 100 jnz .Ldec_loop3 101 102.byte 102,15,56,222,209 103.byte 102,15,56,222,217 104.byte 102,15,56,222,225 105.byte 102,15,56,223,208 106.byte 102,15,56,223,216 107.byte 102,15,56,223,224 108 .byte 0xf3,0xc3 109.size _aesni_decrypt3,.-_aesni_decrypt3 110.type _aesni_encrypt4,@function 111.align 16 112_aesni_encrypt4: 113 movups (%rcx),%xmm0 114 shrl $1,%eax 115 movups 16(%rcx),%xmm1 116 leaq 32(%rcx),%rcx 117 xorps %xmm0,%xmm2 118 xorps %xmm0,%xmm3 119 xorps %xmm0,%xmm4 120 xorps %xmm0,%xmm5 121 movups (%rcx),%xmm0 122 123.Lenc_loop4: 124.byte 102,15,56,220,209 125.byte 102,15,56,220,217 126 decl %eax 127.byte 102,15,56,220,225 128.byte 102,15,56,220,233 129 movups 16(%rcx),%xmm1 130.byte 102,15,56,220,208 131.byte 102,15,56,220,216 132 leaq 32(%rcx),%rcx 133.byte 102,15,56,220,224 134.byte 102,15,56,220,232 135 movups (%rcx),%xmm0 136 jnz .Lenc_loop4 137 138.byte 102,15,56,220,209 139.byte 102,15,56,220,217 140.byte 102,15,56,220,225 141.byte 102,15,56,220,233 142.byte 102,15,56,221,208 143.byte 102,15,56,221,216 144.byte 102,15,56,221,224 145.byte 102,15,56,221,232 146 .byte 0xf3,0xc3 147.size _aesni_encrypt4,.-_aesni_encrypt4 148.type _aesni_decrypt4,@function 149.align 16 150_aesni_decrypt4: 151 movups (%rcx),%xmm0 152 shrl $1,%eax 153 movups 16(%rcx),%xmm1 154 leaq 32(%rcx),%rcx 155 xorps %xmm0,%xmm2 156 xorps %xmm0,%xmm3 157 xorps %xmm0,%xmm4 158 xorps %xmm0,%xmm5 159 movups (%rcx),%xmm0 160 161.Ldec_loop4: 162.byte 102,15,56,222,209 163.byte 102,15,56,222,217 164 decl %eax 165.byte 102,15,56,222,225 166.byte 102,15,56,222,233 167 movups 16(%rcx),%xmm1 168.byte 102,15,56,222,208 169.byte 102,15,56,222,216 170 leaq 32(%rcx),%rcx 171.byte 102,15,56,222,224 172.byte 102,15,56,222,232 173 movups (%rcx),%xmm0 174 jnz .Ldec_loop4 175 176.byte 102,15,56,222,209 177.byte 102,15,56,222,217 178.byte 102,15,56,222,225 179.byte 
102,15,56,222,233 180.byte 102,15,56,223,208 181.byte 102,15,56,223,216 182.byte 102,15,56,223,224 183.byte 102,15,56,223,232 184 .byte 0xf3,0xc3 185.size _aesni_decrypt4,.-_aesni_decrypt4 186.type _aesni_encrypt6,@function 187.align 16 188_aesni_encrypt6: 189 movups (%rcx),%xmm0 190 shrl $1,%eax 191 movups 16(%rcx),%xmm1 192 leaq 32(%rcx),%rcx 193 xorps %xmm0,%xmm2 194 pxor %xmm0,%xmm3 195.byte 102,15,56,220,209 196 pxor %xmm0,%xmm4 197.byte 102,15,56,220,217 198 pxor %xmm0,%xmm5 199.byte 102,15,56,220,225 200 pxor %xmm0,%xmm6 201.byte 102,15,56,220,233 202 pxor %xmm0,%xmm7 203 decl %eax 204.byte 102,15,56,220,241 205 movups (%rcx),%xmm0 206.byte 102,15,56,220,249 207 jmp .Lenc_loop6_enter 208.align 16 209.Lenc_loop6: 210.byte 102,15,56,220,209 211.byte 102,15,56,220,217 212 decl %eax 213.byte 102,15,56,220,225 214.byte 102,15,56,220,233 215.byte 102,15,56,220,241 216.byte 102,15,56,220,249 217.Lenc_loop6_enter: 218 movups 16(%rcx),%xmm1 219.byte 102,15,56,220,208 220.byte 102,15,56,220,216 221 leaq 32(%rcx),%rcx 222.byte 102,15,56,220,224 223.byte 102,15,56,220,232 224.byte 102,15,56,220,240 225.byte 102,15,56,220,248 226 movups (%rcx),%xmm0 227 jnz .Lenc_loop6 228 229.byte 102,15,56,220,209 230.byte 102,15,56,220,217 231.byte 102,15,56,220,225 232.byte 102,15,56,220,233 233.byte 102,15,56,220,241 234.byte 102,15,56,220,249 235.byte 102,15,56,221,208 236.byte 102,15,56,221,216 237.byte 102,15,56,221,224 238.byte 102,15,56,221,232 239.byte 102,15,56,221,240 240.byte 102,15,56,221,248 241 .byte 0xf3,0xc3 242.size _aesni_encrypt6,.-_aesni_encrypt6 243.type _aesni_decrypt6,@function 244.align 16 245_aesni_decrypt6: 246 movups (%rcx),%xmm0 247 shrl $1,%eax 248 movups 16(%rcx),%xmm1 249 leaq 32(%rcx),%rcx 250 xorps %xmm0,%xmm2 251 pxor %xmm0,%xmm3 252.byte 102,15,56,222,209 253 pxor %xmm0,%xmm4 254.byte 102,15,56,222,217 255 pxor %xmm0,%xmm5 256.byte 102,15,56,222,225 257 pxor %xmm0,%xmm6 258.byte 102,15,56,222,233 259 pxor %xmm0,%xmm7 260 decl %eax 261.byte 
102,15,56,222,241 262 movups (%rcx),%xmm0 263.byte 102,15,56,222,249 264 jmp .Ldec_loop6_enter 265.align 16 266.Ldec_loop6: 267.byte 102,15,56,222,209 268.byte 102,15,56,222,217 269 decl %eax 270.byte 102,15,56,222,225 271.byte 102,15,56,222,233 272.byte 102,15,56,222,241 273.byte 102,15,56,222,249 274.Ldec_loop6_enter: 275 movups 16(%rcx),%xmm1 276.byte 102,15,56,222,208 277.byte 102,15,56,222,216 278 leaq 32(%rcx),%rcx 279.byte 102,15,56,222,224 280.byte 102,15,56,222,232 281.byte 102,15,56,222,240 282.byte 102,15,56,222,248 283 movups (%rcx),%xmm0 284 jnz .Ldec_loop6 285 286.byte 102,15,56,222,209 287.byte 102,15,56,222,217 288.byte 102,15,56,222,225 289.byte 102,15,56,222,233 290.byte 102,15,56,222,241 291.byte 102,15,56,222,249 292.byte 102,15,56,223,208 293.byte 102,15,56,223,216 294.byte 102,15,56,223,224 295.byte 102,15,56,223,232 296.byte 102,15,56,223,240 297.byte 102,15,56,223,248 298 .byte 0xf3,0xc3 299.size _aesni_decrypt6,.-_aesni_decrypt6 300.type _aesni_encrypt8,@function 301.align 16 302_aesni_encrypt8: 303 movups (%rcx),%xmm0 304 shrl $1,%eax 305 movups 16(%rcx),%xmm1 306 leaq 32(%rcx),%rcx 307 xorps %xmm0,%xmm2 308 xorps %xmm0,%xmm3 309.byte 102,15,56,220,209 310 pxor %xmm0,%xmm4 311.byte 102,15,56,220,217 312 pxor %xmm0,%xmm5 313.byte 102,15,56,220,225 314 pxor %xmm0,%xmm6 315.byte 102,15,56,220,233 316 pxor %xmm0,%xmm7 317 decl %eax 318.byte 102,15,56,220,241 319 pxor %xmm0,%xmm8 320.byte 102,15,56,220,249 321 pxor %xmm0,%xmm9 322 movups (%rcx),%xmm0 323.byte 102,68,15,56,220,193 324.byte 102,68,15,56,220,201 325 movups 16(%rcx),%xmm1 326 jmp .Lenc_loop8_enter 327.align 16 328.Lenc_loop8: 329.byte 102,15,56,220,209 330.byte 102,15,56,220,217 331 decl %eax 332.byte 102,15,56,220,225 333.byte 102,15,56,220,233 334.byte 102,15,56,220,241 335.byte 102,15,56,220,249 336.byte 102,68,15,56,220,193 337.byte 102,68,15,56,220,201 338 movups 16(%rcx),%xmm1 339.Lenc_loop8_enter: 340.byte 102,15,56,220,208 341.byte 102,15,56,220,216 342 leaq 32(%rcx),%rcx 
343.byte 102,15,56,220,224 344.byte 102,15,56,220,232 345.byte 102,15,56,220,240 346.byte 102,15,56,220,248 347.byte 102,68,15,56,220,192 348.byte 102,68,15,56,220,200 349 movups (%rcx),%xmm0 350 jnz .Lenc_loop8 351 352.byte 102,15,56,220,209 353.byte 102,15,56,220,217 354.byte 102,15,56,220,225 355.byte 102,15,56,220,233 356.byte 102,15,56,220,241 357.byte 102,15,56,220,249 358.byte 102,68,15,56,220,193 359.byte 102,68,15,56,220,201 360.byte 102,15,56,221,208 361.byte 102,15,56,221,216 362.byte 102,15,56,221,224 363.byte 102,15,56,221,232 364.byte 102,15,56,221,240 365.byte 102,15,56,221,248 366.byte 102,68,15,56,221,192 367.byte 102,68,15,56,221,200 368 .byte 0xf3,0xc3 369.size _aesni_encrypt8,.-_aesni_encrypt8 370.type _aesni_decrypt8,@function 371.align 16 372_aesni_decrypt8: 373 movups (%rcx),%xmm0 374 shrl $1,%eax 375 movups 16(%rcx),%xmm1 376 leaq 32(%rcx),%rcx 377 xorps %xmm0,%xmm2 378 xorps %xmm0,%xmm3 379.byte 102,15,56,222,209 380 pxor %xmm0,%xmm4 381.byte 102,15,56,222,217 382 pxor %xmm0,%xmm5 383.byte 102,15,56,222,225 384 pxor %xmm0,%xmm6 385.byte 102,15,56,222,233 386 pxor %xmm0,%xmm7 387 decl %eax 388.byte 102,15,56,222,241 389 pxor %xmm0,%xmm8 390.byte 102,15,56,222,249 391 pxor %xmm0,%xmm9 392 movups (%rcx),%xmm0 393.byte 102,68,15,56,222,193 394.byte 102,68,15,56,222,201 395 movups 16(%rcx),%xmm1 396 jmp .Ldec_loop8_enter 397.align 16 398.Ldec_loop8: 399.byte 102,15,56,222,209 400.byte 102,15,56,222,217 401 decl %eax 402.byte 102,15,56,222,225 403.byte 102,15,56,222,233 404.byte 102,15,56,222,241 405.byte 102,15,56,222,249 406.byte 102,68,15,56,222,193 407.byte 102,68,15,56,222,201 408 movups 16(%rcx),%xmm1 409.Ldec_loop8_enter: 410.byte 102,15,56,222,208 411.byte 102,15,56,222,216 412 leaq 32(%rcx),%rcx 413.byte 102,15,56,222,224 414.byte 102,15,56,222,232 415.byte 102,15,56,222,240 416.byte 102,15,56,222,248 417.byte 102,68,15,56,222,192 418.byte 102,68,15,56,222,200 419 movups (%rcx),%xmm0 420 jnz .Ldec_loop8 421 422.byte 102,15,56,222,209 
423.byte 102,15,56,222,217 424.byte 102,15,56,222,225 425.byte 102,15,56,222,233 426.byte 102,15,56,222,241 427.byte 102,15,56,222,249 428.byte 102,68,15,56,222,193 429.byte 102,68,15,56,222,201 430.byte 102,15,56,223,208 431.byte 102,15,56,223,216 432.byte 102,15,56,223,224 433.byte 102,15,56,223,232 434.byte 102,15,56,223,240 435.byte 102,15,56,223,248 436.byte 102,68,15,56,223,192 437.byte 102,68,15,56,223,200 438 .byte 0xf3,0xc3 439.size _aesni_decrypt8,.-_aesni_decrypt8 440.globl aesni_ecb_encrypt 441.type aesni_ecb_encrypt,@function 442.align 16 443aesni_ecb_encrypt: 444 andq $-16,%rdx 445 jz .Lecb_ret 446 447 movl 240(%rcx),%eax 448 movups (%rcx),%xmm0 449 movq %rcx,%r11 450 movl %eax,%r10d 451 testl %r8d,%r8d 452 jz .Lecb_decrypt 453 454 cmpq $128,%rdx 455 jb .Lecb_enc_tail 456 457 movdqu (%rdi),%xmm2 458 movdqu 16(%rdi),%xmm3 459 movdqu 32(%rdi),%xmm4 460 movdqu 48(%rdi),%xmm5 461 movdqu 64(%rdi),%xmm6 462 movdqu 80(%rdi),%xmm7 463 movdqu 96(%rdi),%xmm8 464 movdqu 112(%rdi),%xmm9 465 leaq 128(%rdi),%rdi 466 subq $128,%rdx 467 jmp .Lecb_enc_loop8_enter 468.align 16 469.Lecb_enc_loop8: 470 movups %xmm2,(%rsi) 471 movq %r11,%rcx 472 movdqu (%rdi),%xmm2 473 movl %r10d,%eax 474 movups %xmm3,16(%rsi) 475 movdqu 16(%rdi),%xmm3 476 movups %xmm4,32(%rsi) 477 movdqu 32(%rdi),%xmm4 478 movups %xmm5,48(%rsi) 479 movdqu 48(%rdi),%xmm5 480 movups %xmm6,64(%rsi) 481 movdqu 64(%rdi),%xmm6 482 movups %xmm7,80(%rsi) 483 movdqu 80(%rdi),%xmm7 484 movups %xmm8,96(%rsi) 485 movdqu 96(%rdi),%xmm8 486 movups %xmm9,112(%rsi) 487 leaq 128(%rsi),%rsi 488 movdqu 112(%rdi),%xmm9 489 leaq 128(%rdi),%rdi 490.Lecb_enc_loop8_enter: 491 492 call _aesni_encrypt8 493 494 subq $128,%rdx 495 jnc .Lecb_enc_loop8 496 497 movups %xmm2,(%rsi) 498 movq %r11,%rcx 499 movups %xmm3,16(%rsi) 500 movl %r10d,%eax 501 movups %xmm4,32(%rsi) 502 movups %xmm5,48(%rsi) 503 movups %xmm6,64(%rsi) 504 movups %xmm7,80(%rsi) 505 movups %xmm8,96(%rsi) 506 movups %xmm9,112(%rsi) 507 leaq 128(%rsi),%rsi 508 addq 
$128,%rdx 509 jz .Lecb_ret 510 511.Lecb_enc_tail: 512 movups (%rdi),%xmm2 513 cmpq $32,%rdx 514 jb .Lecb_enc_one 515 movups 16(%rdi),%xmm3 516 je .Lecb_enc_two 517 movups 32(%rdi),%xmm4 518 cmpq $64,%rdx 519 jb .Lecb_enc_three 520 movups 48(%rdi),%xmm5 521 je .Lecb_enc_four 522 movups 64(%rdi),%xmm6 523 cmpq $96,%rdx 524 jb .Lecb_enc_five 525 movups 80(%rdi),%xmm7 526 je .Lecb_enc_six 527 movdqu 96(%rdi),%xmm8 528 call _aesni_encrypt8 529 movups %xmm2,(%rsi) 530 movups %xmm3,16(%rsi) 531 movups %xmm4,32(%rsi) 532 movups %xmm5,48(%rsi) 533 movups %xmm6,64(%rsi) 534 movups %xmm7,80(%rsi) 535 movups %xmm8,96(%rsi) 536 jmp .Lecb_ret 537.align 16 538.Lecb_enc_one: 539 movups (%rcx),%xmm0 540 movups 16(%rcx),%xmm1 541 leaq 32(%rcx),%rcx 542 xorps %xmm0,%xmm2 543.Loop_enc1_3: 544.byte 102,15,56,220,209 545 decl %eax 546 movups (%rcx),%xmm1 547 leaq 16(%rcx),%rcx 548 jnz .Loop_enc1_3 549.byte 102,15,56,221,209 550 movups %xmm2,(%rsi) 551 jmp .Lecb_ret 552.align 16 553.Lecb_enc_two: 554 xorps %xmm4,%xmm4 555 call _aesni_encrypt3 556 movups %xmm2,(%rsi) 557 movups %xmm3,16(%rsi) 558 jmp .Lecb_ret 559.align 16 560.Lecb_enc_three: 561 call _aesni_encrypt3 562 movups %xmm2,(%rsi) 563 movups %xmm3,16(%rsi) 564 movups %xmm4,32(%rsi) 565 jmp .Lecb_ret 566.align 16 567.Lecb_enc_four: 568 call _aesni_encrypt4 569 movups %xmm2,(%rsi) 570 movups %xmm3,16(%rsi) 571 movups %xmm4,32(%rsi) 572 movups %xmm5,48(%rsi) 573 jmp .Lecb_ret 574.align 16 575.Lecb_enc_five: 576 xorps %xmm7,%xmm7 577 call _aesni_encrypt6 578 movups %xmm2,(%rsi) 579 movups %xmm3,16(%rsi) 580 movups %xmm4,32(%rsi) 581 movups %xmm5,48(%rsi) 582 movups %xmm6,64(%rsi) 583 jmp .Lecb_ret 584.align 16 585.Lecb_enc_six: 586 call _aesni_encrypt6 587 movups %xmm2,(%rsi) 588 movups %xmm3,16(%rsi) 589 movups %xmm4,32(%rsi) 590 movups %xmm5,48(%rsi) 591 movups %xmm6,64(%rsi) 592 movups %xmm7,80(%rsi) 593 jmp .Lecb_ret 594 595.align 16 596.Lecb_decrypt: 597 cmpq $128,%rdx 598 jb .Lecb_dec_tail 599 600 movdqu (%rdi),%xmm2 601 
movdqu 16(%rdi),%xmm3 602 movdqu 32(%rdi),%xmm4 603 movdqu 48(%rdi),%xmm5 604 movdqu 64(%rdi),%xmm6 605 movdqu 80(%rdi),%xmm7 606 movdqu 96(%rdi),%xmm8 607 movdqu 112(%rdi),%xmm9 608 leaq 128(%rdi),%rdi 609 subq $128,%rdx 610 jmp .Lecb_dec_loop8_enter 611.align 16 612.Lecb_dec_loop8: 613 movups %xmm2,(%rsi) 614 movq %r11,%rcx 615 movdqu (%rdi),%xmm2 616 movl %r10d,%eax 617 movups %xmm3,16(%rsi) 618 movdqu 16(%rdi),%xmm3 619 movups %xmm4,32(%rsi) 620 movdqu 32(%rdi),%xmm4 621 movups %xmm5,48(%rsi) 622 movdqu 48(%rdi),%xmm5 623 movups %xmm6,64(%rsi) 624 movdqu 64(%rdi),%xmm6 625 movups %xmm7,80(%rsi) 626 movdqu 80(%rdi),%xmm7 627 movups %xmm8,96(%rsi) 628 movdqu 96(%rdi),%xmm8 629 movups %xmm9,112(%rsi) 630 leaq 128(%rsi),%rsi 631 movdqu 112(%rdi),%xmm9 632 leaq 128(%rdi),%rdi 633.Lecb_dec_loop8_enter: 634 635 call _aesni_decrypt8 636 637 movups (%r11),%xmm0 638 subq $128,%rdx 639 jnc .Lecb_dec_loop8 640 641 movups %xmm2,(%rsi) 642 movq %r11,%rcx 643 movups %xmm3,16(%rsi) 644 movl %r10d,%eax 645 movups %xmm4,32(%rsi) 646 movups %xmm5,48(%rsi) 647 movups %xmm6,64(%rsi) 648 movups %xmm7,80(%rsi) 649 movups %xmm8,96(%rsi) 650 movups %xmm9,112(%rsi) 651 leaq 128(%rsi),%rsi 652 addq $128,%rdx 653 jz .Lecb_ret 654 655.Lecb_dec_tail: 656 movups (%rdi),%xmm2 657 cmpq $32,%rdx 658 jb .Lecb_dec_one 659 movups 16(%rdi),%xmm3 660 je .Lecb_dec_two 661 movups 32(%rdi),%xmm4 662 cmpq $64,%rdx 663 jb .Lecb_dec_three 664 movups 48(%rdi),%xmm5 665 je .Lecb_dec_four 666 movups 64(%rdi),%xmm6 667 cmpq $96,%rdx 668 jb .Lecb_dec_five 669 movups 80(%rdi),%xmm7 670 je .Lecb_dec_six 671 movups 96(%rdi),%xmm8 672 movups (%rcx),%xmm0 673 call _aesni_decrypt8 674 movups %xmm2,(%rsi) 675 movups %xmm3,16(%rsi) 676 movups %xmm4,32(%rsi) 677 movups %xmm5,48(%rsi) 678 movups %xmm6,64(%rsi) 679 movups %xmm7,80(%rsi) 680 movups %xmm8,96(%rsi) 681 jmp .Lecb_ret 682.align 16 683.Lecb_dec_one: 684 movups (%rcx),%xmm0 685 movups 16(%rcx),%xmm1 686 leaq 32(%rcx),%rcx 687 xorps %xmm0,%xmm2 688.Loop_dec1_4: 
689.byte 102,15,56,222,209 690 decl %eax 691 movups (%rcx),%xmm1 692 leaq 16(%rcx),%rcx 693 jnz .Loop_dec1_4 694.byte 102,15,56,223,209 695 movups %xmm2,(%rsi) 696 jmp .Lecb_ret 697.align 16 698.Lecb_dec_two: 699 xorps %xmm4,%xmm4 700 call _aesni_decrypt3 701 movups %xmm2,(%rsi) 702 movups %xmm3,16(%rsi) 703 jmp .Lecb_ret 704.align 16 705.Lecb_dec_three: 706 call _aesni_decrypt3 707 movups %xmm2,(%rsi) 708 movups %xmm3,16(%rsi) 709 movups %xmm4,32(%rsi) 710 jmp .Lecb_ret 711.align 16 712.Lecb_dec_four: 713 call _aesni_decrypt4 714 movups %xmm2,(%rsi) 715 movups %xmm3,16(%rsi) 716 movups %xmm4,32(%rsi) 717 movups %xmm5,48(%rsi) 718 jmp .Lecb_ret 719.align 16 720.Lecb_dec_five: 721 xorps %xmm7,%xmm7 722 call _aesni_decrypt6 723 movups %xmm2,(%rsi) 724 movups %xmm3,16(%rsi) 725 movups %xmm4,32(%rsi) 726 movups %xmm5,48(%rsi) 727 movups %xmm6,64(%rsi) 728 jmp .Lecb_ret 729.align 16 730.Lecb_dec_six: 731 call _aesni_decrypt6 732 movups %xmm2,(%rsi) 733 movups %xmm3,16(%rsi) 734 movups %xmm4,32(%rsi) 735 movups %xmm5,48(%rsi) 736 movups %xmm6,64(%rsi) 737 movups %xmm7,80(%rsi) 738 739.Lecb_ret: 740 .byte 0xf3,0xc3 741.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 742.globl aesni_ccm64_encrypt_blocks 743.type aesni_ccm64_encrypt_blocks,@function 744.align 16 745aesni_ccm64_encrypt_blocks: 746 movl 240(%rcx),%eax 747 movdqu (%r8),%xmm9 748 movdqa .Lincrement64(%rip),%xmm6 749 movdqa .Lbswap_mask(%rip),%xmm7 750 751 shrl $1,%eax 752 leaq 0(%rcx),%r11 753 movdqu (%r9),%xmm3 754 movdqa %xmm9,%xmm2 755 movl %eax,%r10d 756.byte 102,68,15,56,0,207 757 jmp .Lccm64_enc_outer 758.align 16 759.Lccm64_enc_outer: 760 movups (%r11),%xmm0 761 movl %r10d,%eax 762 movups (%rdi),%xmm8 763 764 xorps %xmm0,%xmm2 765 movups 16(%r11),%xmm1 766 xorps %xmm8,%xmm0 767 leaq 32(%r11),%rcx 768 xorps %xmm0,%xmm3 769 movups (%rcx),%xmm0 770 771.Lccm64_enc2_loop: 772.byte 102,15,56,220,209 773 decl %eax 774.byte 102,15,56,220,217 775 movups 16(%rcx),%xmm1 776.byte 102,15,56,220,208 777 leaq 32(%rcx),%rcx 
778.byte 102,15,56,220,216 779 movups 0(%rcx),%xmm0 780 jnz .Lccm64_enc2_loop 781.byte 102,15,56,220,209 782.byte 102,15,56,220,217 783 paddq %xmm6,%xmm9 784.byte 102,15,56,221,208 785.byte 102,15,56,221,216 786 787 decq %rdx 788 leaq 16(%rdi),%rdi 789 xorps %xmm2,%xmm8 790 movdqa %xmm9,%xmm2 791 movups %xmm8,(%rsi) 792 leaq 16(%rsi),%rsi 793.byte 102,15,56,0,215 794 jnz .Lccm64_enc_outer 795 796 movups %xmm3,(%r9) 797 .byte 0xf3,0xc3 798.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 799.globl aesni_ccm64_decrypt_blocks 800.type aesni_ccm64_decrypt_blocks,@function 801.align 16 802aesni_ccm64_decrypt_blocks: 803 movl 240(%rcx),%eax 804 movups (%r8),%xmm9 805 movdqu (%r9),%xmm3 806 movdqa .Lincrement64(%rip),%xmm6 807 movdqa .Lbswap_mask(%rip),%xmm7 808 809 movaps %xmm9,%xmm2 810 movl %eax,%r10d 811 movq %rcx,%r11 812.byte 102,68,15,56,0,207 813 movups (%rcx),%xmm0 814 movups 16(%rcx),%xmm1 815 leaq 32(%rcx),%rcx 816 xorps %xmm0,%xmm2 817.Loop_enc1_5: 818.byte 102,15,56,220,209 819 decl %eax 820 movups (%rcx),%xmm1 821 leaq 16(%rcx),%rcx 822 jnz .Loop_enc1_5 823.byte 102,15,56,221,209 824 movups (%rdi),%xmm8 825 paddq %xmm6,%xmm9 826 leaq 16(%rdi),%rdi 827 jmp .Lccm64_dec_outer 828.align 16 829.Lccm64_dec_outer: 830 xorps %xmm2,%xmm8 831 movdqa %xmm9,%xmm2 832 movl %r10d,%eax 833 movups %xmm8,(%rsi) 834 leaq 16(%rsi),%rsi 835.byte 102,15,56,0,215 836 837 subq $1,%rdx 838 jz .Lccm64_dec_break 839 840 movups (%r11),%xmm0 841 shrl $1,%eax 842 movups 16(%r11),%xmm1 843 xorps %xmm0,%xmm8 844 leaq 32(%r11),%rcx 845 xorps %xmm0,%xmm2 846 xorps %xmm8,%xmm3 847 movups (%rcx),%xmm0 848 849.Lccm64_dec2_loop: 850.byte 102,15,56,220,209 851 decl %eax 852.byte 102,15,56,220,217 853 movups 16(%rcx),%xmm1 854.byte 102,15,56,220,208 855 leaq 32(%rcx),%rcx 856.byte 102,15,56,220,216 857 movups 0(%rcx),%xmm0 858 jnz .Lccm64_dec2_loop 859 movups (%rdi),%xmm8 860 paddq %xmm6,%xmm9 861.byte 102,15,56,220,209 862.byte 102,15,56,220,217 863 leaq 16(%rdi),%rdi 864.byte 
102,15,56,221,208 865.byte 102,15,56,221,216 866 jmp .Lccm64_dec_outer 867 868.align 16 869.Lccm64_dec_break: 870 871 movups (%r11),%xmm0 872 movups 16(%r11),%xmm1 873 xorps %xmm0,%xmm8 874 leaq 32(%r11),%r11 875 xorps %xmm8,%xmm3 876.Loop_enc1_6: 877.byte 102,15,56,220,217 878 decl %eax 879 movups (%r11),%xmm1 880 leaq 16(%r11),%r11 881 jnz .Loop_enc1_6 882.byte 102,15,56,221,217 883 movups %xmm3,(%r9) 884 .byte 0xf3,0xc3 885.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 886.globl aesni_ctr32_encrypt_blocks 887.type aesni_ctr32_encrypt_blocks,@function 888.align 16 889aesni_ctr32_encrypt_blocks: 890 cmpq $1,%rdx 891 je .Lctr32_one_shortcut 892 893 movdqu (%r8),%xmm14 894 movdqa .Lbswap_mask(%rip),%xmm15 895 xorl %eax,%eax 896.byte 102,69,15,58,22,242,3 897.byte 102,68,15,58,34,240,3 898 899 movl 240(%rcx),%eax 900 bswapl %r10d 901 pxor %xmm12,%xmm12 902 pxor %xmm13,%xmm13 903.byte 102,69,15,58,34,226,0 904 leaq 3(%r10),%r11 905.byte 102,69,15,58,34,235,0 906 incl %r10d 907.byte 102,69,15,58,34,226,1 908 incq %r11 909.byte 102,69,15,58,34,235,1 910 incl %r10d 911.byte 102,69,15,58,34,226,2 912 incq %r11 913.byte 102,69,15,58,34,235,2 914 movdqa %xmm12,-40(%rsp) 915.byte 102,69,15,56,0,231 916 movdqa %xmm13,-24(%rsp) 917.byte 102,69,15,56,0,239 918 919 pshufd $192,%xmm12,%xmm2 920 pshufd $128,%xmm12,%xmm3 921 pshufd $64,%xmm12,%xmm4 922 cmpq $6,%rdx 923 jb .Lctr32_tail 924 shrl $1,%eax 925 movq %rcx,%r11 926 movl %eax,%r10d 927 subq $6,%rdx 928 jmp .Lctr32_loop6 929 930.align 16 931.Lctr32_loop6: 932 pshufd $192,%xmm13,%xmm5 933 por %xmm14,%xmm2 934 movups (%r11),%xmm0 935 pshufd $128,%xmm13,%xmm6 936 por %xmm14,%xmm3 937 movups 16(%r11),%xmm1 938 pshufd $64,%xmm13,%xmm7 939 por %xmm14,%xmm4 940 por %xmm14,%xmm5 941 xorps %xmm0,%xmm2 942 por %xmm14,%xmm6 943 por %xmm14,%xmm7 944 945 946 947 948 pxor %xmm0,%xmm3 949.byte 102,15,56,220,209 950 leaq 32(%r11),%rcx 951 pxor %xmm0,%xmm4 952.byte 102,15,56,220,217 953 movdqa .Lincrement32(%rip),%xmm13 954 
pxor %xmm0,%xmm5 955.byte 102,15,56,220,225 956 movdqa -40(%rsp),%xmm12 957 pxor %xmm0,%xmm6 958.byte 102,15,56,220,233 959 pxor %xmm0,%xmm7 960 movups (%rcx),%xmm0 961 decl %eax 962.byte 102,15,56,220,241 963.byte 102,15,56,220,249 964 jmp .Lctr32_enc_loop6_enter 965.align 16 966.Lctr32_enc_loop6: 967.byte 102,15,56,220,209 968.byte 102,15,56,220,217 969 decl %eax 970.byte 102,15,56,220,225 971.byte 102,15,56,220,233 972.byte 102,15,56,220,241 973.byte 102,15,56,220,249 974.Lctr32_enc_loop6_enter: 975 movups 16(%rcx),%xmm1 976.byte 102,15,56,220,208 977.byte 102,15,56,220,216 978 leaq 32(%rcx),%rcx 979.byte 102,15,56,220,224 980.byte 102,15,56,220,232 981.byte 102,15,56,220,240 982.byte 102,15,56,220,248 983 movups (%rcx),%xmm0 984 jnz .Lctr32_enc_loop6 985 986.byte 102,15,56,220,209 987 paddd %xmm13,%xmm12 988.byte 102,15,56,220,217 989 paddd -24(%rsp),%xmm13 990.byte 102,15,56,220,225 991 movdqa %xmm12,-40(%rsp) 992.byte 102,15,56,220,233 993 movdqa %xmm13,-24(%rsp) 994.byte 102,15,56,220,241 995.byte 102,69,15,56,0,231 996.byte 102,15,56,220,249 997.byte 102,69,15,56,0,239 998 999.byte 102,15,56,221,208 1000 movups (%rdi),%xmm8 1001.byte 102,15,56,221,216 1002 movups 16(%rdi),%xmm9 1003.byte 102,15,56,221,224 1004 movups 32(%rdi),%xmm10 1005.byte 102,15,56,221,232 1006 movups 48(%rdi),%xmm11 1007.byte 102,15,56,221,240 1008 movups 64(%rdi),%xmm1 1009.byte 102,15,56,221,248 1010 movups 80(%rdi),%xmm0 1011 leaq 96(%rdi),%rdi 1012 1013 xorps %xmm2,%xmm8 1014 pshufd $192,%xmm12,%xmm2 1015 xorps %xmm3,%xmm9 1016 pshufd $128,%xmm12,%xmm3 1017 movups %xmm8,(%rsi) 1018 xorps %xmm4,%xmm10 1019 pshufd $64,%xmm12,%xmm4 1020 movups %xmm9,16(%rsi) 1021 xorps %xmm5,%xmm11 1022 movups %xmm10,32(%rsi) 1023 xorps %xmm6,%xmm1 1024 movups %xmm11,48(%rsi) 1025 xorps %xmm7,%xmm0 1026 movups %xmm1,64(%rsi) 1027 movups %xmm0,80(%rsi) 1028 leaq 96(%rsi),%rsi 1029 movl %r10d,%eax 1030 subq $6,%rdx 1031 jnc .Lctr32_loop6 1032 1033 addq $6,%rdx 1034 jz .Lctr32_done 1035 movq %r11,%rcx 
1036 leal 1(%rax,%rax,1),%eax 1037 1038.Lctr32_tail: 1039 por %xmm14,%xmm2 1040 movups (%rdi),%xmm8 1041 cmpq $2,%rdx 1042 jb .Lctr32_one 1043 1044 por %xmm14,%xmm3 1045 movups 16(%rdi),%xmm9 1046 je .Lctr32_two 1047 1048 pshufd $192,%xmm13,%xmm5 1049 por %xmm14,%xmm4 1050 movups 32(%rdi),%xmm10 1051 cmpq $4,%rdx 1052 jb .Lctr32_three 1053 1054 pshufd $128,%xmm13,%xmm6 1055 por %xmm14,%xmm5 1056 movups 48(%rdi),%xmm11 1057 je .Lctr32_four 1058 1059 por %xmm14,%xmm6 1060 xorps %xmm7,%xmm7 1061 1062 call _aesni_encrypt6 1063 1064 movups 64(%rdi),%xmm1 1065 xorps %xmm2,%xmm8 1066 xorps %xmm3,%xmm9 1067 movups %xmm8,(%rsi) 1068 xorps %xmm4,%xmm10 1069 movups %xmm9,16(%rsi) 1070 xorps %xmm5,%xmm11 1071 movups %xmm10,32(%rsi) 1072 xorps %xmm6,%xmm1 1073 movups %xmm11,48(%rsi) 1074 movups %xmm1,64(%rsi) 1075 jmp .Lctr32_done 1076 1077.align 16 1078.Lctr32_one_shortcut: 1079 movups (%r8),%xmm2 1080 movups (%rdi),%xmm8 1081 movl 240(%rcx),%eax 1082.Lctr32_one: 1083 movups (%rcx),%xmm0 1084 movups 16(%rcx),%xmm1 1085 leaq 32(%rcx),%rcx 1086 xorps %xmm0,%xmm2 1087.Loop_enc1_7: 1088.byte 102,15,56,220,209 1089 decl %eax 1090 movups (%rcx),%xmm1 1091 leaq 16(%rcx),%rcx 1092 jnz .Loop_enc1_7 1093.byte 102,15,56,221,209 1094 xorps %xmm2,%xmm8 1095 movups %xmm8,(%rsi) 1096 jmp .Lctr32_done 1097 1098.align 16 1099.Lctr32_two: 1100 xorps %xmm4,%xmm4 1101 call _aesni_encrypt3 1102 xorps %xmm2,%xmm8 1103 xorps %xmm3,%xmm9 1104 movups %xmm8,(%rsi) 1105 movups %xmm9,16(%rsi) 1106 jmp .Lctr32_done 1107 1108.align 16 1109.Lctr32_three: 1110 call _aesni_encrypt3 1111 xorps %xmm2,%xmm8 1112 xorps %xmm3,%xmm9 1113 movups %xmm8,(%rsi) 1114 xorps %xmm4,%xmm10 1115 movups %xmm9,16(%rsi) 1116 movups %xmm10,32(%rsi) 1117 jmp .Lctr32_done 1118 1119.align 16 1120.Lctr32_four: 1121 call _aesni_encrypt4 1122 xorps %xmm2,%xmm8 1123 xorps %xmm3,%xmm9 1124 movups %xmm8,(%rsi) 1125 xorps %xmm4,%xmm10 1126 movups %xmm9,16(%rsi) 1127 xorps %xmm5,%xmm11 1128 movups %xmm10,32(%rsi) 1129 movups 
%xmm11,48(%rsi) 1130 1131.Lctr32_done: 1132 .byte 0xf3,0xc3 1133.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks 1134.globl aesni_xts_encrypt 1135.type aesni_xts_encrypt,@function 1136.align 16 1137aesni_xts_encrypt: 1138 leaq -104(%rsp),%rsp 1139 movups (%r9),%xmm15 1140 movl 240(%r8),%eax 1141 movl 240(%rcx),%r10d 1142 movups (%r8),%xmm0 1143 movups 16(%r8),%xmm1 1144 leaq 32(%r8),%r8 1145 xorps %xmm0,%xmm15 1146.Loop_enc1_8: 1147.byte 102,68,15,56,220,249 1148 decl %eax 1149 movups (%r8),%xmm1 1150 leaq 16(%r8),%r8 1151 jnz .Loop_enc1_8 1152.byte 102,68,15,56,221,249 1153 movq %rcx,%r11 1154 movl %r10d,%eax 1155 movq %rdx,%r9 1156 andq $-16,%rdx 1157 1158 movdqa .Lxts_magic(%rip),%xmm8 1159 pxor %xmm14,%xmm14 1160 pcmpgtd %xmm15,%xmm14 1161 pshufd $19,%xmm14,%xmm9 1162 pxor %xmm14,%xmm14 1163 movdqa %xmm15,%xmm10 1164 paddq %xmm15,%xmm15 1165 pand %xmm8,%xmm9 1166 pcmpgtd %xmm15,%xmm14 1167 pxor %xmm9,%xmm15 1168 pshufd $19,%xmm14,%xmm9 1169 pxor %xmm14,%xmm14 1170 movdqa %xmm15,%xmm11 1171 paddq %xmm15,%xmm15 1172 pand %xmm8,%xmm9 1173 pcmpgtd %xmm15,%xmm14 1174 pxor %xmm9,%xmm15 1175 pshufd $19,%xmm14,%xmm9 1176 pxor %xmm14,%xmm14 1177 movdqa %xmm15,%xmm12 1178 paddq %xmm15,%xmm15 1179 pand %xmm8,%xmm9 1180 pcmpgtd %xmm15,%xmm14 1181 pxor %xmm9,%xmm15 1182 pshufd $19,%xmm14,%xmm9 1183 pxor %xmm14,%xmm14 1184 movdqa %xmm15,%xmm13 1185 paddq %xmm15,%xmm15 1186 pand %xmm8,%xmm9 1187 pcmpgtd %xmm15,%xmm14 1188 pxor %xmm9,%xmm15 1189 subq $96,%rdx 1190 jc .Lxts_enc_short 1191 1192 shrl $1,%eax 1193 subl $1,%eax 1194 movl %eax,%r10d 1195 jmp .Lxts_enc_grandloop 1196 1197.align 16 1198.Lxts_enc_grandloop: 1199 pshufd $19,%xmm14,%xmm9 1200 movdqa %xmm15,%xmm14 1201 paddq %xmm15,%xmm15 1202 movdqu 0(%rdi),%xmm2 1203 pand %xmm8,%xmm9 1204 movdqu 16(%rdi),%xmm3 1205 pxor %xmm9,%xmm15 1206 1207 movdqu 32(%rdi),%xmm4 1208 pxor %xmm10,%xmm2 1209 movdqu 48(%rdi),%xmm5 1210 pxor %xmm11,%xmm3 1211 movdqu 64(%rdi),%xmm6 1212 pxor %xmm12,%xmm4 1213 movdqu 
80(%rdi),%xmm7 1214 leaq 96(%rdi),%rdi 1215 pxor %xmm13,%xmm5 1216 movups (%r11),%xmm0 1217 pxor %xmm14,%xmm6 1218 pxor %xmm15,%xmm7 1219 1220 1221 1222 movups 16(%r11),%xmm1 1223 pxor %xmm0,%xmm2 1224 pxor %xmm0,%xmm3 1225 movdqa %xmm10,0(%rsp) 1226.byte 102,15,56,220,209 1227 leaq 32(%r11),%rcx 1228 pxor %xmm0,%xmm4 1229 movdqa %xmm11,16(%rsp) 1230.byte 102,15,56,220,217 1231 pxor %xmm0,%xmm5 1232 movdqa %xmm12,32(%rsp) 1233.byte 102,15,56,220,225 1234 pxor %xmm0,%xmm6 1235 movdqa %xmm13,48(%rsp) 1236.byte 102,15,56,220,233 1237 pxor %xmm0,%xmm7 1238 movups (%rcx),%xmm0 1239 decl %eax 1240 movdqa %xmm14,64(%rsp) 1241.byte 102,15,56,220,241 1242 movdqa %xmm15,80(%rsp) 1243.byte 102,15,56,220,249 1244 pxor %xmm14,%xmm14 1245 pcmpgtd %xmm15,%xmm14 1246 jmp .Lxts_enc_loop6_enter 1247 1248.align 16 1249.Lxts_enc_loop6: 1250.byte 102,15,56,220,209 1251.byte 102,15,56,220,217 1252 decl %eax 1253.byte 102,15,56,220,225 1254.byte 102,15,56,220,233 1255.byte 102,15,56,220,241 1256.byte 102,15,56,220,249 1257.Lxts_enc_loop6_enter: 1258 movups 16(%rcx),%xmm1 1259.byte 102,15,56,220,208 1260.byte 102,15,56,220,216 1261 leaq 32(%rcx),%rcx 1262.byte 102,15,56,220,224 1263.byte 102,15,56,220,232 1264.byte 102,15,56,220,240 1265.byte 102,15,56,220,248 1266 movups (%rcx),%xmm0 1267 jnz .Lxts_enc_loop6 1268 1269 pshufd $19,%xmm14,%xmm9 1270 pxor %xmm14,%xmm14 1271 paddq %xmm15,%xmm15 1272.byte 102,15,56,220,209 1273 pand %xmm8,%xmm9 1274.byte 102,15,56,220,217 1275 pcmpgtd %xmm15,%xmm14 1276.byte 102,15,56,220,225 1277 pxor %xmm9,%xmm15 1278.byte 102,15,56,220,233 1279.byte 102,15,56,220,241 1280.byte 102,15,56,220,249 1281 movups 16(%rcx),%xmm1 1282 1283 pshufd $19,%xmm14,%xmm9 1284 pxor %xmm14,%xmm14 1285 movdqa %xmm15,%xmm10 1286 paddq %xmm15,%xmm15 1287.byte 102,15,56,220,208 1288 pand %xmm8,%xmm9 1289.byte 102,15,56,220,216 1290 pcmpgtd %xmm15,%xmm14 1291.byte 102,15,56,220,224 1292 pxor %xmm9,%xmm15 1293.byte 102,15,56,220,232 1294.byte 102,15,56,220,240 1295.byte 
102,15,56,220,248 1296 movups 32(%rcx),%xmm0 1297 1298 pshufd $19,%xmm14,%xmm9 1299 pxor %xmm14,%xmm14 1300 movdqa %xmm15,%xmm11 1301 paddq %xmm15,%xmm15 1302.byte 102,15,56,220,209 1303 pand %xmm8,%xmm9 1304.byte 102,15,56,220,217 1305 pcmpgtd %xmm15,%xmm14 1306.byte 102,15,56,220,225 1307 pxor %xmm9,%xmm15 1308.byte 102,15,56,220,233 1309.byte 102,15,56,220,241 1310.byte 102,15,56,220,249 1311 1312 pshufd $19,%xmm14,%xmm9 1313 pxor %xmm14,%xmm14 1314 movdqa %xmm15,%xmm12 1315 paddq %xmm15,%xmm15 1316.byte 102,15,56,221,208 1317 pand %xmm8,%xmm9 1318.byte 102,15,56,221,216 1319 pcmpgtd %xmm15,%xmm14 1320.byte 102,15,56,221,224 1321 pxor %xmm9,%xmm15 1322.byte 102,15,56,221,232 1323.byte 102,15,56,221,240 1324.byte 102,15,56,221,248 1325 1326 pshufd $19,%xmm14,%xmm9 1327 pxor %xmm14,%xmm14 1328 movdqa %xmm15,%xmm13 1329 paddq %xmm15,%xmm15 1330 xorps 0(%rsp),%xmm2 1331 pand %xmm8,%xmm9 1332 xorps 16(%rsp),%xmm3 1333 pcmpgtd %xmm15,%xmm14 1334 pxor %xmm9,%xmm15 1335 1336 xorps 32(%rsp),%xmm4 1337 movups %xmm2,0(%rsi) 1338 xorps 48(%rsp),%xmm5 1339 movups %xmm3,16(%rsi) 1340 xorps 64(%rsp),%xmm6 1341 movups %xmm4,32(%rsi) 1342 xorps 80(%rsp),%xmm7 1343 movups %xmm5,48(%rsi) 1344 movl %r10d,%eax 1345 movups %xmm6,64(%rsi) 1346 movups %xmm7,80(%rsi) 1347 leaq 96(%rsi),%rsi 1348 subq $96,%rdx 1349 jnc .Lxts_enc_grandloop 1350 1351 leal 3(%rax,%rax,1),%eax 1352 movq %r11,%rcx 1353 movl %eax,%r10d 1354 1355.Lxts_enc_short: 1356 addq $96,%rdx 1357 jz .Lxts_enc_done 1358 1359 cmpq $32,%rdx 1360 jb .Lxts_enc_one 1361 je .Lxts_enc_two 1362 1363 cmpq $64,%rdx 1364 jb .Lxts_enc_three 1365 je .Lxts_enc_four 1366 1367 pshufd $19,%xmm14,%xmm9 1368 movdqa %xmm15,%xmm14 1369 paddq %xmm15,%xmm15 1370 movdqu (%rdi),%xmm2 1371 pand %xmm8,%xmm9 1372 movdqu 16(%rdi),%xmm3 1373 pxor %xmm9,%xmm15 1374 1375 movdqu 32(%rdi),%xmm4 1376 pxor %xmm10,%xmm2 1377 movdqu 48(%rdi),%xmm5 1378 pxor %xmm11,%xmm3 1379 movdqu 64(%rdi),%xmm6 1380 leaq 80(%rdi),%rdi 1381 pxor %xmm12,%xmm4 1382 pxor 
%xmm13,%xmm5 1383 pxor %xmm14,%xmm6 1384 1385 call _aesni_encrypt6 1386 1387 xorps %xmm10,%xmm2 1388 movdqa %xmm15,%xmm10 1389 xorps %xmm11,%xmm3 1390 xorps %xmm12,%xmm4 1391 movdqu %xmm2,(%rsi) 1392 xorps %xmm13,%xmm5 1393 movdqu %xmm3,16(%rsi) 1394 xorps %xmm14,%xmm6 1395 movdqu %xmm4,32(%rsi) 1396 movdqu %xmm5,48(%rsi) 1397 movdqu %xmm6,64(%rsi) 1398 leaq 80(%rsi),%rsi 1399 jmp .Lxts_enc_done 1400 1401.align 16 1402.Lxts_enc_one: 1403 movups (%rdi),%xmm2 1404 leaq 16(%rdi),%rdi 1405 xorps %xmm10,%xmm2 1406 movups (%rcx),%xmm0 1407 movups 16(%rcx),%xmm1 1408 leaq 32(%rcx),%rcx 1409 xorps %xmm0,%xmm2 1410.Loop_enc1_9: 1411.byte 102,15,56,220,209 1412 decl %eax 1413 movups (%rcx),%xmm1 1414 leaq 16(%rcx),%rcx 1415 jnz .Loop_enc1_9 1416.byte 102,15,56,221,209 1417 xorps %xmm10,%xmm2 1418 movdqa %xmm11,%xmm10 1419 movups %xmm2,(%rsi) 1420 leaq 16(%rsi),%rsi 1421 jmp .Lxts_enc_done 1422 1423.align 16 1424.Lxts_enc_two: 1425 movups (%rdi),%xmm2 1426 movups 16(%rdi),%xmm3 1427 leaq 32(%rdi),%rdi 1428 xorps %xmm10,%xmm2 1429 xorps %xmm11,%xmm3 1430 1431 call _aesni_encrypt3 1432 1433 xorps %xmm10,%xmm2 1434 movdqa %xmm12,%xmm10 1435 xorps %xmm11,%xmm3 1436 movups %xmm2,(%rsi) 1437 movups %xmm3,16(%rsi) 1438 leaq 32(%rsi),%rsi 1439 jmp .Lxts_enc_done 1440 1441.align 16 1442.Lxts_enc_three: 1443 movups (%rdi),%xmm2 1444 movups 16(%rdi),%xmm3 1445 movups 32(%rdi),%xmm4 1446 leaq 48(%rdi),%rdi 1447 xorps %xmm10,%xmm2 1448 xorps %xmm11,%xmm3 1449 xorps %xmm12,%xmm4 1450 1451 call _aesni_encrypt3 1452 1453 xorps %xmm10,%xmm2 1454 movdqa %xmm13,%xmm10 1455 xorps %xmm11,%xmm3 1456 xorps %xmm12,%xmm4 1457 movups %xmm2,(%rsi) 1458 movups %xmm3,16(%rsi) 1459 movups %xmm4,32(%rsi) 1460 leaq 48(%rsi),%rsi 1461 jmp .Lxts_enc_done 1462 1463.align 16 1464.Lxts_enc_four: 1465 movups (%rdi),%xmm2 1466 movups 16(%rdi),%xmm3 1467 movups 32(%rdi),%xmm4 1468 xorps %xmm10,%xmm2 1469 movups 48(%rdi),%xmm5 1470 leaq 64(%rdi),%rdi 1471 xorps %xmm11,%xmm3 1472 xorps %xmm12,%xmm4 1473 xorps 
%xmm13,%xmm5                    # operands of the xorps mnemonic that ends the preceding source line

# ----------------------------------------------------------------------
# Raw-byte encodings used throughout this file (AT&T operand order in the
# per-line comments: source, destination):
#   .byte 102,15,56,220,M      aesenc
#   .byte 102,15,56,221,M      aesenclast
#   .byte 102,15,56,222,M      aesdec
#   .byte 102,15,56,223,M      aesdeclast
#   .byte 102,15,56,219,M      aesimc
#   .byte 102,15,58,223,M,I    aeskeygenassist $I
#   a 68 following the 102 is a REX prefix: destination is %xmm8..%xmm15
#   .byte 0xf3,0xc3            rep ret
#   .byte 0x48,0x83,0xEC,0x08  subq $8,%rsp
#   .long 0x9066A4F3           rep movsb, followed by a 2-byte nop
#   .long 0x9066AAF3           rep stosb, followed by a 2-byte nop
# ----------------------------------------------------------------------

    call    _aesni_encrypt4

    xorps   %xmm10,%xmm2
    movdqa  %xmm15,%xmm10
    xorps   %xmm11,%xmm3
    xorps   %xmm12,%xmm4
    movups  %xmm2,(%rsi)
    xorps   %xmm13,%xmm5
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    leaq    64(%rsi),%rsi
    jmp     .Lxts_enc_done

.align 16
.Lxts_enc_done:
    andq    $15,%r9                     # r9 = trailing byte count (0..15)
    jz      .Lxts_enc_ret
    movq    %r9,%rdx

# Swap the trailing input bytes with the head of the last output block.
.Lxts_enc_steal:
    movzbl  (%rdi),%eax
    movzbl  -16(%rsi),%ecx
    leaq    1(%rdi),%rdi
    movb    %al,-16(%rsi)
    movb    %cl,0(%rsi)
    leaq    1(%rsi),%rsi
    subq    $1,%rdx
    jnz     .Lxts_enc_steal

    subq    %r9,%rsi                    # rewind the output pointer
    movq    %r11,%rcx                   # reload key schedule pointer
    movl    %r10d,%eax                  # reload round counter

    movups  -16(%rsi),%xmm2
    xorps   %xmm10,%xmm2
    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    leaq    32(%rcx),%rcx
    xorps   %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_enc1_10
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
    xorps   %xmm10,%xmm2
    movups  %xmm2,-16(%rsi)

.Lxts_enc_ret:
    leaq    104(%rsp),%rsp
.Lxts_enc_epilogue:
    .byte   0xf3,0xc3                   # rep ret
.size aesni_xts_encrypt,.-aesni_xts_encrypt

# ----------------------------------------------------------------------
# aesni_xts_decrypt
#
# Registers as read by the code below:
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  length in bytes
#   %rcx  key schedule used to decrypt the data blocks
#   %r8   key schedule used to encrypt the initial tweak
#   %r9   pointer to the 16-byte value the tweak is derived from
# The dword at offset 240 of each schedule is used as its round counter.
#
# 104 bytes of stack are reserved; 0..80(%rsp) hold the six tweaks of the
# current 6-block batch.  Tweaks live in %xmm10..%xmm15, %xmm8 holds
# .Lxts_magic, and %xmm9/%xmm14 are scratch for the tweak update.
# When the length is not a multiple of 16, one full block is held back
# and combined with the trailing bytes in .Lxts_dec_done2/.Lxts_dec_steal.
# ----------------------------------------------------------------------
.globl aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
aesni_xts_decrypt:
    leaq    -104(%rsp),%rsp
    movups  (%r9),%xmm15
    movl    240(%r8),%eax
    movl    240(%rcx),%r10d
    # Encrypt the value loaded from (%r9) with the %r8 schedule -> %xmm15.
    movups  (%r8),%xmm0
    movups  16(%r8),%xmm1
    leaq    32(%r8),%r8
    xorps   %xmm0,%xmm15
.Loop_enc1_11:
.byte 102,68,15,56,220,249              # aesenc %xmm1,%xmm15
    decl    %eax
    movups  (%r8),%xmm1
    leaq    16(%r8),%r8
    jnz     .Loop_enc1_11
.byte 102,68,15,56,221,249              # aesenclast %xmm1,%xmm15
    # If the length has a partial tail, hold back 16 bytes for stealing.
    xorl    %eax,%eax
    testq   $15,%rdx
    setnz   %al
    shlq    $4,%rax
    subq    %rax,%rdx

    movq    %rcx,%r11                   # r11 = data key schedule (kept)
    movl    %r10d,%eax
    movq    %rdx,%r9                    # r9 = adjusted length; low 4 bits = tail bytes
    andq    $-16,%rdx                   # rdx = whole-block byte count

    # Derive four successive tweaks into %xmm10..%xmm13; %xmm15 carries the
    # next one.  Each step doubles %xmm15 (paddq) and folds the shifted-out
    # bits back in through the .Lxts_magic mask.
    movdqa  .Lxts_magic(%rip),%xmm8
    pxor    %xmm14,%xmm14
    pcmpgtd %xmm15,%xmm14
    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm10
    paddq   %xmm15,%xmm15
    pand    %xmm8,%xmm9
    pcmpgtd %xmm15,%xmm14
    pxor    %xmm9,%xmm15
    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm11
    paddq   %xmm15,%xmm15
    pand    %xmm8,%xmm9
    pcmpgtd %xmm15,%xmm14
    pxor    %xmm9,%xmm15
    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm12
    paddq   %xmm15,%xmm15
    pand    %xmm8,%xmm9
    pcmpgtd %xmm15,%xmm14
    pxor    %xmm9,%xmm15
    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm13
    paddq   %xmm15,%xmm15
    pand    %xmm8,%xmm9
    pcmpgtd %xmm15,%xmm14
    pxor    %xmm9,%xmm15
    subq    $96,%rdx
    jc      .Lxts_dec_short             # fewer than six whole blocks

    # Loop counter for the two-rounds-per-pass inner loop: rounds/2 - 1.
    shrl    $1,%eax
    subl    $1,%eax
    movl    %eax,%r10d
    jmp     .Lxts_dec_grandloop

# Six blocks per iteration.  Loads, tweak xor, round-key-0 xor and the
# first aesdec round are interleaved with saving the tweaks to the stack.
.align 16
.Lxts_dec_grandloop:
    pshufd  $19,%xmm14,%xmm9
    movdqa  %xmm15,%xmm14
    paddq   %xmm15,%xmm15
    movdqu  0(%rdi),%xmm2
    pand    %xmm8,%xmm9
    movdqu  16(%rdi),%xmm3
    pxor    %xmm9,%xmm15

    movdqu  32(%rdi),%xmm4
    pxor    %xmm10,%xmm2
    movdqu  48(%rdi),%xmm5
    pxor    %xmm11,%xmm3
    movdqu  64(%rdi),%xmm6
    pxor    %xmm12,%xmm4
    movdqu  80(%rdi),%xmm7
    leaq    96(%rdi),%rdi
    pxor    %xmm13,%xmm5
    movups  (%r11),%xmm0
    pxor    %xmm14,%xmm6
    pxor    %xmm15,%xmm7

    movups  16(%r11),%xmm1
    pxor    %xmm0,%xmm2
    pxor    %xmm0,%xmm3
    movdqa  %xmm10,0(%rsp)
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    leaq    32(%r11),%rcx
    pxor    %xmm0,%xmm4
    movdqa  %xmm11,16(%rsp)
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
    pxor    %xmm0,%xmm5
    movdqa  %xmm12,32(%rsp)
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
    pxor    %xmm0,%xmm6
    movdqa  %xmm13,48(%rsp)
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
    pxor    %xmm0,%xmm7
    movups  (%rcx),%xmm0
    decl    %eax                        # flags consumed by jnz in the loop below
    movdqa  %xmm14,64(%rsp)
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
    movdqa  %xmm15,80(%rsp)
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
    pxor    %xmm14,%xmm14
    pcmpgtd %xmm15,%xmm14
    jmp     .Lxts_dec_loop6_enter

.align 16
.Lxts_dec_loop6:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
    decl    %eax
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
.Lxts_dec_loop6_enter:
    movups  16(%rcx),%xmm1
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
    leaq    32(%rcx),%rcx
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
.byte 102,15,56,222,232                 # aesdec %xmm0,%xmm5
.byte 102,15,56,222,240                 # aesdec %xmm0,%xmm6
.byte 102,15,56,222,248                 # aesdec %xmm0,%xmm7
    movups  (%rcx),%xmm0
    jnz     .Lxts_dec_loop6

    # Remaining rounds, interleaved with computing the tweaks for the
    # next batch into %xmm10..%xmm13 and %xmm15.
    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    paddq   %xmm15,%xmm15
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    pand    %xmm8,%xmm9
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
    pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
    pxor    %xmm9,%xmm15
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
    movups  16(%rcx),%xmm1

    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm10
    paddq   %xmm15,%xmm15
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
    pand    %xmm8,%xmm9
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
    pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
    pxor    %xmm9,%xmm15
.byte 102,15,56,222,232                 # aesdec %xmm0,%xmm5
.byte 102,15,56,222,240                 # aesdec %xmm0,%xmm6
.byte 102,15,56,222,248                 # aesdec %xmm0,%xmm7
    movups  32(%rcx),%xmm0

    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm11
    paddq   %xmm15,%xmm15
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    pand    %xmm8,%xmm9
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
    pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
    pxor    %xmm9,%xmm15
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7

    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm12
    paddq   %xmm15,%xmm15
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
    pand    %xmm8,%xmm9
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
    pcmpgtd %xmm15,%xmm14
.byte 102,15,56,223,224                 # aesdeclast %xmm0,%xmm4
    pxor    %xmm9,%xmm15
.byte 102,15,56,223,232                 # aesdeclast %xmm0,%xmm5
.byte 102,15,56,223,240                 # aesdeclast %xmm0,%xmm6
.byte 102,15,56,223,248                 # aesdeclast %xmm0,%xmm7

    pshufd  $19,%xmm14,%xmm9
    pxor    %xmm14,%xmm14
    movdqa  %xmm15,%xmm13
    paddq   %xmm15,%xmm15
    xorps   0(%rsp),%xmm2               # xor with the tweaks saved for this batch
    pand    %xmm8,%xmm9
    xorps   16(%rsp),%xmm3
    pcmpgtd %xmm15,%xmm14
    pxor    %xmm9,%xmm15

    xorps   32(%rsp),%xmm4
    movups  %xmm2,0(%rsi)
    xorps   48(%rsp),%xmm5
    movups  %xmm3,16(%rsi)
    xorps   64(%rsp),%xmm6
    movups  %xmm4,32(%rsi)
    xorps   80(%rsp),%xmm7
    movups  %xmm5,48(%rsi)
    movl    %r10d,%eax
    movups  %xmm6,64(%rsi)
    movups  %xmm7,80(%rsi)
    leaq    96(%rsi),%rsi
    subq    $96,%rdx
    jnc     .Lxts_dec_grandloop

    leal    3(%rax,%rax,1),%eax         # undo the rounds/2 - 1 scaling
    movq    %r11,%rcx
    movl    %eax,%r10d

# Fewer than six whole blocks remain: dispatch on the remaining byte count.
.Lxts_dec_short:
    addq    $96,%rdx
    jz      .Lxts_dec_done

    cmpq    $32,%rdx
    jb      .Lxts_dec_one
    je      .Lxts_dec_two

    cmpq    $64,%rdx
    jb      .Lxts_dec_three
    je      .Lxts_dec_four

    # Five blocks.
    pshufd  $19,%xmm14,%xmm9
    movdqa  %xmm15,%xmm14
    paddq   %xmm15,%xmm15
    movdqu  (%rdi),%xmm2
    pand    %xmm8,%xmm9
    movdqu  16(%rdi),%xmm3
    pxor    %xmm9,%xmm15

    movdqu  32(%rdi),%xmm4
    pxor    %xmm10,%xmm2
    movdqu  48(%rdi),%xmm5
    pxor    %xmm11,%xmm3
    movdqu  64(%rdi),%xmm6
    leaq    80(%rdi),%rdi
    pxor    %xmm12,%xmm4
    pxor    %xmm13,%xmm5
    pxor    %xmm14,%xmm6

    call    _aesni_decrypt6

    xorps   %xmm10,%xmm2
    xorps   %xmm11,%xmm3
    xorps   %xmm12,%xmm4
    movdqu  %xmm2,(%rsi)
    xorps   %xmm13,%xmm5
    movdqu  %xmm3,16(%rsi)
    xorps   %xmm14,%xmm6
    movdqu  %xmm4,32(%rsi)
    pxor    %xmm14,%xmm14
    movdqu  %xmm5,48(%rsi)
    pcmpgtd %xmm15,%xmm14
    movdqu  %xmm6,64(%rsi)
    leaq    80(%rsi),%rsi
    pshufd  $19,%xmm14,%xmm11
    andq    $15,%r9
    jz      .Lxts_dec_ret

    # Trailing bytes follow: %xmm10 = current tweak, %xmm11 = next tweak.
    movdqa  %xmm15,%xmm10
    paddq   %xmm15,%xmm15
    pand    %xmm8,%xmm11
    pxor    %xmm15,%xmm11
    jmp     .Lxts_dec_done2

.align 16
.Lxts_dec_one:
    movups  (%rdi),%xmm2
    leaq    16(%rdi),%rdi
    xorps   %xmm10,%xmm2
    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    leaq    32(%rcx),%rcx
    xorps   %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_dec1_12
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
    xorps   %xmm10,%xmm2
    movdqa  %xmm11,%xmm10               # shift the two following tweaks down
    movups  %xmm2,(%rsi)
    movdqa  %xmm12,%xmm11
    leaq    16(%rsi),%rsi
    jmp     .Lxts_dec_done

.align 16
.Lxts_dec_two:
    movups  (%rdi),%xmm2
    movups  16(%rdi),%xmm3
    leaq    32(%rdi),%rdi
    xorps   %xmm10,%xmm2
    xorps   %xmm11,%xmm3

    call    _aesni_decrypt3

    xorps   %xmm10,%xmm2
    movdqa  %xmm12,%xmm10
    xorps   %xmm11,%xmm3
    movdqa  %xmm13,%xmm11
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    leaq    32(%rsi),%rsi
    jmp     .Lxts_dec_done

.align 16
.Lxts_dec_three:
    movups  (%rdi),%xmm2
    movups  16(%rdi),%xmm3
    movups  32(%rdi),%xmm4
    leaq    48(%rdi),%rdi
    xorps   %xmm10,%xmm2
    xorps   %xmm11,%xmm3
    xorps   %xmm12,%xmm4

    call    _aesni_decrypt3

    xorps   %xmm10,%xmm2
    movdqa  %xmm13,%xmm10
    xorps   %xmm11,%xmm3
    movdqa  %xmm15,%xmm11
    xorps   %xmm12,%xmm4
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    leaq    48(%rsi),%rsi
    jmp     .Lxts_dec_done

.align 16
.Lxts_dec_four:
    pshufd  $19,%xmm14,%xmm9
    movdqa  %xmm15,%xmm14
    paddq   %xmm15,%xmm15
    movups  (%rdi),%xmm2
    pand    %xmm8,%xmm9
    movups  16(%rdi),%xmm3
    pxor    %xmm9,%xmm15

    movups  32(%rdi),%xmm4
    xorps   %xmm10,%xmm2
    movups  48(%rdi),%xmm5
    leaq    64(%rdi),%rdi
    xorps   %xmm11,%xmm3
    xorps   %xmm12,%xmm4
    xorps   %xmm13,%xmm5

    call    _aesni_decrypt4

    xorps   %xmm10,%xmm2
    movdqa  %xmm14,%xmm10
    xorps   %xmm11,%xmm3
    movdqa  %xmm15,%xmm11
    xorps   %xmm12,%xmm4
    movups  %xmm2,(%rsi)
    xorps   %xmm13,%xmm5
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    leaq    64(%rsi),%rsi
    jmp     .Lxts_dec_done

.align 16
.Lxts_dec_done:
    andq    $15,%r9                     # r9 = trailing byte count (0..15)
    jz      .Lxts_dec_ret
.Lxts_dec_done2:
    movq    %r9,%rdx
    movq    %r11,%rcx
    movl    %r10d,%eax

    # Decrypt the held-back full block with the later tweak (%xmm11).
    movups  (%rdi),%xmm2
    xorps   %xmm11,%xmm2
    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    leaq    32(%rcx),%rcx
    xorps   %xmm0,%xmm2
.Loop_dec1_13:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_dec1_13
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
    xorps   %xmm11,%xmm2
    movups  %xmm2,(%rsi)

# Swap the trailing input bytes with the head of the block just written.
.Lxts_dec_steal:
    movzbl  16(%rdi),%eax
    movzbl  (%rsi),%ecx
    leaq    1(%rdi),%rdi
    movb    %al,(%rsi)
    movb    %cl,16(%rsi)
    leaq    1(%rsi),%rsi
    subq    $1,%rdx
    jnz     .Lxts_dec_steal

    subq    %r9,%rsi                    # rewind the output pointer
    movq    %r11,%rcx
    movl    %r10d,%eax

    # Decrypt the recombined block with the earlier tweak (%xmm10).
    movups  (%rsi),%xmm2
    xorps   %xmm10,%xmm2
    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    leaq    32(%rcx),%rcx
    xorps   %xmm0,%xmm2
.Loop_dec1_14:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_dec1_14
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
    xorps   %xmm10,%xmm2
    movups  %xmm2,(%rsi)

.Lxts_dec_ret:
    leaq    104(%rsp),%rsp
.Lxts_dec_epilogue:
    .byte   0xf3,0xc3                   # rep ret
.size aesni_xts_decrypt,.-aesni_xts_decrypt

# ----------------------------------------------------------------------
# aesni_cbc_encrypt
#
# Registers as read by the code below:
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  length in bytes; returns immediately when zero
#   %rcx  key schedule (round counter read from 240(%rcx))
#   %r8   pointer to the 16-byte IV; the updated IV is stored back
#   %r9d  non-zero: encrypt path, zero: decrypt path
#
# The decrypt path keeps one 16-byte temporary at -24(%rsp), below the
# stack pointer, and issues calls while that slot is live (the pushed
# return address occupies -8(%rsp)..-1(%rsp), so the two do not overlap).
# ----------------------------------------------------------------------
.globl aesni_cbc_encrypt
.type aesni_cbc_encrypt,@function
.align 16
aesni_cbc_encrypt:
    testq   %rdx,%rdx
    jz      .Lcbc_ret

    movl    240(%rcx),%r10d             # r10d = round counter (kept)
    movq    %rcx,%r11                   # r11 = key schedule (kept)
    testl   %r9d,%r9d
    jz      .Lcbc_decrypt

    movups  (%r8),%xmm2                 # xmm2 = chaining value
    movl    %r10d,%eax
    cmpq    $16,%rdx
    jb      .Lcbc_enc_tail
    subq    $16,%rdx
    jmp     .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
    movups  (%rdi),%xmm3
    leaq    16(%rdi),%rdi

    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    xorps   %xmm0,%xmm3
    leaq    32(%rcx),%rcx
    xorps   %xmm3,%xmm2                 # chain ^ input ^ round key 0
.Loop_enc1_15:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_enc1_15
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
    movl    %r10d,%eax
    movq    %r11,%rcx
    movups  %xmm2,0(%rsi)
    leaq    16(%rsi),%rsi
    subq    $16,%rdx
    jnc     .Lcbc_enc_loop
    addq    $16,%rdx
    jnz     .Lcbc_enc_tail
    movups  %xmm2,(%r8)                 # store updated IV
    jmp     .Lcbc_ret

# Partial final block: copy the rdx input bytes to the output buffer,
# zero-fill up to 16 bytes, then encrypt that block in place.
.Lcbc_enc_tail:
    movq    %rdx,%rcx
    xchgq   %rdi,%rsi
.long 0x9066A4F3                        # rep movsb
    movl    $16,%ecx
    subq    %rdx,%rcx
    xorl    %eax,%eax
.long 0x9066AAF3                        # rep stosb
    leaq    -16(%rdi),%rdi
    movl    %r10d,%eax
    movq    %rdi,%rsi
    movq    %r11,%rcx
    xorq    %rdx,%rdx
    jmp     .Lcbc_enc_loop

.align 16
.Lcbc_decrypt:
    movups  (%r8),%xmm9                 # xmm9 = chaining value
    movl    %r10d,%eax
    cmpq    $112,%rdx
    jbe     .Lcbc_dec_tail
    shrl    $1,%r10d                    # halved counter for the 8-block helper
    subq    $112,%rdx
    movl    %r10d,%eax
    movaps  %xmm9,-24(%rsp)
    jmp     .Lcbc_dec_loop8_enter
.align 16
.Lcbc_dec_loop8:
    movaps  %xmm0,-24(%rsp)
    movups  %xmm9,(%rsi)
    leaq    16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
    movups  (%rcx),%xmm0
    movups  (%rdi),%xmm2
    movups  16(%rdi),%xmm3
    movups  16(%rcx),%xmm1

    leaq    32(%rcx),%rcx
    movdqu  32(%rdi),%xmm4
    xorps   %xmm0,%xmm2
    movdqu  48(%rdi),%xmm5
    xorps   %xmm0,%xmm3
    movdqu  64(%rdi),%xmm6
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    pxor    %xmm0,%xmm4
    movdqu  80(%rdi),%xmm7
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
    pxor    %xmm0,%xmm5
    movdqu  96(%rdi),%xmm8
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
    pxor    %xmm0,%xmm6
    movdqu  112(%rdi),%xmm9
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
    pxor    %xmm0,%xmm7
    decl    %eax                        # flags still live at the call below
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
    pxor    %xmm0,%xmm8
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
    pxor    %xmm0,%xmm9
    movups  (%rcx),%xmm0
.byte 102,68,15,56,222,193              # aesdec %xmm1,%xmm8
.byte 102,68,15,56,222,201              # aesdec %xmm1,%xmm9
    movups  16(%rcx),%xmm1

    call    .Ldec_loop8_enter

    # xor each result with the preceding input block (saved IV for the first).
    movups  (%rdi),%xmm1
    movups  16(%rdi),%xmm0
    xorps   -24(%rsp),%xmm2
    xorps   %xmm1,%xmm3
    movups  32(%rdi),%xmm1
    xorps   %xmm0,%xmm4
    movups  48(%rdi),%xmm0
    xorps   %xmm1,%xmm5
    movups  64(%rdi),%xmm1
    xorps   %xmm0,%xmm6
    movups  80(%rdi),%xmm0
    xorps   %xmm1,%xmm7
    movups  96(%rdi),%xmm1
    xorps   %xmm0,%xmm8
    movups  112(%rdi),%xmm0
    xorps   %xmm1,%xmm9
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    movl    %r10d,%eax
    movups  %xmm6,64(%rsi)
    movq    %r11,%rcx
    movups  %xmm7,80(%rsi)
    leaq    128(%rdi),%rdi
    movups  %xmm8,96(%rsi)
    leaq    112(%rsi),%rsi
    subq    $128,%rdx
    ja      .Lcbc_dec_loop8

    movaps  %xmm9,%xmm2
    movaps  %xmm0,%xmm9
    addq    $112,%rdx
    jle     .Lcbc_dec_tail_collected
    movups  %xmm2,(%rsi)
    leal    1(%r10,%r10,1),%eax         # undo the shrl: 2*r10 + 1
    leaq    16(%rsi),%rsi
# One to seven blocks remain.  Input copies are kept in %xmm8/%xmm7/%xmm6
# for chaining; the last result is left in %xmm2 for .Lcbc_dec_tail_collected.
.Lcbc_dec_tail:
    movups  (%rdi),%xmm2
    movaps  %xmm2,%xmm8
    cmpq    $16,%rdx
    jbe     .Lcbc_dec_one

    movups  16(%rdi),%xmm3
    movaps  %xmm3,%xmm7
    cmpq    $32,%rdx
    jbe     .Lcbc_dec_two

    movups  32(%rdi),%xmm4
    movaps  %xmm4,%xmm6
    cmpq    $48,%rdx
    jbe     .Lcbc_dec_three

    movups  48(%rdi),%xmm5
    cmpq    $64,%rdx
    jbe     .Lcbc_dec_four

    movups  64(%rdi),%xmm6
    cmpq    $80,%rdx
    jbe     .Lcbc_dec_five

    movups  80(%rdi),%xmm7
    cmpq    $96,%rdx
    jbe     .Lcbc_dec_six

    # Seven blocks.
    movups  96(%rdi),%xmm8
    movaps  %xmm9,-24(%rsp)
    call    _aesni_decrypt8
    movups  (%rdi),%xmm1
    movups  16(%rdi),%xmm0
    xorps   -24(%rsp),%xmm2
    xorps   %xmm1,%xmm3
    movups  32(%rdi),%xmm1
    xorps   %xmm0,%xmm4
    movups  48(%rdi),%xmm0
    xorps   %xmm1,%xmm5
    movups  64(%rdi),%xmm1
    xorps   %xmm0,%xmm6
    movups  80(%rdi),%xmm0
    xorps   %xmm1,%xmm7
    movups  96(%rdi),%xmm9
    xorps   %xmm0,%xmm8
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    movups  %xmm6,64(%rsi)
    movups  %xmm7,80(%rsi)
    leaq    96(%rsi),%rsi
    movaps  %xmm8,%xmm2
    subq    $112,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_one:
    movups  (%rcx),%xmm0
    movups  16(%rcx),%xmm1
    leaq    32(%rcx),%rcx
    xorps   %xmm0,%xmm2
.Loop_dec1_16:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
    decl    %eax
    movups  (%rcx),%xmm1
    leaq    16(%rcx),%rcx
    jnz     .Loop_dec1_16
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
    xorps   %xmm9,%xmm2
    movaps  %xmm8,%xmm9
    subq    $16,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_two:
    xorps   %xmm4,%xmm4                 # third lane of the 3-block helper is unused
    call    _aesni_decrypt3
    xorps   %xmm9,%xmm2
    xorps   %xmm8,%xmm3
    movups  %xmm2,(%rsi)
    movaps  %xmm7,%xmm9
    movaps  %xmm3,%xmm2
    leaq    16(%rsi),%rsi
    subq    $32,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_three:
    call    _aesni_decrypt3
    xorps   %xmm9,%xmm2
    xorps   %xmm8,%xmm3
    movups  %xmm2,(%rsi)
    xorps   %xmm7,%xmm4
    movups  %xmm3,16(%rsi)
    movaps  %xmm6,%xmm9
    movaps  %xmm4,%xmm2
    leaq    32(%rsi),%rsi
    subq    $48,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_four:
    call    _aesni_decrypt4
    xorps   %xmm9,%xmm2
    movups  48(%rdi),%xmm9
    xorps   %xmm8,%xmm3
    movups  %xmm2,(%rsi)
    xorps   %xmm7,%xmm4
    movups  %xmm3,16(%rsi)
    xorps   %xmm6,%xmm5
    movups  %xmm4,32(%rsi)
    movaps  %xmm5,%xmm2
    leaq    48(%rsi),%rsi
    subq    $64,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_five:
    xorps   %xmm7,%xmm7                 # sixth lane of the 6-block helper is unused
    call    _aesni_decrypt6
    movups  16(%rdi),%xmm1
    movups  32(%rdi),%xmm0
    xorps   %xmm9,%xmm2
    xorps   %xmm8,%xmm3
    xorps   %xmm1,%xmm4
    movups  48(%rdi),%xmm1
    xorps   %xmm0,%xmm5
    movups  64(%rdi),%xmm9
    xorps   %xmm1,%xmm6
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    leaq    64(%rsi),%rsi
    movaps  %xmm6,%xmm2
    subq    $80,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_six:
    call    _aesni_decrypt6
    movups  16(%rdi),%xmm1
    movups  32(%rdi),%xmm0
    xorps   %xmm9,%xmm2
    xorps   %xmm8,%xmm3
    xorps   %xmm1,%xmm4
    movups  48(%rdi),%xmm1
    xorps   %xmm0,%xmm5
    movups  64(%rdi),%xmm0
    xorps   %xmm1,%xmm6
    movups  80(%rdi),%xmm9
    xorps   %xmm0,%xmm7
    movups  %xmm2,(%rsi)
    movups  %xmm3,16(%rsi)
    movups  %xmm4,32(%rsi)
    movups  %xmm5,48(%rsi)
    movups  %xmm6,64(%rsi)
    leaq    80(%rsi),%rsi
    movaps  %xmm7,%xmm2
    subq    $96,%rdx
    jmp     .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_tail_collected:
    andq    $15,%rdx
    movups  %xmm9,(%r8)                 # store updated IV (movups leaves the flags intact)
    jnz     .Lcbc_dec_tail_partial
    movups  %xmm2,(%rsi)
    jmp     .Lcbc_dec_ret
.align 16
.Lcbc_dec_tail_partial:
    # Spill the last block and copy 16 - (rdx & 15) bytes of it to the output.
    movaps  %xmm2,-24(%rsp)
    movq    $16,%rcx
    movq    %rsi,%rdi
    subq    %rdx,%rcx
    leaq    -24(%rsp),%rsi
.long 0x9066A4F3                        # rep movsb

.Lcbc_dec_ret:
.Lcbc_ret:
    .byte   0xf3,0xc3                   # rep ret
.size aesni_cbc_encrypt,.-aesni_cbc_encrypt

# ----------------------------------------------------------------------
# aesni_set_decrypt_key
#
# Takes the same register arguments as aesni_set_encrypt_key (it calls
# __aesni_set_encrypt_key with %rdi/%esi/%rdx untouched) and returns that
# routine's status in %rax.  On success the schedule is rewritten in
# place: the round keys are put in reverse order and every key except
# the first and last is passed through aesimc.
# ----------------------------------------------------------------------
.globl aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08               # subq $8,%rsp
    call    __aesni_set_encrypt_key
    shll    $4,%esi                     # esi = round counter left by the call, times 16
    testl   %eax,%eax
    jnz     .Ldec_key_ret
    leaq    16(%rdx,%rsi,1),%rdi        # rdi = last round key

    # Swap the first and last round keys unchanged.
    movups  (%rdx),%xmm0
    movups  (%rdi),%xmm1
    movups  %xmm0,(%rdi)
    movups  %xmm1,(%rdx)
    leaq    16(%rdx),%rdx
    leaq    -16(%rdi),%rdi

    # Walk inwards from both ends, swapping pairs through aesimc.
.Ldec_key_inverse:
    movups  (%rdx),%xmm0
    movups  (%rdi),%xmm1
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
.byte 102,15,56,219,201                 # aesimc %xmm1,%xmm1
    leaq    16(%rdx),%rdx
    leaq    -16(%rdi),%rdi
    movups  %xmm0,16(%rdi)
    movups  %xmm1,-16(%rdx)
    cmpq    %rdx,%rdi
    ja      .Ldec_key_inverse

    # Middle round key.
    movups  (%rdx),%xmm0
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
    movups  %xmm0,(%rdi)
.Ldec_key_ret:
    addq    $8,%rsp
    .byte   0xf3,0xc3                   # rep ret
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key

# ----------------------------------------------------------------------
# aesni_set_encrypt_key / __aesni_set_encrypt_key
#
# Registers as read by the code below:
#   %rdi  pointer to the user key
#   %esi  key length in bits: 128, 192 or 256
#   %rdx  pointer to the output key schedule
# Returns in %rax: 0 on success, -1 if %rdi or %rdx is zero, -2 for any
# other key length.  On success %esi holds 9, 11 or 13 and the same value
# is stored at byte offset 240 of the schedule.  %rdx is not modified
# (aesni_set_decrypt_key relies on both after the call).
# ----------------------------------------------------------------------
.globl aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08               # subq $8,%rsp
    movq    $-1,%rax
    testq   %rdi,%rdi
    jz      .Lenc_key_ret
    testq   %rdx,%rdx
    jz      .Lenc_key_ret

    movups  (%rdi),%xmm0
    xorps   %xmm4,%xmm4
    leaq    16(%rdx),%rax               # rax = write cursor into the schedule
    cmpl    $256,%esi
    je      .L14rounds
    cmpl    $192,%esi
    je      .L12rounds
    cmpl    $128,%esi
    jne     .Lbad_keybits

.L10rounds:
    movl    $9,%esi
    movups  %xmm0,(%rdx)
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
    call    .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2               # aeskeygenassist $2,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,4               # aeskeygenassist $4,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,8               # aeskeygenassist $8,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,16              # aeskeygenassist $16,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,32              # aeskeygenassist $32,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,64              # aeskeygenassist $64,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,128             # aeskeygenassist $128,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,27              # aeskeygenassist $27,%xmm0,%xmm1
    call    .Lkey_expansion_128
.byte 102,15,58,223,200,54              # aeskeygenassist $54,%xmm0,%xmm1
    call    .Lkey_expansion_128
    movups  %xmm0,(%rax)
    movl    %esi,80(%rax)               # schedule offset 240
    xorl    %eax,%eax
    jmp     .Lenc_key_ret

.align 16
.L12rounds:
    movq    16(%rdi),%xmm2              # upper 64 bits of the 192-bit key
    movl    $11,%esi
    movups  %xmm0,(%rdx)
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
    call    .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2               # aeskeygenassist $2,%xmm2,%xmm1
    call    .Lkey_expansion_192b
.byte 102,15,58,223,202,4               # aeskeygenassist $4,%xmm2,%xmm1
    call    .Lkey_expansion_192a
.byte 102,15,58,223,202,8               # aeskeygenassist $8,%xmm2,%xmm1
    call    .Lkey_expansion_192b
.byte 102,15,58,223,202,16              # aeskeygenassist $16,%xmm2,%xmm1
    call    .Lkey_expansion_192a
.byte 102,15,58,223,202,32              # aeskeygenassist $32,%xmm2,%xmm1
    call    .Lkey_expansion_192b
.byte 102,15,58,223,202,64              # aeskeygenassist $64,%xmm2,%xmm1
    call    .Lkey_expansion_192a
.byte 102,15,58,223,202,128             # aeskeygenassist $128,%xmm2,%xmm1
    call    .Lkey_expansion_192b
    movups  %xmm0,(%rax)
    movl    %esi,48(%rax)               # schedule offset 240
    xorq    %rax,%rax
    jmp     .Lenc_key_ret

.align 16
.L14rounds:
    movups  16(%rdi),%xmm2              # upper 128 bits of the 256-bit key
    movl    $13,%esi
    leaq    16(%rax),%rax
    movups  %xmm0,(%rdx)
    movups  %xmm2,16(%rdx)
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
    call    .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,2               # aeskeygenassist $2,%xmm2,%xmm1
    call    .Lkey_expansion_256a
.byte 102,15,58,223,200,2               # aeskeygenassist $2,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,4               # aeskeygenassist $4,%xmm2,%xmm1
    call    .Lkey_expansion_256a
.byte 102,15,58,223,200,4               # aeskeygenassist $4,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,8               # aeskeygenassist $8,%xmm2,%xmm1
    call    .Lkey_expansion_256a
.byte 102,15,58,223,200,8               # aeskeygenassist $8,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,16              # aeskeygenassist $16,%xmm2,%xmm1
    call    .Lkey_expansion_256a
.byte 102,15,58,223,200,16              # aeskeygenassist $16,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,32              # aeskeygenassist $32,%xmm2,%xmm1
    call    .Lkey_expansion_256a
.byte 102,15,58,223,200,32              # aeskeygenassist $32,%xmm0,%xmm1
    call    .Lkey_expansion_256b
.byte 102,15,58,223,202,64              # aeskeygenassist $64,%xmm2,%xmm1
    call    .Lkey_expansion_256a
    movups  %xmm0,(%rax)
    movl    %esi,16(%rax)               # schedule offset 240
    xorq    %rax,%rax
    jmp     .Lenc_key_ret

.align 16
.Lbad_keybits:
    movq    $-2,%rax
.Lenc_key_ret:
    addq    $8,%rsp
    .byte   0xf3,0xc3                   # rep ret
.LSEH_end_set_encrypt_key:

# Key-expansion helpers.  Each is entered with the aeskeygenassist result
# in %xmm1 and %xmm4 as scratch.  The non-"_cold" entry points first store
# the previous round key at (%rax) and advance %rax.
.align 16
.Lkey_expansion_128:
    movups  %xmm0,(%rax)
    leaq    16(%rax),%rax
.Lkey_expansion_128_cold:
    shufps  $16,%xmm0,%xmm4
    xorps   %xmm4,%xmm0
    shufps  $140,%xmm0,%xmm4
    xorps   %xmm4,%xmm0
    shufps  $255,%xmm1,%xmm1            # broadcast dword 3 of the keygenassist result
    xorps   %xmm1,%xmm0
    .byte   0xf3,0xc3                   # rep ret

.align 16
.Lkey_expansion_192a:
    movups  %xmm0,(%rax)
    leaq    16(%rax),%rax
.Lkey_expansion_192a_cold:
    movaps  %xmm2,%xmm5
.Lkey_expansion_192b_warm:
    shufps  $16,%xmm0,%xmm4
    movdqa  %xmm2,%xmm3
    xorps   %xmm4,%xmm0
    shufps  $140,%xmm0,%xmm4
    pslldq  $4,%xmm3
    xorps   %xmm4,%xmm0
    pshufd  $85,%xmm1,%xmm1             # broadcast dword 1 of the keygenassist result
    pxor    %xmm3,%xmm2
    pxor    %xmm1,%xmm0
    pshufd  $255,%xmm0,%xmm3
    pxor    %xmm3,%xmm2
    .byte   0xf3,0xc3                   # rep ret

.align 16
.Lkey_expansion_192b:
    movaps  %xmm0,%xmm3
    shufps  $68,%xmm0,%xmm5
    movups  %xmm5,(%rax)
    shufps  $78,%xmm2,%xmm3
    movups  %xmm3,16(%rax)
    leaq    32(%rax),%rax
    jmp     .Lkey_expansion_192b_warm

.align 16
.Lkey_expansion_256a:
    movups  %xmm2,(%rax)
    leaq    16(%rax),%rax
.Lkey_expansion_256a_cold:
    shufps  $16,%xmm0,%xmm4
    xorps   %xmm4,%xmm0
    shufps  $140,%xmm0,%xmm4
    xorps   %xmm4,%xmm0
    shufps  $255,%xmm1,%xmm1            # broadcast dword 3 of the keygenassist result
    xorps   %xmm1,%xmm0
    .byte   0xf3,0xc3                   # rep ret

.align 16
.Lkey_expansion_256b:
    movups  %xmm0,(%rax)
    leaq    16(%rax),%rax

    shufps  $16,%xmm2,%xmm4
    xorps   %xmm4,%xmm2
    shufps  $140,%xmm2,%xmm4
    xorps   %xmm4,%xmm2
    shufps  $170,%xmm1,%xmm1            # broadcast dword 2 of the keygenassist result
    xorps   %xmm1,%xmm2
    .byte   0xf3,0xc3                   # rep ret
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key

# Constant tables.
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long 6,6,6,0
.Lincrement64:
.long 1,0,0,0
.Lxts_magic:                            # mask applied in the XTS tweak update
.long 0x87,0,1,0

# NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64