# $FreeBSD: releng/11.0/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S 299481 2016-05-11 20:11:21Z jkim $
# Do not modify. This file is auto-generated from aesni-mb-x86_64.pl.
#
# NOTE(review): the blame-view residue (line number, revision and author
# fused onto every source line) has been stripped so the text assembles.
# One functional change was made by hand: .Loop_enc4x now prefetches the
# lane 3 input through r11 instead of prefetching the r10 stream twice.
# Mirror that change in aesni-mb-x86_64.pl, otherwise regeneration will
# silently undo it.
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Hand-encoded AES-NI instructions used by the SSE code paths:
#   opcode 102,15,56,220,M = aesenc       opcode 102,15,56,221,M = aesenclast
#   opcode 102,15,56,222,M = aesdec       opcode 102,15,56,223,M = aesdeclast
#   M = 208/216/224/232 : round key in xmm0, state in xmm2/xmm3/xmm4/xmm5
#   M = 209/217/225/233 : round key in xmm1, state in xmm2/xmm3/xmm4/xmm5
# The byte pair 0xf3,0xc3 at each epilogue encodes a rep-prefixed ret.
.text

# aesni_multi_cbc_encrypt
#   CBC-encrypts up to four independent streams in lock step, one 16-byte
#   block per stream per pass of .Loop_enc4x.
# Inputs, as read by the code below:
#   rdi  array of 40-byte lane descriptors:
#          +0  input pointer        +8  output pointer
#          +16 32-bit block count   +24 16-byte chaining value
#   rsi  key schedule: round key 0 at +0, later round keys at +16, +32, ...;
#        the 32-bit value at +240 is compared with 11 to decide how many
#        extra round pairs run (presumably the AES round count - confirm
#        against the AES_KEY layout)
#   edx  number of 4-lane descriptor groups for the SSE path. When edx >= 2
#        and bit 28 of OPENSSL_ia32cap_P+4 is set, control transfers to
#        _avx_cbc_enc_shortcut instead.
# Stack frame of the SSE path (rsp aligned down to 64):
#   0(%rsp)   scratch; doubles as dummy in/out buffer for exhausted lanes
#   16(%rsp)  caller rsp as it was on entry
#   24(%rsp)  outer loop counter (saved edx)
#   32(%rsp)  four 32-bit remaining-block counters, one per lane
# rbx, rbp and r12-r15 are pushed on entry and reloaded before returning.
.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
	cmpl	$2,%edx
	jb	.Lenc_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx			# 268435456 = 1<<28
	jnz	_avx_cbc_enc_shortcut
	jmp	.Lenc_non_avx
.align	16
.Lenc_non_avx:
	movq	%rsp,%rax			# rax = entry rsp, base for the register restore
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)

.Lenc4x_body:
	movdqu	(%rsi),%xmm12			# xmm12 = round key 0
	leaq	120(%rsi),%rsi			# bias key pointer; later loads use N-120(%rsi)
	leaq	80(%rdi),%rdi			# bias descriptor pointer; lanes sit at -80,-40,0,+40

.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx			# edx becomes the largest positive lane count
	movl	-64(%rdi),%ecx			# lane 0 block count
	movq	-80(%rdi),%r8			# lane 0 input pointer
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12			# lane 0 output pointer
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm2			# lane 0 chaining value
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8			# count <= 0: read from the stack scratch instead
	movl	-24(%rdi),%ecx			# lane 1
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx			# lane 2
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx			# lane 3
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Lenc4x_done			# no lane has work; rax still holds the entry rsp

	movups	16-120(%rsi),%xmm1
	pxor	%xmm12,%xmm2			# state = chaining value ^ round key 0 ...
	movups	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2			# ... ^ first input block
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10			# xmm10 = the four lane counters
	xorq	%rbx,%rbx			# rbx = byte offset into every stream
	jmp	.Loop_enc4x

.align	32
.Loop_enc4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp			# rbp + rbx = rsp + 16, a safe dummy address

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)		# was a second r10 prefetch; lane 3 input was never prefetched
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx			# flags for 1 versus lane 0 remaining count
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
	cmovgeq	%rbp,%r8			# count <= 1: no further input, read the dummy
	cmovgq	%rbp,%r12			# count <= 0: lane finished, write to the dummy
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,220,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,220,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,220,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,220,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,220,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12			# xmm12 temporarily zero for the compare below

.byte	102,15,56,220,209
	pcmpgtd	%xmm12,%xmm11			# xmm11 = -1 in every lane whose count is > 0
	movdqu	-120(%rsi),%xmm12		# reload round key 0
.byte	102,15,56,220,217
	paddd	%xmm11,%xmm10			# decrement the counts that are still positive
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax			# flags survive until the jb/je below

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	160-120(%rsi),%xmm0

	jb	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	192-120(%rsi),%xmm0

	je	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	224-120(%rsi),%xmm0
	jmp	.Lenc4x_tail

.align	32
.Lenc4x_tail:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqu	(%r8,%rbx,1),%xmm6		# next input block of lane 0 (or dummy)
	movdqu	16-120(%rsi),%xmm1

.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6			# pre-xor next input with round key 0
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
.byte	102,15,56,221,232			# aesenclast %xmm0,%xmm5
	movdqu	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)		# store the finished output block
	pxor	%xmm6,%xmm2			# chain it into the next block state
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	movq	16(%rsp),%rax			# eax was reused above; restore the entry rsp
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi			# advance to the next group of four descriptors
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lenc4x_epilogue:
	.byte	0xf3,0xc3
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

# aesni_multi_cbc_decrypt
#   Four-lane CBC decryption. Arguments, descriptor layout, dispatch rule
#   and stack frame are read exactly as in aesni_multi_cbc_encrypt above;
#   the AVX target here is _avx_cbc_dec_shortcut.
#   xmm2-xmm5 hold the block being decrypted, xmm6-xmm9 hold the chaining
#   value that is folded in through the aesdeclast source operand.
.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
	cmpl	$2,%edx
	jb	.Ldec_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx			# 268435456 = 1<<28
	jnz	_avx_cbc_dec_shortcut
	jmp	.Ldec_non_avx
.align	16
.Ldec_non_avx:
	movq	%rsp,%rax
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)

.Ldec4x_body:
	movdqu	(%rsi),%xmm12			# xmm12 = round key 0
	leaq	120(%rsi),%rsi
	leaq	80(%rdi),%rdi

.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx			# edx becomes the largest positive lane count
	movl	-64(%rdi),%ecx			# lane 0
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm6			# lane 0 chaining value
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	movl	-24(%rdi),%ecx			# lane 1
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx			# lane 2
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx			# lane 3
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Ldec4x_done

	movups	16-120(%rsi),%xmm1
	movups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2			# state = first input block ^ round key 0
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10
	xorq	%rbx,%rbx
	jmp	.Loop_dec4x

.align	32
.Loop_dec4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp			# rbp + rbx = rsp + 16, a safe dummy address

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
	cmovgeq	%rbp,%r8			# count <= 1: no further input, read the dummy
	cmovgq	%rbp,%r12			# count <= 0: lane finished, write to the dummy
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,222,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,222,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,222,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,222,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,222,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12			# xmm12 temporarily zero for the compare below

.byte	102,15,56,222,209
	pcmpgtd	%xmm12,%xmm11			# xmm11 = -1 in every lane whose count is > 0
	movdqu	-120(%rsi),%xmm12		# reload round key 0
.byte	102,15,56,222,217
	paddd	%xmm11,%xmm10			# decrement the counts that are still positive
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax			# flags survive until the jb/je below

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	160-120(%rsi),%xmm0

	jb	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	192-120(%rsi),%xmm0

	je	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	224-120(%rsi),%xmm0
	jmp	.Ldec4x_tail

.align	32
.Ldec4x_tail:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6			# fold the final round key into the chaining value
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,233
	movdqu	16-120(%rsi),%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0

.byte	102,15,56,223,214			# aesdeclast %xmm6,%xmm2
.byte	102,15,56,223,223			# aesdeclast %xmm7,%xmm3
	movdqu	-16(%r8,%rbx,1),%xmm6		# block just consumed becomes the next chaining value
	movdqu	-16(%r9,%rbx,1),%xmm7
.byte	102,65,15,56,223,224			# aesdeclast %xmm8,%xmm4
.byte	102,65,15,56,223,233			# aesdeclast %xmm9,%xmm5
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)		# store the finished output block
	movdqu	(%r8,%rbx,1),%xmm2		# load the next input block (or dummy)
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2			# and pre-xor it with round key 0
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax			# eax was reused above; restore the entry rsp
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi			# advance to the next group of four descriptors
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Ldec4x_epilogue:
	.byte	0xf3,0xc3
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt

# aesni_multi_cbc_encrypt_avx / _avx_cbc_enc_shortcut
#   Eight-lane CBC encryption using VEX-encoded AES instructions. Reached
#   by the jnz in aesni_multi_cbc_encrypt; rdi, rsi and the 40-byte
#   descriptor layout are read the same way, for eight descriptors.
#   NOTE(review): edx is halved by shrl and then immediately cleared at
#   the top of .Lenc8x_loop_grande, and nothing branches back to that
#   label, so exactly one group of eight descriptors is handled per call.
# Stack frame (rsp aligned down to 128):
#   0(%rsp)    scratch; dummy in/out buffer for exhausted lanes
#   16(%rsp)   caller rsp as it was on entry
#   32(%rsp)   eight 32-bit remaining-block counters
#   64(%rsp)   eight 64-bit (output - input) pointer deltas
#   128(%rsp)  addressed through rbp: pre-xored next input of lanes 0-3
# Inside .Loop_enc8x r8-r15 alternate between pointing at the input
# stream and, after the delta is added, at the output stream.
.type	aesni_multi_cbc_encrypt_avx,@function
.align	32
aesni_multi_cbc_encrypt_avx:
_avx_cbc_enc_shortcut:
	movq	%rsp,%rax
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$192,%rsp
	andq	$-128,%rsp
	movq	%rax,16(%rsp)

.Lenc8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15			# xmm15 = round key 0
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi			# bias descriptor pointer; lanes sit at -160 ... +120
	shrl	$1,%edx

.Lenc8x_loop_grande:

	xorl	%edx,%edx			# edx becomes the largest positive lane count
	movl	-144(%rdi),%ecx			# lane 0
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx			# rbx = output - input for lane 0
	movq	%rbx,64(%rsp)
	movl	-104(%rdi),%ecx			# lane 1
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	movl	-64(%rdi),%ecx			# lane 2
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	movl	-24(%rdi),%ecx			# lane 3
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	movl	16(%rdi),%ecx			# lane 4
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	movl	56(%rdi),%ecx			# lane 5
	movq	40(%rdi),%r13
	cmpl	%edx,%ecx
	movq	48(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	64(%rdi),%xmm7
	movl	%ecx,52(%rsp)
	cmovleq	%rsp,%r13
	subq	%r13,%rbp
	movq	%rbp,104(%rsp)
	movl	96(%rdi),%ecx			# lane 6
	movq	80(%rdi),%r14
	cmpl	%edx,%ecx
	movq	88(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	104(%rdi),%xmm8
	movl	%ecx,56(%rsp)
	cmovleq	%rsp,%r14
	subq	%r14,%rbp
	movq	%rbp,112(%rsp)
	movl	136(%rdi),%ecx			# lane 7
	movq	120(%rdi),%r15
	cmpl	%edx,%ecx
	movq	128(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	144(%rdi),%xmm9
	movl	%ecx,60(%rsp)
	cmovleq	%rsp,%r15
	subq	%r15,%rbp
	movq	%rbp,120(%rsp)
	testl	%edx,%edx
	jz	.Lenc8x_done			# no lane has work; rax still holds the entry rsp

	vmovups	16-120(%rsi),%xmm1
	vmovups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax

	vpxor	(%r8),%xmm15,%xmm10		# first input block ^ round key 0
	leaq	128(%rsp),%rbp			# rbp = scratch for lanes 0-3 next-input values
	vpxor	(%r9),%xmm15,%xmm11
	vpxor	(%r10),%xmm15,%xmm12
	vpxor	(%r11),%xmm15,%xmm13
	vpxor	%xmm10,%xmm2,%xmm2		# ... ^ chaining value
	vpxor	(%r12),%xmm15,%xmm10
	vpxor	%xmm11,%xmm3,%xmm3
	vpxor	(%r13),%xmm15,%xmm11
	vpxor	%xmm12,%xmm4,%xmm4
	vpxor	(%r14),%xmm15,%xmm12
	vpxor	%xmm13,%xmm5,%xmm5
	vpxor	(%r15),%xmm15,%xmm13
	vpxor	%xmm10,%xmm6,%xmm6
	movl	$1,%ecx
	vpxor	%xmm11,%xmm7,%xmm7
	vpxor	%xmm12,%xmm8,%xmm8
	vpxor	%xmm13,%xmm9,%xmm9
	jmp	.Loop_enc8x

.align	32
.Loop_enc8x:
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+0(%rsp),%ecx			# flags for 1 versus lane 0 remaining count
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r8)
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r8,%rbx,1),%rbx		# rbx = output pointer (input + delta)
	cmovgeq	%rsp,%r8			# count <= 1: take further input from the dummy
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx			# count <= 0: send output to the dummy
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r8,%rbx			# recompute delta after any redirection
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r8),%xmm15,%xmm10		# next input block ^ round key 0
	movq	%rbx,64+0(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-72(%rsi),%xmm1
	leaq	16(%r8,%rbx,1),%r8		# r8 = output pointer + 16 until the tail subtracts delta
	vmovdqu	%xmm10,0(%rbp)
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+4(%rsp),%ecx
	movq	64+8(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r9)
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r9,%rbx,1),%rbx
	cmovgeq	%rsp,%r9
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r9,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r9),%xmm15,%xmm11
	movq	%rbx,64+8(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-56(%rsi),%xmm0
	leaq	16(%r9,%rbx,1),%r9
	vmovdqu	%xmm11,16(%rbp)
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+8(%rsp),%ecx
	movq	64+16(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r10)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r8)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r10,%rbx,1),%rbx
	cmovgeq	%rsp,%r10
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r10,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r10),%xmm15,%xmm12
	movq	%rbx,64+16(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-40(%rsi),%xmm1
	leaq	16(%r10,%rbx,1),%r10
	vmovdqu	%xmm12,32(%rbp)
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+12(%rsp),%ecx
	movq	64+24(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r11)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r9)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r11,%rbx,1),%rbx
	cmovgeq	%rsp,%r11
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r11,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r11),%xmm15,%xmm13
	movq	%rbx,64+24(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-24(%rsi),%xmm0
	leaq	16(%r11,%rbx,1),%r11
	vmovdqu	%xmm13,48(%rbp)
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+16(%rsp),%ecx
	movq	64+32(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r12)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r10)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r12,%rbx,1),%rbx
	cmovgeq	%rsp,%r12
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r12,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r12),%xmm15,%xmm10		# lanes 4-7 keep their next-input value in xmm10-xmm13
	movq	%rbx,64+32(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-8(%rsi),%xmm1
	leaq	16(%r12,%rbx,1),%r12
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+20(%rsp),%ecx
	movq	64+40(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r13)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r11)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%rbx,%r13,1),%rbx		# same sum as the sibling lanes, base and index swapped
	cmovgeq	%rsp,%r13
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r13,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r13),%xmm15,%xmm11
	movq	%rbx,64+40(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	8(%rsi),%xmm0
	leaq	16(%r13,%rbx,1),%r13
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+24(%rsp),%ecx
	movq	64+48(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r14)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r12)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r14,%rbx,1),%rbx
	cmovgeq	%rsp,%r14
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r14,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r14),%xmm15,%xmm12
	movq	%rbx,64+48(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	24(%rsi),%xmm1
	leaq	16(%r14,%rbx,1),%r14
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+28(%rsp),%ecx
	movq	64+56(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r15)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r13)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r15,%rbx,1),%rbx
	cmovgeq	%rsp,%r15
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r15,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r15),%xmm15,%xmm13
	movq	%rbx,64+56(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	40(%rsi),%xmm0
	leaq	16(%r15,%rbx,1),%r15
	vmovdqu	32(%rsp),%xmm14			# counters of lanes 0-3
	prefetcht0	15(%r14)
	prefetcht0	15(%r15)
	cmpl	$11,%eax			# flags survive the VEX instructions until je below
	jb	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	176-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	192-120(%rsi),%xmm0
	je	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	208-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	224-120(%rsi),%xmm0

.Lenc8x_tail:
	vaesenc	%xmm1,%xmm2,%xmm2
	vpxor	%xmm15,%xmm15,%xmm15		# xmm15 temporarily zero for the compare below
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15	# -1 in every lane whose count is > 0
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm14,%xmm15,%xmm15		# decremented counters of lanes 0-3
	vmovdqu	48(%rsp),%xmm14			# counters of lanes 4-7
	vaesenc	%xmm1,%xmm7,%xmm7
	movq	64(%rsp),%rbx			# lane 0 delta, live across the loop back-edge
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	16-120(%rsi),%xmm1

	vaesenclast	%xmm0,%xmm2,%xmm2
	vmovdqa	%xmm15,32(%rsp)
	vpxor	%xmm15,%xmm15,%xmm15
	vaesenclast	%xmm0,%xmm3,%xmm3
	vaesenclast	%xmm0,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesenclast	%xmm0,%xmm5,%xmm5
	vaesenclast	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm15,%xmm14,%xmm14
	vmovdqu	-120(%rsi),%xmm15		# reload round key 0
	vaesenclast	%xmm0,%xmm7,%xmm7
	vaesenclast	%xmm0,%xmm8,%xmm8
	vmovdqa	%xmm14,48(%rsp)
	vaesenclast	%xmm0,%xmm9,%xmm9
	vmovups	32-120(%rsi),%xmm0

	vmovups	%xmm2,-16(%r8)			# store output; r8 currently points past the output block
	subq	%rbx,%r8			# back to the input stream
	vpxor	0(%rbp),%xmm2,%xmm2		# chain into the next block state
	vmovups	%xmm3,-16(%r9)
	subq	72(%rsp),%r9
	vpxor	16(%rbp),%xmm3,%xmm3
	vmovups	%xmm4,-16(%r10)
	subq	80(%rsp),%r10
	vpxor	32(%rbp),%xmm4,%xmm4
	vmovups	%xmm5,-16(%r11)
	subq	88(%rsp),%r11
	vpxor	48(%rbp),%xmm5,%xmm5
	vmovups	%xmm6,-16(%r12)
	subq	96(%rsp),%r12
	vpxor	%xmm10,%xmm6,%xmm6
	vmovups	%xmm7,-16(%r13)
	subq	104(%rsp),%r13
	vpxor	%xmm11,%xmm7,%xmm7
	vmovups	%xmm8,-16(%r14)
	subq	112(%rsp),%r14
	vpxor	%xmm12,%xmm8,%xmm8
	vmovups	%xmm9,-16(%r15)
	subq	120(%rsp),%r15
	vpxor	%xmm13,%xmm9,%xmm9

	decl	%edx
	jnz	.Loop_enc8x

	movq	16(%rsp),%rax			# eax was reused above; restore the entry rsp

.Lenc8x_done:
	vzeroupper
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lenc8x_epilogue:
	.byte	0xf3,0xc3
.size	aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

# aesni_multi_cbc_decrypt_avx / _avx_cbc_dec_shortcut
#   NOTE(review): this routine continues beyond the end of the reviewed
#   excerpt. The visible prefix below is reproduced without any change to
#   its instructions and is intentionally left undocumented.
.type	aesni_multi_cbc_decrypt_avx,@function
.align	32
aesni_multi_cbc_decrypt_avx:
_avx_cbc_dec_shortcut:
	movq	%rsp,%rax
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$256,%rsp
	andq	$-256,%rsp
	subq	$192,%rsp
	movq	%rax,16(%rsp)

.Ldec8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi
	shrl	$1,%edx

.Ldec8x_loop_grande:

	xorl	%edx,%edx
	movl	-144(%rdi),%ecx
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx
	movq	%rbx,64(%rsp)
	vmovdqu	%xmm2,192(%rsp)
	movl	-104(%rdi),%ecx
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	vmovdqu	%xmm3,208(%rsp)
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	vmovdqu	%xmm4,224(%rsp)
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	vmovdqu	%xmm5,240(%rsp)
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	vmovdqu	%xmm6,256(%rsp)
	movl	56(%rdi),%ecx
1058299481Sjkim movq 40(%rdi),%r13 1059299481Sjkim cmpl %edx,%ecx 1060299481Sjkim movq 48(%rdi),%rbp 1061299481Sjkim cmovgl %ecx,%edx 1062299481Sjkim testl %ecx,%ecx 1063299481Sjkim vmovdqu 64(%rdi),%xmm7 1064299481Sjkim movl %ecx,52(%rsp) 1065299481Sjkim cmovleq %rsp,%r13 1066299481Sjkim subq %r13,%rbp 1067299481Sjkim movq %rbp,104(%rsp) 1068299481Sjkim vmovdqu %xmm7,272(%rsp) 1069299481Sjkim movl 96(%rdi),%ecx 1070299481Sjkim movq 80(%rdi),%r14 1071299481Sjkim cmpl %edx,%ecx 1072299481Sjkim movq 88(%rdi),%rbp 1073299481Sjkim cmovgl %ecx,%edx 1074299481Sjkim testl %ecx,%ecx 1075299481Sjkim vmovdqu 104(%rdi),%xmm8 1076299481Sjkim movl %ecx,56(%rsp) 1077299481Sjkim cmovleq %rsp,%r14 1078299481Sjkim subq %r14,%rbp 1079299481Sjkim movq %rbp,112(%rsp) 1080299481Sjkim vmovdqu %xmm8,288(%rsp) 1081299481Sjkim movl 136(%rdi),%ecx 1082299481Sjkim movq 120(%rdi),%r15 1083299481Sjkim cmpl %edx,%ecx 1084299481Sjkim movq 128(%rdi),%rbp 1085299481Sjkim cmovgl %ecx,%edx 1086299481Sjkim testl %ecx,%ecx 1087299481Sjkim vmovdqu 144(%rdi),%xmm9 1088299481Sjkim movl %ecx,60(%rsp) 1089299481Sjkim cmovleq %rsp,%r15 1090299481Sjkim subq %r15,%rbp 1091299481Sjkim movq %rbp,120(%rsp) 1092299481Sjkim vmovdqu %xmm9,304(%rsp) 1093299481Sjkim testl %edx,%edx 1094299481Sjkim jz .Ldec8x_done 1095299481Sjkim 1096299481Sjkim vmovups 16-120(%rsi),%xmm1 1097299481Sjkim vmovups 32-120(%rsi),%xmm0 1098299481Sjkim movl 240-120(%rsi),%eax 1099299481Sjkim leaq 192+128(%rsp),%rbp 1100299481Sjkim 1101299481Sjkim vmovdqu (%r8),%xmm2 1102299481Sjkim vmovdqu (%r9),%xmm3 1103299481Sjkim vmovdqu (%r10),%xmm4 1104299481Sjkim vmovdqu (%r11),%xmm5 1105299481Sjkim vmovdqu (%r12),%xmm6 1106299481Sjkim vmovdqu (%r13),%xmm7 1107299481Sjkim vmovdqu (%r14),%xmm8 1108299481Sjkim vmovdqu (%r15),%xmm9 1109299481Sjkim vmovdqu %xmm2,0(%rbp) 1110299481Sjkim vpxor %xmm15,%xmm2,%xmm2 1111299481Sjkim vmovdqu %xmm3,16(%rbp) 1112299481Sjkim vpxor %xmm15,%xmm3,%xmm3 1113299481Sjkim vmovdqu %xmm4,32(%rbp) 1114299481Sjkim vpxor 
%xmm15,%xmm4,%xmm4 1115299481Sjkim vmovdqu %xmm5,48(%rbp) 1116299481Sjkim vpxor %xmm15,%xmm5,%xmm5 1117299481Sjkim vmovdqu %xmm6,64(%rbp) 1118299481Sjkim vpxor %xmm15,%xmm6,%xmm6 1119299481Sjkim vmovdqu %xmm7,80(%rbp) 1120299481Sjkim vpxor %xmm15,%xmm7,%xmm7 1121299481Sjkim vmovdqu %xmm8,96(%rbp) 1122299481Sjkim vpxor %xmm15,%xmm8,%xmm8 1123299481Sjkim vmovdqu %xmm9,112(%rbp) 1124299481Sjkim vpxor %xmm15,%xmm9,%xmm9 1125299481Sjkim xorq $0x80,%rbp 1126299481Sjkim movl $1,%ecx 1127299481Sjkim jmp .Loop_dec8x 1128299481Sjkim 1129299481Sjkim.align 32 1130299481Sjkim.Loop_dec8x: 1131299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1132299481Sjkim cmpl 32+0(%rsp),%ecx 1133299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1134299481Sjkim prefetcht0 31(%r8) 1135299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1136299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1137299481Sjkim leaq (%r8,%rbx,1),%rbx 1138299481Sjkim cmovgeq %rsp,%r8 1139299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1140299481Sjkim cmovgq %rsp,%rbx 1141299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1142299481Sjkim subq %r8,%rbx 1143299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1144299481Sjkim vmovdqu 16(%r8),%xmm10 1145299481Sjkim movq %rbx,64+0(%rsp) 1146299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1147299481Sjkim vmovups -72(%rsi),%xmm1 1148299481Sjkim leaq 16(%r8,%rbx,1),%r8 1149299481Sjkim vmovdqu %xmm10,128(%rsp) 1150299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1151299481Sjkim cmpl 32+4(%rsp),%ecx 1152299481Sjkim movq 64+8(%rsp),%rbx 1153299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1154299481Sjkim prefetcht0 31(%r9) 1155299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1156299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1157299481Sjkim leaq (%r9,%rbx,1),%rbx 1158299481Sjkim cmovgeq %rsp,%r9 1159299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1160299481Sjkim cmovgq %rsp,%rbx 1161299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1162299481Sjkim subq %r9,%rbx 1163299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1164299481Sjkim vmovdqu 16(%r9),%xmm11 1165299481Sjkim movq %rbx,64+8(%rsp) 1166299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1167299481Sjkim 
vmovups -56(%rsi),%xmm0 1168299481Sjkim leaq 16(%r9,%rbx,1),%r9 1169299481Sjkim vmovdqu %xmm11,144(%rsp) 1170299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1171299481Sjkim cmpl 32+8(%rsp),%ecx 1172299481Sjkim movq 64+16(%rsp),%rbx 1173299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1174299481Sjkim prefetcht0 31(%r10) 1175299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1176299481Sjkim prefetcht0 15(%r8) 1177299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1178299481Sjkim leaq (%r10,%rbx,1),%rbx 1179299481Sjkim cmovgeq %rsp,%r10 1180299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1181299481Sjkim cmovgq %rsp,%rbx 1182299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1183299481Sjkim subq %r10,%rbx 1184299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1185299481Sjkim vmovdqu 16(%r10),%xmm12 1186299481Sjkim movq %rbx,64+16(%rsp) 1187299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1188299481Sjkim vmovups -40(%rsi),%xmm1 1189299481Sjkim leaq 16(%r10,%rbx,1),%r10 1190299481Sjkim vmovdqu %xmm12,160(%rsp) 1191299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1192299481Sjkim cmpl 32+12(%rsp),%ecx 1193299481Sjkim movq 64+24(%rsp),%rbx 1194299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1195299481Sjkim prefetcht0 31(%r11) 1196299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1197299481Sjkim prefetcht0 15(%r9) 1198299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1199299481Sjkim leaq (%r11,%rbx,1),%rbx 1200299481Sjkim cmovgeq %rsp,%r11 1201299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1202299481Sjkim cmovgq %rsp,%rbx 1203299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1204299481Sjkim subq %r11,%rbx 1205299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1206299481Sjkim vmovdqu 16(%r11),%xmm13 1207299481Sjkim movq %rbx,64+24(%rsp) 1208299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1209299481Sjkim vmovups -24(%rsi),%xmm0 1210299481Sjkim leaq 16(%r11,%rbx,1),%r11 1211299481Sjkim vmovdqu %xmm13,176(%rsp) 1212299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1213299481Sjkim cmpl 32+16(%rsp),%ecx 1214299481Sjkim movq 64+32(%rsp),%rbx 1215299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1216299481Sjkim prefetcht0 31(%r12) 1217299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 
1218299481Sjkim prefetcht0 15(%r10) 1219299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1220299481Sjkim leaq (%r12,%rbx,1),%rbx 1221299481Sjkim cmovgeq %rsp,%r12 1222299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1223299481Sjkim cmovgq %rsp,%rbx 1224299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1225299481Sjkim subq %r12,%rbx 1226299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1227299481Sjkim vmovdqu 16(%r12),%xmm10 1228299481Sjkim movq %rbx,64+32(%rsp) 1229299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1230299481Sjkim vmovups -8(%rsi),%xmm1 1231299481Sjkim leaq 16(%r12,%rbx,1),%r12 1232299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1233299481Sjkim cmpl 32+20(%rsp),%ecx 1234299481Sjkim movq 64+40(%rsp),%rbx 1235299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1236299481Sjkim prefetcht0 31(%r13) 1237299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1238299481Sjkim prefetcht0 15(%r11) 1239299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1240299481Sjkim leaq (%rbx,%r13,1),%rbx 1241299481Sjkim cmovgeq %rsp,%r13 1242299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1243299481Sjkim cmovgq %rsp,%rbx 1244299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1245299481Sjkim subq %r13,%rbx 1246299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1247299481Sjkim vmovdqu 16(%r13),%xmm11 1248299481Sjkim movq %rbx,64+40(%rsp) 1249299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1250299481Sjkim vmovups 8(%rsi),%xmm0 1251299481Sjkim leaq 16(%r13,%rbx,1),%r13 1252299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1253299481Sjkim cmpl 32+24(%rsp),%ecx 1254299481Sjkim movq 64+48(%rsp),%rbx 1255299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1256299481Sjkim prefetcht0 31(%r14) 1257299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1258299481Sjkim prefetcht0 15(%r12) 1259299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1260299481Sjkim leaq (%r14,%rbx,1),%rbx 1261299481Sjkim cmovgeq %rsp,%r14 1262299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1263299481Sjkim cmovgq %rsp,%rbx 1264299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1265299481Sjkim subq %r14,%rbx 1266299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1267299481Sjkim vmovdqu 16(%r14),%xmm12 1268299481Sjkim movq %rbx,64+48(%rsp) 1269299481Sjkim 
vaesdec %xmm1,%xmm9,%xmm9 1270299481Sjkim vmovups 24(%rsi),%xmm1 1271299481Sjkim leaq 16(%r14,%rbx,1),%r14 1272299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1273299481Sjkim cmpl 32+28(%rsp),%ecx 1274299481Sjkim movq 64+56(%rsp),%rbx 1275299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1276299481Sjkim prefetcht0 31(%r15) 1277299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1278299481Sjkim prefetcht0 15(%r13) 1279299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1280299481Sjkim leaq (%r15,%rbx,1),%rbx 1281299481Sjkim cmovgeq %rsp,%r15 1282299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1283299481Sjkim cmovgq %rsp,%rbx 1284299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1285299481Sjkim subq %r15,%rbx 1286299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1287299481Sjkim vmovdqu 16(%r15),%xmm13 1288299481Sjkim movq %rbx,64+56(%rsp) 1289299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1290299481Sjkim vmovups 40(%rsi),%xmm0 1291299481Sjkim leaq 16(%r15,%rbx,1),%r15 1292299481Sjkim vmovdqu 32(%rsp),%xmm14 1293299481Sjkim prefetcht0 15(%r14) 1294299481Sjkim prefetcht0 15(%r15) 1295299481Sjkim cmpl $11,%eax 1296299481Sjkim jb .Ldec8x_tail 1297299481Sjkim 1298299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1299299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1300299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1301299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1302299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1303299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1304299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1305299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1306299481Sjkim vmovups 176-120(%rsi),%xmm1 1307299481Sjkim 1308299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1309299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1310299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1311299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1312299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1313299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1314299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1315299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1316299481Sjkim vmovups 192-120(%rsi),%xmm0 1317299481Sjkim je .Ldec8x_tail 1318299481Sjkim 1319299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1320299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 
1321299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1322299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1323299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1324299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1325299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1326299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1327299481Sjkim vmovups 208-120(%rsi),%xmm1 1328299481Sjkim 1329299481Sjkim vaesdec %xmm0,%xmm2,%xmm2 1330299481Sjkim vaesdec %xmm0,%xmm3,%xmm3 1331299481Sjkim vaesdec %xmm0,%xmm4,%xmm4 1332299481Sjkim vaesdec %xmm0,%xmm5,%xmm5 1333299481Sjkim vaesdec %xmm0,%xmm6,%xmm6 1334299481Sjkim vaesdec %xmm0,%xmm7,%xmm7 1335299481Sjkim vaesdec %xmm0,%xmm8,%xmm8 1336299481Sjkim vaesdec %xmm0,%xmm9,%xmm9 1337299481Sjkim vmovups 224-120(%rsi),%xmm0 1338299481Sjkim 1339299481Sjkim.Ldec8x_tail: 1340299481Sjkim vaesdec %xmm1,%xmm2,%xmm2 1341299481Sjkim vpxor %xmm15,%xmm15,%xmm15 1342299481Sjkim vaesdec %xmm1,%xmm3,%xmm3 1343299481Sjkim vaesdec %xmm1,%xmm4,%xmm4 1344299481Sjkim vpcmpgtd %xmm15,%xmm14,%xmm15 1345299481Sjkim vaesdec %xmm1,%xmm5,%xmm5 1346299481Sjkim vaesdec %xmm1,%xmm6,%xmm6 1347299481Sjkim vpaddd %xmm14,%xmm15,%xmm15 1348299481Sjkim vmovdqu 48(%rsp),%xmm14 1349299481Sjkim vaesdec %xmm1,%xmm7,%xmm7 1350299481Sjkim movq 64(%rsp),%rbx 1351299481Sjkim vaesdec %xmm1,%xmm8,%xmm8 1352299481Sjkim vaesdec %xmm1,%xmm9,%xmm9 1353299481Sjkim vmovups 16-120(%rsi),%xmm1 1354299481Sjkim 1355299481Sjkim vaesdeclast %xmm0,%xmm2,%xmm2 1356299481Sjkim vmovdqa %xmm15,32(%rsp) 1357299481Sjkim vpxor %xmm15,%xmm15,%xmm15 1358299481Sjkim vaesdeclast %xmm0,%xmm3,%xmm3 1359299481Sjkim vpxor 0(%rbp),%xmm2,%xmm2 1360299481Sjkim vaesdeclast %xmm0,%xmm4,%xmm4 1361299481Sjkim vpxor 16(%rbp),%xmm3,%xmm3 1362299481Sjkim vpcmpgtd %xmm15,%xmm14,%xmm15 1363299481Sjkim vaesdeclast %xmm0,%xmm5,%xmm5 1364299481Sjkim vpxor 32(%rbp),%xmm4,%xmm4 1365299481Sjkim vaesdeclast %xmm0,%xmm6,%xmm6 1366299481Sjkim vpxor 48(%rbp),%xmm5,%xmm5 1367299481Sjkim vpaddd %xmm15,%xmm14,%xmm14 1368299481Sjkim vmovdqu -120(%rsi),%xmm15 1369299481Sjkim vaesdeclast 
%xmm0,%xmm7,%xmm7 1370299481Sjkim vpxor 64(%rbp),%xmm6,%xmm6 1371299481Sjkim vaesdeclast %xmm0,%xmm8,%xmm8 1372299481Sjkim vpxor 80(%rbp),%xmm7,%xmm7 1373299481Sjkim vmovdqa %xmm14,48(%rsp) 1374299481Sjkim vaesdeclast %xmm0,%xmm9,%xmm9 1375299481Sjkim vpxor 96(%rbp),%xmm8,%xmm8 1376299481Sjkim vmovups 32-120(%rsi),%xmm0 1377299481Sjkim 1378299481Sjkim vmovups %xmm2,-16(%r8) 1379299481Sjkim subq %rbx,%r8 1380299481Sjkim vmovdqu 128+0(%rsp),%xmm2 1381299481Sjkim vpxor 112(%rbp),%xmm9,%xmm9 1382299481Sjkim vmovups %xmm3,-16(%r9) 1383299481Sjkim subq 72(%rsp),%r9 1384299481Sjkim vmovdqu %xmm2,0(%rbp) 1385299481Sjkim vpxor %xmm15,%xmm2,%xmm2 1386299481Sjkim vmovdqu 128+16(%rsp),%xmm3 1387299481Sjkim vmovups %xmm4,-16(%r10) 1388299481Sjkim subq 80(%rsp),%r10 1389299481Sjkim vmovdqu %xmm3,16(%rbp) 1390299481Sjkim vpxor %xmm15,%xmm3,%xmm3 1391299481Sjkim vmovdqu 128+32(%rsp),%xmm4 1392299481Sjkim vmovups %xmm5,-16(%r11) 1393299481Sjkim subq 88(%rsp),%r11 1394299481Sjkim vmovdqu %xmm4,32(%rbp) 1395299481Sjkim vpxor %xmm15,%xmm4,%xmm4 1396299481Sjkim vmovdqu 128+48(%rsp),%xmm5 1397299481Sjkim vmovups %xmm6,-16(%r12) 1398299481Sjkim subq 96(%rsp),%r12 1399299481Sjkim vmovdqu %xmm5,48(%rbp) 1400299481Sjkim vpxor %xmm15,%xmm5,%xmm5 1401299481Sjkim vmovdqu %xmm10,64(%rbp) 1402299481Sjkim vpxor %xmm10,%xmm15,%xmm6 1403299481Sjkim vmovups %xmm7,-16(%r13) 1404299481Sjkim subq 104(%rsp),%r13 1405299481Sjkim vmovdqu %xmm11,80(%rbp) 1406299481Sjkim vpxor %xmm11,%xmm15,%xmm7 1407299481Sjkim vmovups %xmm8,-16(%r14) 1408299481Sjkim subq 112(%rsp),%r14 1409299481Sjkim vmovdqu %xmm12,96(%rbp) 1410299481Sjkim vpxor %xmm12,%xmm15,%xmm8 1411299481Sjkim vmovups %xmm9,-16(%r15) 1412299481Sjkim subq 120(%rsp),%r15 1413299481Sjkim vmovdqu %xmm13,112(%rbp) 1414299481Sjkim vpxor %xmm13,%xmm15,%xmm9 1415299481Sjkim 1416299481Sjkim xorq $128,%rbp 1417299481Sjkim decl %edx 1418299481Sjkim jnz .Loop_dec8x 1419299481Sjkim 1420299481Sjkim movq 16(%rsp),%rax 1421299481Sjkim 1422299481Sjkim 1423299481Sjkim 
1424299481Sjkim 1425299481Sjkim 1426299481Sjkim.Ldec8x_done: 1427299481Sjkim vzeroupper 1428299481Sjkim movq -48(%rax),%r15 1429299481Sjkim movq -40(%rax),%r14 1430299481Sjkim movq -32(%rax),%r13 1431299481Sjkim movq -24(%rax),%r12 1432299481Sjkim movq -16(%rax),%rbp 1433299481Sjkim movq -8(%rax),%rbx 1434299481Sjkim leaq (%rax),%rsp 1435299481Sjkim.Ldec8x_epilogue: 1436299481Sjkim .byte 0xf3,0xc3 1437299481Sjkim.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx 1438