/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/inst.h>

.text

/*
 * Register roles.  Syntax is AT&T (source, destination); the code uses
 * x86-64-only registers (%r8-%r11, %xmm8-%xmm12).
 */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3
#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define KEYP	%rdi
#define OUTP	%rsi
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2

/*
 * _key_expansion_128 / _key_expansion_256a: internal helper
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	AESKEYGENASSIST result
 *	%xmm4:	scratch; aesni_set_key zeroes it once before the first call
 *	%rcx:	address at which to store the new round key
 * output:
 *	%xmm0:	new round key, also stored at (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	pshufd $0b11111111, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_192a: internal helper
 * Updates %xmm0 and %xmm2, stores 32 bytes at (%rcx) and advances %rcx
 * by 32.
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6		# keep the incoming %xmm2 for the store
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (%rcx)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 16(%rcx)
	add $0x20, %rcx
	ret

/*
 * _key_expansion_192b: internal helper
 * Updates %xmm0 and %xmm2, stores %xmm0 (16 bytes) at (%rcx) and advances
 * %rcx by 16.
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_256b: internal helper
 * Same shape as _key_expansion_256a, but operates on %xmm2 instead of
 * %xmm0: updates %xmm2, stores it at (%rcx) and advances %rcx by 16.
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	pshufd $0b10101010, %xmm1, %xmm1
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	ret

/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Writes the encryption round keys starting at offset 0 of ctx, the
 * decryption round keys starting at offset 240, and key_len at offset 480.
 * Always returns 0.
 */
ENTRY(aesni_set_key)
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)
	lea 0x10(%rdi), %rcx		# key addr
	movl %edx, 480(%rdi)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
	movups 0x10(%rsi), %xmm2	# other user key
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_256a
	AESKEYGENASSIST 0x1 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_256a
	AESKEYGENASSIST 0x2 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_256a
	AESKEYGENASSIST 0x4 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_256a
	AESKEYGENASSIST 0x8 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_256a
	AESKEYGENASSIST 0x10 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_256a
	AESKEYGENASSIST 0x20 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(%rsi), %xmm2		# other user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
	call _key_expansion_128
	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
	call _key_expansion_128
	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
	call _key_expansion_128
	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
	call _key_expansion_128
	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
	call _key_expansion_128
	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
	call _key_expansion_128
	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
	call _key_expansion_128
	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
	call _key_expansion_128
	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
	call _key_expansion_128
	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
	call _key_expansion_128
.Ldec_key:
	/*
	 * Build the decryption schedule at offset 240: the first and last
	 * encryption round keys are copied across unmodified (swapped),
	 * the ones in between go through AESIMC in reverse order.
	 */
	sub $0x10, %rcx			# %rcx = last encryption round key
	movaps (%rdi), %xmm0
	movaps (%rcx), %xmm1
	movaps %xmm0, 240(%rcx)
	movaps %xmm1, 240(%rdi)
	add $0x10, %rdi
	lea 240-16(%rcx), %rsi
.align 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	movaps %xmm1, (%rsi)
	add $0x10, %rdi			# source walks forward
	sub $0x10, %rsi			# destination walks backward
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0
	ret

/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypt one 16-byte block.
 */
ENTRY(aesni_enc)
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
	ret

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp above
	je .Lenc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps (TKEYP), KEY
	AESENC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE		# last round
	ret

/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp above
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps (TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret

/*
 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypt one 16-byte block using the round keys at offset 240 of ctx.
 */
ENTRY(aesni_dec)
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		# output
	ret

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp above
	je .Ldec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps (TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE		# last round
	ret

/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp above
	je .L4dec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps (TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * Encrypts whole 16-byte blocks, four at a time while at least 64 bytes
 * remain; a trailing partial block (len % 16) is not processed.
 */
ENTRY(aesni_ecb_enc)
	test LEN, LEN			# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# LEN is unsigned (size_t)
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# LEN is unsigned (size_t)
.Lecb_enc_ret:
	ret

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * Decrypts whole 16-byte blocks, four at a time while at least 64 bytes
 * remain; a trailing partial block (len % 16) is not processed.
 */
ENTRY(aesni_ecb_dec)
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# LEN is unsigned (size_t)
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# LEN is unsigned (size_t)
.Lecb_dec_ret:
	ret

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * Processes whole 16-byte blocks only.  If at least one block was
 * processed, the last output block is written back to *iv.
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# LEN is unsigned (size_t)
	movups STATE, (IVP)
.Lcbc_enc_ret:
	ret

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * Processes whole 16-byte blocks only, four at a time while at least
 * 64 bytes remain.  If at least one block was processed, the last input
 * block is written back to *iv.
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last input block chains into the next
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# LEN is unsigned (size_t)
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# LEN is unsigned (size_t)
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
	ret

/* pshufb control mask that reverses the 16 bytes of an xmm register */
.align 16
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 */
_aesni_inc_init:
	movaps .Lbswap_mask(%rip), BSWAP_MASK	# RIP-relative, no absolute reloc
	movaps IV, CTR
	PSHUFB_XMM BSWAP_MASK CTR
	mov $1, TCTR_LOW
	MOVQ_R64_XMM TCTR_LOW INC
	MOVQ_R64_XMM CTR TCTR_LOW
	ret

/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 * output:
 *	IV:	Increase by 1
 * changed:
 *	CTR:	== output IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 */
_aesni_inc:
	paddq INC, CTR
	add $1, TCTR_LOW
	jnc .Linc_low
	/* low qword wrapped around: propagate the carry into the high qword */
	pslldq $8, INC
	paddq INC, CTR
	psrldq $8, INC			# restore INC == 1
.Linc_low:
	movaps CTR, IV
	PSHUFB_XMM BSWAP_MASK IV
	ret

/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * Processes whole 16-byte blocks only, four at a time while at least
 * 64 bytes remain.  If at least one block was processed, the updated
 * counter block is written back to *iv.
 */
ENTRY(aesni_ctr_enc)
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		# LEN is unsigned (size_t)
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		# LEN is unsigned (size_t)
.Lctr_enc_ret:
	movups IV, (IVP)
.Lctr_enc_just_ret:
	ret