/*	$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $");

#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>

#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>

/*
 * Force the CPU to reload the cached round keys before the next
 * xcrypt operation; VIA documents an EFLAGS write (e.g. pushf/popf)
 * for this purpose when the key material may have changed.
 */
static void
aesvia_reload_keys(void)
{

        asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

        /*
         * Determine the control word bits for the key size / number of
         * rounds.  For AES-128, the hardware can do key expansion on
         * the fly; for AES-192 and AES-256, software must do it.
         */
        switch (nrounds) {
        case AES_128_NROUNDS:
                return C3_CRYPT_CWLO_KEY128;
        case AES_192_NROUNDS:
                return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
        case AES_256_NROUNDS:
                return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
        default:
                panic("invalid AES nrounds: %u", nrounds);
        }
}
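
/*
 * Illustrative note (not part of the driver logic): the value
 * returned above supplies only the key-size bits of the control
 * word; each xcrypt helper below ORs in the algorithm, direction,
 * and round-mode bits itself.  For an AES-256 encryption, the
 * complete low control word is assembled as
 *
 *	cw[0] = aesvia_keylen_cw0(AES_256_NROUNDS)
 *	    | C3_CRYPT_CWLO_ALG_AES
 *	    | C3_CRYPT_CWLO_ENCRYPT
 *	    | C3_CRYPT_CWLO_NORMAL;
 *
 * where aesvia_keylen_cw0 contributes KEY256 | KEYGEN_SW.  This is
 * exactly the cw[0] initializer that appears in aesvia_encN.
 */
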
static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
        size_t key_len;

        switch (nrounds) {
        case AES_128_NROUNDS:
                /* AES-128: the hardware expands the key on the fly;
                 * just load the raw key words. */
                enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
                enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
                enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
                enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
                return;
        case AES_192_NROUNDS:
                key_len = 24;
                break;
        case AES_256_NROUNDS:
                key_len = 32;
                break;
        default:
                panic("invalid AES nrounds: %u", nrounds);
        }
        /* AES-192/-256: software key schedule, via BearSSL. */
        br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
        size_t key_len;

        switch (nrounds) {
        case AES_128_NROUNDS:
                dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
                dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
                dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
                dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
                return;
        case AES_192_NROUNDS:
                key_len = 24;
                break;
        case AES_256_NROUNDS:
                key_len = 32;
                break;
        default:
                panic("invalid AES nrounds: %u", nrounds);
        }
        br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
        const uint32_t cw[4] __aligned(16) = {
                [0] = (cw0
                    | C3_CRYPT_CWLO_ALG_AES
                    | C3_CRYPT_CWLO_ENCRYPT
                    | C3_CRYPT_CWLO_NORMAL),
        };

        KASSERT(((uintptr_t)enc & 0xf) == 0);
        KASSERT(((uintptr_t)in & 0xf) == 0);
        KASSERT(((uintptr_t)out & 0xf) == 0);

        asm volatile("rep xcryptecb"
            : "+c"(nblocks), "+S"(in), "+D"(out)
            : "b"(enc), "d"(cw)
            : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
        const uint32_t cw[4] __aligned(16) = {
                [0] = (cw0
                    | C3_CRYPT_CWLO_ALG_AES
                    | C3_CRYPT_CWLO_DECRYPT
                    | C3_CRYPT_CWLO_NORMAL),
        };

        KASSERT(((uintptr_t)dec & 0xf) == 0);
        KASSERT(((uintptr_t)in & 0xf) == 0);
        KASSERT(((uintptr_t)out & 0xf) == 0);

        asm volatile("rep xcryptecb"
            : "+c"(nblocks), "+S"(in), "+D"(out)
            : "b"(dec), "d"(cw)
            : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
            ((uintptr_t)in & 0xff0) != 0xff0) {
                enc_aligned_evcnt.ev_count++;
                aesvia_encN(enc, in, out, 1, cw0);
        } else {
                enc_unaligned_evcnt.ev_count++;
                /*
                 * VIA requires 16-byte/128-bit alignment, and
                 * xcrypt-ecb reads one block past the one we're
                 * working on -- which may go past the end of the page
                 * into unmapped territory.  Use a bounce buffer if
                 * either constraint is violated.
                 */
                uint8_t inbuf[16] __aligned(16);
                uint8_t outbuf[16] __aligned(16);

                memcpy(inbuf, in, 16);
                aesvia_encN(enc, inbuf, outbuf, 1, cw0);
                memcpy(out, outbuf, 16);

                explicit_memset(inbuf, 0, sizeof inbuf);
                explicit_memset(outbuf, 0, sizeof outbuf);
        }
        fpu_kern_leave();
}
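
/*
 * Worked example for the fast-path test above (illustrative): with
 * 4096-byte pages, a 16-byte-aligned block whose page offset is
 * 0xff0 occupies the last 16 bytes of its page, so the one-block
 * lookahead read of xcrypt-ecb would touch the next, possibly
 * unmapped, page.  Hence `((uintptr_t)in & 0xff0) != 0xff0' rejects
 * only that final in-page block slot; every other aligned block
 * takes the no-copy path.  Only `in' needs the check, because only
 * reads overrun -- writes to `out' do not.
 */
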
static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
            ((uintptr_t)in & 0xff0) != 0xff0) {
                dec_aligned_evcnt.ev_count++;
                aesvia_decN(dec, in, out, 1, cw0);
        } else {
                dec_unaligned_evcnt.ev_count++;
                /*
                 * VIA requires 16-byte/128-bit alignment, and
                 * xcrypt-ecb reads one block past the one we're
                 * working on -- which may go past the end of the page
                 * into unmapped territory.  Use a bounce buffer if
                 * either constraint is violated.
                 */
                uint8_t inbuf[16] __aligned(16);
                uint8_t outbuf[16] __aligned(16);

                memcpy(inbuf, in, 16);
                aesvia_decN(dec, inbuf, outbuf, 1, cw0);
                memcpy(out, outbuf, 16);

                explicit_memset(inbuf, 0, sizeof inbuf);
                explicit_memset(outbuf, 0, sizeof outbuf);
        }
        fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
        const uint32_t cw[4] __aligned(16) = {
                [0] = (cw0
                    | C3_CRYPT_CWLO_ALG_AES
                    | C3_CRYPT_CWLO_ENCRYPT
                    | C3_CRYPT_CWLO_NORMAL),
        };

        KASSERT(((uintptr_t)enc & 0xf) == 0);
        KASSERT(((uintptr_t)in & 0xf) == 0);
        KASSERT(((uintptr_t)out & 0xf) == 0);
        KASSERT(((uintptr_t)*ivp & 0xf) == 0);

        /*
         * Register effects:
         * - Counts nblocks down to zero.
         * - Advances in by nblocks (units of blocks).
         * - Advances out by nblocks (units of blocks).
         * - Updates *ivp to point at the last block of out.
         */
        asm volatile("rep xcryptcbc"
            : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
            : "b"(enc), "d"(cw)
            : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
        const uint32_t cw[4] __aligned(16) = {
                [0] = (cw0
                    | C3_CRYPT_CWLO_ALG_AES
                    | C3_CRYPT_CWLO_DECRYPT
                    | C3_CRYPT_CWLO_NORMAL),
        };

        KASSERT(((uintptr_t)dec & 0xf) == 0);
        KASSERT(((uintptr_t)in & 0xf) == 0);
        KASSERT(((uintptr_t)out & 0xf) == 0);
        KASSERT(((uintptr_t)iv & 0xf) == 0);

        /*
         * Register effects:
         * - Counts nblocks down to zero.
         * - Advances in by nblocks (units of blocks).
         * - Advances out by nblocks (units of blocks).
         * Memory side effects:
         * - Writes what was the last block of in at the address iv.
         */
        asm volatile("rep xcryptcbc"
            : "+c"(nblocks), "+S"(in), "+D"(out)
            : "a"(iv), "b"(dec), "d"(cw)
            : "memory", "cc");
}
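
/*
 * Chaining sketch (illustrative; a hypothetical caller sequence, not
 * code from this driver): because xcryptcbc leaves the next IV
 * behind -- in a register for encryption, in memory at `iv' for
 * decryption -- one CBC stream can be split across calls to the
 * wrappers below, carrying the IV buffer between them:
 *
 *	uint8_t iv[16] __aligned(16);
 *
 *	memcpy(iv, iv0, 16);
 *	aesvia_cbc_dec(&dec, in, out, 64, iv, nrounds);
 *	aesvia_cbc_dec(&dec, in + 64, out + 64, nbytes - 64, iv, nrounds);
 *
 * The second call picks up exactly where the first left off, because
 * each call stores the last ciphertext block of its input in iv.
 */
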
static inline void
xor128(void *x, const void *a, const void *b)
{
        uint32_t *x32 = x;
        const uint32_t *a32 = a;
        const uint32_t *b32 = b;

        x32[0] = a32[0] ^ b32[0];
        x32[1] = a32[1] ^ b32[1];
        x32[2] = a32[2] ^ b32[2];
        x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

        KASSERT(nbytes % 16 == 0);
        if (nbytes == 0)
                return;

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
                cbcenc_aligned_evcnt.ev_count++;
                uint8_t *ivp = iv;
                aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
                memcpy(iv, ivp, 16);
        } else {
                cbcenc_unaligned_evcnt.ev_count++;
                uint8_t cv[16] __aligned(16);
                uint8_t tmp[16] __aligned(16);

                memcpy(cv, iv, 16);
                for (; nbytes; nbytes -= 16, in += 16, out += 16) {
                        memcpy(tmp, in, 16);
                        xor128(tmp, tmp, cv);
                        aesvia_encN(enc, tmp, cv, 1, cw0);
                        memcpy(out, cv, 16);
                }
                memcpy(iv, cv, 16);
        }
        fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

        KASSERT(nbytes % 16 == 0);
        if (nbytes == 0)
                return;

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
                cbcdec_aligned_evcnt.ev_count++;
                aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
        } else {
                cbcdec_unaligned_evcnt.ev_count++;
                uint8_t iv0[16] __aligned(16);
                uint8_t cv[16] __aligned(16);
                uint8_t tmp[16] __aligned(16);

                memcpy(iv0, iv, 16);
                memcpy(cv, in + nbytes - 16, 16);
                memcpy(iv, cv, 16);

                /* Work from the last block back to the first,
                 * carrying the chaining value in cv. */
                for (;;) {
                        aesvia_decN(dec, cv, tmp, 1, cw0);
                        if ((nbytes -= 16) == 0)
                                break;
                        memcpy(cv, in + nbytes - 16, 16);
                        xor128(tmp, tmp, cv);
                        memcpy(out + nbytes, tmp, 16);
                }

                xor128(tmp, tmp, iv0);
                memcpy(out, tmp, 16);
                explicit_memset(tmp, 0, sizeof tmp);
        }
        fpu_kern_leave();
}

static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
        uint32_t s0, s1, s2, s3;

        s0 = *t0 >> 31;
        s1 = *t1 >> 31;
        s2 = *t2 >> 31;
        s3 = *t3 >> 31;
        *t0 = (*t0 << 1) ^ (-s3 & 0x87);
        *t1 = (*t1 << 1) ^ s0;
        *t2 = (*t2 << 1) ^ s1;
        *t3 = (*t3 << 1) ^ s2;
}
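
/*
 * Worked example (illustrative): the update above multiplies the
 * 128-bit tweak by x in GF(2^128) mod x^128 + x^7 + x^2 + x + 1,
 * with the tweak laid out as four little-endian 32-bit lanes
 * (t0 least significant).  Each lane shifts left by one; the bit
 * shifted out of lane i carries into lane i+1, and the bit shifted
 * out of the top lane folds back into lane 0 as the reduction
 * constant 0x87.  For instance:
 *
 *	{0x80000000, 0, 0, 0} -> {0, 1, 0, 0}	(carry between lanes)
 *	{0, 0, 0, 0x80000000} -> {0x87, 0, 0, 0}	(reduction)
 *
 * These are exactly the cases checked by the selftest below.
 */
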
static int
aesvia_xts_update_selftest(void)
{
        static const struct {
                uint32_t in[4], out[4];
        } cases[] = {
                { {1}, {2} },
                { {0x80000000U,0,0,0}, {0,1,0,0} },
                { {0,0x80000000U,0,0}, {0,0,1,0} },
                { {0,0,0x80000000U,0}, {0,0,0,1} },
                { {0,0,0,0x80000000U}, {0x87,0,0,0} },
                { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
        };
        unsigned i;
        uint32_t t0, t1, t2, t3;

        for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
                t0 = cases[i].in[0];
                t1 = cases[i].in[1];
                t2 = cases[i].in[2];
                t3 = cases[i].in[3];
                aesvia_xts_update(&t0, &t1, &t2, &t3);
                if (t0 != cases[i].out[0] ||
                    t1 != cases[i].out[1] ||
                    t2 != cases[i].out[2] ||
                    t3 != cases[i].out[3])
                        return -1;
        }

        /* Success! */
        return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
        uint32_t t[4];

        KASSERT(nbytes % 16 == 0);

        memcpy(t, tweak, 16);

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
                xtsenc_aligned_evcnt.ev_count++;
                unsigned lastblock = 0;
                uint32_t buf[8*4] __aligned(16);

                /*
                 * Make sure the last block is not the last block of a
                 * page.  (Note that we store the AES input in `out' as
                 * a temporary buffer, rather than reading it directly
                 * from `in', since we have to combine the tweak
                 * first.)
                 */
                lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
                nbytes -= lastblock;

                /*
                 * Handle an odd number of initial blocks so we can
                 * process the rest in eight-block (128-byte) chunks.
                 */
                if (nbytes % 128) {
                        unsigned nbytes128 = nbytes % 128;

                        nbytes -= nbytes128;
                        for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
                        {
                                xor128(out, in, t);
                                aesvia_encN(enc, out, out, 1, cw0);
                                xor128(out, out, t);
                                aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                        }
                }

                /* Process eight blocks at a time.  */
                for (; nbytes; nbytes -= 128, in += 128, out += 128) {
                        unsigned i;
                        for (i = 0; i < 8; i++) {
                                /* Stash T_i; buf counts in 32-bit
                                 * words, in/out in bytes. */
                                memcpy(buf + 4*i, t, 16);
                                xor128(out + 16*i, in + 16*i, t);
                                aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                        }
                        aesvia_encN(enc, out, out, 8, cw0);
                        for (i = 0; i < 8; i++)
                                xor128(out + 16*i, out + 16*i, buf + 4*i);
                }

                /* Handle the last block of a page, if necessary.  */
                if (lastblock) {
                        xor128(buf, in, t);
                        aesvia_encN(enc, (const void *)buf, out, 1, cw0);
                        xor128(out, out, t);
                        aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                }

                explicit_memset(buf, 0, sizeof buf);
        } else {
                xtsenc_unaligned_evcnt.ev_count++;
                uint8_t buf[16] __aligned(16);

                for (; nbytes; nbytes -= 16, in += 16, out += 16) {
                        memcpy(buf, in, 16);
                        xor128(buf, buf, t);
                        aesvia_encN(enc, buf, buf, 1, cw0);
                        xor128(buf, buf, t);
                        memcpy(out, buf, 16);
                        aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                }

                explicit_memset(buf, 0, sizeof buf);
        }
        fpu_kern_leave();

        memcpy(tweak, t, 16);
        explicit_memset(t, 0, sizeof t);
}
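
/*
 * Design note (editorial): the eight-block loop above must stash
 * each block's tweak in buf before xcrypt runs, because the tweak is
 * advanced per block in software while the hardware encrypts all
 * eight blocks with a single rep xcryptecb.  The saved copies are
 * then used for the post-encryption xor, C_i = E_K(P_i ^ T_i) ^ T_i.
 * Note the strides: in/out are byte pointers (16*i per block) while
 * buf is a uint32_t array (4*i words per block).
 */
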
static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
        const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
        uint32_t t[4];

        KASSERT(nbytes % 16 == 0);

        memcpy(t, tweak, 16);

        fpu_kern_enter();
        aesvia_reload_keys();
        if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
                xtsdec_aligned_evcnt.ev_count++;
                unsigned lastblock = 0;
                uint32_t buf[8*4] __aligned(16);

                /*
                 * Make sure the last block is not the last block of a
                 * page.  (Note that we store the AES input in `out' as
                 * a temporary buffer, rather than reading it directly
                 * from `in', since we have to combine the tweak
                 * first.)
                 */
                lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
                nbytes -= lastblock;

                /*
                 * Handle an odd number of initial blocks so we can
                 * process the rest in eight-block (128-byte) chunks.
                 */
                if (nbytes % 128) {
                        unsigned nbytes128 = nbytes % 128;

                        nbytes -= nbytes128;
                        for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
                        {
                                xor128(out, in, t);
                                aesvia_decN(dec, out, out, 1, cw0);
                                xor128(out, out, t);
                                aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                        }
                }

                /* Process eight blocks at a time.  */
                for (; nbytes; nbytes -= 128, in += 128, out += 128) {
                        unsigned i;
                        for (i = 0; i < 8; i++) {
                                memcpy(buf + 4*i, t, 16);
                                xor128(out + 16*i, in + 16*i, t);
                                aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                        }
                        aesvia_decN(dec, out, out, 8, cw0);
                        for (i = 0; i < 8; i++)
                                xor128(out + 16*i, out + 16*i, buf + 4*i);
                }

                /* Handle the last block of a page, if necessary.  */
                if (lastblock) {
                        xor128(buf, in, t);
                        aesvia_decN(dec, (const void *)buf, out, 1, cw0);
                        xor128(out, out, t);
                        aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                }

                explicit_memset(buf, 0, sizeof buf);
        } else {
                xtsdec_unaligned_evcnt.ev_count++;
                uint8_t buf[16] __aligned(16);

                for (; nbytes; nbytes -= 16, in += 16, out += 16) {
                        memcpy(buf, in, 16);
                        xor128(buf, buf, t);
                        aesvia_decN(dec, buf, buf, 1, cw0);
                        xor128(buf, buf, t);
                        memcpy(out, buf, 16);
                        aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
                }

                explicit_memset(buf, 0, sizeof buf);
        }
        fpu_kern_leave();

        memcpy(tweak, t, 16);
        explicit_memset(t, 0, sizeof t);
}
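
/*
 * For reference (editorial): both XTS paths implement, per block i,
 *
 *	C_i = E_K(P_i ^ T_i) ^ T_i	(aesvia_xts_enc)
 *	P_i = D_K(C_i ^ T_i) ^ T_i	(aesvia_xts_dec)
 *	T_{i+1} = T_i * x		(aesvia_xts_update)
 *
 * so decryption reuses exactly the same tweak schedule as
 * encryption; only the ECB direction bit (C3_CRYPT_CWLO_DECRYPT)
 * differs, which is why one selftest of aesvia_xts_update covers
 * both directions.
 */
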
static int
aesvia_probe(void)
{

        /* Verify that the CPU advertises VIA ACE support.  */
        if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
                return -1;

        /* Verify that our XTS tweak update logic works.  */
        if (aesvia_xts_update_selftest())
                return -1;

        /* Success! */
        return 0;
}

struct aes_impl aes_via_impl = {
        .ai_name = "VIA ACE",
        .ai_probe = aesvia_probe,
        .ai_setenckey = aesvia_setenckey,
        .ai_setdeckey = aesvia_setdeckey,
        .ai_enc = aesvia_enc,
        .ai_dec = aesvia_dec,
        .ai_cbc_enc = aesvia_cbc_enc,
        .ai_cbc_dec = aesvia_cbc_dec,
        .ai_xts_enc = aesvia_xts_enc,
        .ai_xts_dec = aesvia_xts_dec,
};
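
/*
 * Usage sketch (illustrative; the actual registration hook lives in
 * the machine-dependent startup code, not in this file): the table
 * above is the whole public surface of this driver.  On VIA hardware
 * the platform code hands it to the aes(9) selection machinery,
 * roughly
 *
 *	aes_md_init(&aes_via_impl);
 *
 * after which the framework calls ai_probe -- the CPUID ACE check
 * and the tweak selftest above -- before committing to this
 * implementation.
 */
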