/*	$NetBSD: aes_via.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define	C3_CRYPT_CWLO_ALG_M		0x00000070
#define	C3_CRYPT_CWLO_ALG_AES		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define	C3_CRYPT_CWLO_NORMAL		0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define	C3_CRYPT_CWLO_DECRYPT		0x00000200
#define	C3_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
#endif
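/*
 * The ACE unit caches the expanded key between xcrypt operations.
 * Per the VIA PadLock programming guide (cited in aesvia_probe
 * below), writing EFLAGS -- here with a pushf/popf pair -- flags the
 * cached key as stale, so the next xcrypt reloads the key material
 * from memory.
 */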
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}
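/*
 * Register contract for the rep-prefixed xcrypt instructions, as
 * encoded by the asm constraints below: ECX holds the block count,
 * ESI/EDI the 16-byte-aligned source/destination, EBX the address of
 * the key material, and EDX the address of the 16-byte-aligned
 * control word.  (Summary per the PadLock programming guide; the
 * constraints themselves are authoritative.)
 */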
static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}
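/*
 * CBC helpers.  rep xcryptcbc runs the whole chain in hardware, with
 * EAX holding the address of the 16-byte-aligned IV.  CBC encryption
 * is inherently serial -- each block's input is XORed with the
 * previous ciphertext block -- so the best we can do is hand the
 * hardware as many blocks as possible in one instruction.
 */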
static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}
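/*
 * XOR of two 128-bit quantities.  The buffers are dereferenced as
 * uint32_t, so callers must pass 4-byte-aligned pointers; every
 * caller in this file passes either a uint32_t array, a buffer
 * declared __aligned(16), or a pointer already verified to be
 * 16-byte-aligned.
 */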
static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}
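/*
 * Advance an XTS tweak: multiply by x in GF(2^128), i.e. shift the
 * 128-bit value left by one bit and, if a bit shifts out of the top,
 * reduce modulo the XTS polynomial x^128 + x^7 + x^2 + x + 1 -- the
 * low byte 0x87 encodes x^7 + x^2 + x + 1.  The tweak is kept as
 * four 32-bit limbs with t0 least significant.
 */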
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}
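/*
 * Probe for VIA ACE.  In the kernel this consults the cached CPUID
 * feature bits; in userland it walks the Centaur extended CPUID
 * leaves directly.  If the probe returns 0, the aes_via_impl vtable
 * below is eligible for selection as the machine-dependent AES
 * implementation -- on NetBSD/x86, presumably via the aes_md_init()
 * registration path, which lives outside this file.
 */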
static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check that ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
};