/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define	C3_CRYPT_CWLO_ALG_M		0x00000070
#define	C3_CRYPT_CWLO_ALG_AES		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define	C3_CRYPT_CWLO_NORMAL		0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define	C3_CRYPT_CWLO_DECRYPT		0x00000200
#define	C3_CRYPT_CWLO_KEY128		0x0000000a /* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192		0x0000040c /* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256		0x0000080e /* 256bit, 14 rds */
#endif
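
/*
 * The PadLock engine caches loaded key material between REP XCRYPT
 * invocations.  Per the VIA PadLock programming guide (URL in
 * aesvia_probe below), writing EFLAGS -- even with unchanged
 * contents, as in the no-op pushf/popf pair here -- marks the cached
 * key stale and forces the engine to reload it from memory on the
 * next operation.  We do this before every operation in case some
 * other context has run with a different key.
 */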

static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}
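
/*
 * Worked out from the C3_CRYPT_CWLO_* definitions (mirrored above for
 * the non-kernel build; the kernel gets them from <x86/via_padlock.h>),
 * the control words returned above are:
 *
 *	AES-128: 0x00a -- 10 rounds, hardware key expansion
 *	AES-192: 0x48c -- 12 rounds, software key schedule
 *	AES-256: 0x88e -- 14 rounds, software key schedule
 *
 * The low nibble is the round count.
 */
#ifdef __CTASSERT		/* sanity-check the arithmetic at compile time */
__CTASSERT(C3_CRYPT_CWLO_KEY128 == 0x00a);
__CTASSERT((C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW) == 0x48c);
__CTASSERT((C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW) == 0x88e);
#endif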

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}
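
/*
 * For reference, the REP XCRYPT calling convention from the VIA
 * PadLock programming guide (URL in aesvia_probe below): all operands
 * live in fixed registers -- rCX the block count, rSI the source, rDI
 * the destination, rBX the key schedule, rDX the 16-byte-aligned
 * control word, and, for the chaining variants (CBC/CFB/OFB/CTR), rAX
 * the IV pointer.  The asm constraints in aesvia_encN/aesvia_decN
 * above and aesvia_cbc_encN/aesvia_cbc_decN below encode exactly this
 * contract.
 */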

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}
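
/*
 * Note: xor128 does 32-bit loads and stores through pointers that are
 * not always 4-byte-aligned -- e.g. the caller-supplied `in' in
 * aesvia_cbcmac_update1 and aesvia_ccm_enc1 below.  That is fine
 * here, if not portable: this file is x86-only, and x86 permits
 * misaligned scalar access.
 */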

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}
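
/*
 * aesvia_xts_update multiplies the tweak by x in GF(2^128) with the
 * XTS reduction polynomial x^128 + x^7 + x^2 + x + 1 (0x87).  The
 * tweak is kept as four 32-bit limbs t0..t3, least significant first:
 * each limb shifts left by one, the bit shifted out of each limb
 * carries into the next, and the bit shifted out of the top limb
 * feeds back into the low byte as 0x87.  For example,
 * {0,0,0,0x80000000} becomes {0x87,0,0,0} -- the penultimate
 * self-test vector below.
 */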

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}
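
/*
 * A note on the aligned path above, mirrored in aesvia_xts_dec below:
 * for each 128-byte chunk we stash the eight tweaks in buf and form
 * P_i ^ T_i directly in `out', so that a single REP XCRYPTECB
 * processes all eight blocks in place; a final pass xors the saved
 * tweaks back in to complete C_i = E(P_i ^ T_i) ^ T_i.
 */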

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);

static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
	 */
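	/*
	 * Buffer layout: authctr[0..15] is the running CBC-MAC state
	 * and authctr[16..31] is the counter block, so the two-block
	 * REP XCRYPTECB below advances the MAC and generates the next
	 * 16 bytes of CTR keystream in one go.
	 */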
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}
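
/*
 * Note on the ordering above: CCM decryption must generate the CTR
 * keystream for a block before it can authenticate that block,
 * because the CBC-MAC is taken over the plaintext.  Hence
 * aesvia_ccm_dec1 encrypts the first counter block by itself up
 * front, then interleaves two-block ECB operations that absorb the
 * previous plaintext block into the MAC while producing the next
 * keystream block, with one final single-block operation to finish
 * the MAC.
 */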

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock programming guide:
	 * http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check whether ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};
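
/*
 * Nothing in this file invokes aesvia_probe directly.  The descriptor
 * above is expected to be handed to the machine-dependent AES dispatch
 * -- in the NetBSD kernel, presumably via aes_md_init(&aes_via_impl)
 * from x86 startup code -- which calls ai_probe and adopts these entry
 * points only if it returns 0.  (The registration hook named here is
 * an assumption about the surrounding framework, not something this
 * file establishes.)
 */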