aes_via.c revision 1.3
/*	$NetBSD: aes_via.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d) {0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define C3_CRYPT_CWLO_ALG_M		0x00000070
#define C3_CRYPT_CWLO_ALG_AES		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define C3_CRYPT_CWLO_NORMAL		0x00000000
#define C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define C3_CRYPT_CWLO_DECRYPT		0x00000200
#define C3_CRYPT_CWLO_KEY128		0x0000000a      /* 128bit, 10 rds */
#define C3_CRYPT_CWLO_KEY192		0x0000040c      /* 192bit, 12 rds */
#define C3_CRYPT_CWLO_KEY256		0x0000080e      /* 256bit, 14 rds */
#endif

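/*
 * aesvia_reload_keys()
 *
 *	The ACE unit caches the expanded key it last used.  Writing to
 *	EFLAGS -- here with a pushf/popf pair -- signals that the key
 *	material in memory may have changed, so the next xcrypt
 *	instruction reloads the key before using it.
 */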
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

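/*
 * For AES-128 the hardware expands the key itself, so we store only
 * the four raw key words.  For AES-192 and AES-256 we expand the key
 * in software with BearSSL's constant-time key schedule and hand the
 * full schedule to the hardware (C3_CRYPT_CWLO_KEYGEN_SW).
 */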
static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

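/*
 * The xcrypt instructions take their operands in fixed registers:
 * ECX holds the block count, ESI the source, EDI the destination,
 * EBX the key (or key schedule), and EDX the 128-bit control word;
 * all pointers must be 16-byte aligned.  CBC mode additionally
 * passes the IV pointer in EAX.
 */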
static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

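/*
 * CBC encryption: C_i = E(P_i ^ C_{i-1}), with C_0 taken from iv.
 * On return, iv holds the last ciphertext block so the caller can
 * continue the chain in a subsequent call.
 */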
static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		/*
		 * Save the IV and the last ciphertext block before
		 * writing anything, then work backwards from the last
		 * block; this remains correct when out == in.
		 */
		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

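/*
 * aesvia_xts_update()
 *
 *	Multiply the 128-bit XTS tweak by x in GF(2^128) modulo
 *	x^128 + x^7 + x^2 + x + 1: shift the tweak left by one bit
 *	across the four little-endian 32-bit words, and if a bit
 *	falls off the top, fold it back in as 0x87 in the low word.
 */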
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

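/*
 * XTS encryption: C_i = E(P_i ^ T_i) ^ T_i, where T_0 comes from
 * tweak and T_{i+1} = T_i * x (aesvia_xts_update).  The updated
 * tweak is written back so the caller can continue with the next
 * chunk of the same sector.
 */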
static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

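/*
 * XTS decryption: P_i = D(C_i ^ T_i) ^ T_i, with the same tweak
 * sequence as aesvia_xts_enc.
 */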
static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check that ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
};