/*	$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $");

#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>

#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>

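/*
 * aesvia_reload_keys()
 *
 *	Force the PadLock unit to reload the key material on the next
 *	xcrypt instruction.  The unit caches the last key schedule it
 *	loaded; writing to EFLAGS (here a no-op pushf/popf pair) is the
 *	documented way to mark that cache stale after a key change.
 */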
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

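	/*
	 * For AES-128 the PadLock unit expands the key itself (no
	 * C3_CRYPT_CWLO_KEYGEN_SW in the control word), so just store
	 * the four raw key words; AES-192 and AES-256 fall through to
	 * the BearSSL software key schedule below.
	 */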
	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

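	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 */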
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

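	/* Same register effects as in aesvia_encN.  */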
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
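	/* Fast path: aligned buffers, and in is not the last block of a page. */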
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
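		/* ivp now points at the last ciphertext block in out.  */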
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

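/*
 * aesvia_xts_update(t0, t1, t2, t3)
 *
 *	Multiply the 128-bit XTS tweak, stored little-endian in the
 *	four 32-bit words t0..t3, by x in GF(2^128) modulo
 *	x^128 + x^7 + x^2 + x + 1 -- hence the 0x87 reduction constant.
 */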
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			/* Save each tweak in buf so it can be folded back in.  */
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			/* Save each tweak in buf so it can be folded back in.  */
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
};