1210409Skib/*-
2247061Spjd * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3210409Skib * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4226839Spjd * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5255187Sjmg * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
6275732Sjmg * Copyright (c) 2014 The FreeBSD Foundation
7210409Skib * All rights reserved.
8210409Skib *
9275732Sjmg * Portions of this software were developed by John-Mark Gurney
10275732Sjmg * under sponsorship of the FreeBSD Foundation and
11275732Sjmg * Rubicon Communications, LLC (Netgate).
12275732Sjmg *
13210409Skib * Redistribution and use in source and binary forms, with or without
14210409Skib * modification, are permitted provided that the following conditions
15210409Skib * are met:
16210409Skib * 1. Redistributions of source code must retain the above copyright
17210409Skib *    notice, this list of conditions and the following disclaimer.
18210409Skib * 2. Redistributions in binary form must reproduce the above copyright
19210409Skib *    notice, this list of conditions and the following disclaimer in the
20210409Skib *    documentation and/or other materials provided with the distribution.
21210409Skib *
22210409Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
23210409Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24210409Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25210409Skib * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
26210409Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27210409Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28210409Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29210409Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30210409Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31210409Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32210409Skib * SUCH DAMAGE.
33210409Skib */
34210409Skib
35210409Skib#include <sys/cdefs.h>
36210409Skib__FBSDID("$FreeBSD$");
37275732Sjmg
38210409Skib#include <sys/param.h>
39210409Skib#include <sys/libkern.h>
40210409Skib#include <sys/malloc.h>
41210409Skib#include <sys/proc.h>
42210409Skib#include <sys/systm.h>
43210409Skib#include <crypto/aesni/aesni.h>
44275732Sjmg
45275732Sjmg#include <opencrypto/gmac.h>
46275732Sjmg
47255187Sjmg#include "aesencdec.h"
48275732Sjmg#include <smmintrin.h>
49210409Skib
50210409SkibMALLOC_DECLARE(M_AESNI);
51210409Skib
/*
 * Eight consecutive 128-bit values.  The block-mode routines in this file
 * cast their byte buffers to this type so that eight AES blocks can be read
 * or written as blk[0]..blk[7].  The structure is declared __packed.
 * NOTE(review): presumably the packing is there so that accesses through a
 * casted, possibly unaligned byte pointer make no alignment assumption --
 * confirm against the compiler's handling of __packed vector members.
 */
52257757Sjmgstruct blocks8 {
53257757Sjmg	__m128i	blk[8];
54257757Sjmg} __packed;
55257757Sjmg
56210409Skibvoid
57210409Skibaesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
58300773Scem    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
59210409Skib{
60255187Sjmg	__m128i tot, ivreg;
61210409Skib	size_t i;
62210409Skib
63210409Skib	len /= AES_BLOCK_LEN;
64255187Sjmg	ivreg = _mm_loadu_si128((const __m128i *)iv);
65210409Skib	for (i = 0; i < len; i++) {
66255187Sjmg		tot = aesni_enc(rounds - 1, key_schedule,
67255187Sjmg		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
68255187Sjmg		ivreg = tot;
69255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
70210409Skib		from += AES_BLOCK_LEN;
71210409Skib		to += AES_BLOCK_LEN;
72210409Skib	}
73210409Skib}
74210409Skib
75210409Skibvoid
76255187Sjmgaesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
77300773Scem    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
78255187Sjmg{
79255187Sjmg	__m128i blocks[8];
80257757Sjmg	struct blocks8 *blks;
81255187Sjmg	__m128i ivreg, nextiv;
82255187Sjmg	size_t i, j, cnt;
83255187Sjmg
84255187Sjmg	ivreg = _mm_loadu_si128((const __m128i *)iv);
85255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
86255187Sjmg	for (i = 0; i < cnt; i++) {
87257757Sjmg		blks = (struct blocks8 *)buf;
88257757Sjmg		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
89257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
90257757Sjmg		    blks->blk[6], blks->blk[7], &blocks[0]);
91255187Sjmg		for (j = 0; j < 8; j++) {
92257757Sjmg			nextiv = blks->blk[j];
93257757Sjmg			blks->blk[j] = blocks[j] ^ ivreg;
94255187Sjmg			ivreg = nextiv;
95255187Sjmg		}
96255187Sjmg		buf += AES_BLOCK_LEN * 8;
97255187Sjmg	}
98255187Sjmg	i *= 8;
99255187Sjmg	cnt = len / AES_BLOCK_LEN;
100255187Sjmg	for (; i < cnt; i++) {
101257757Sjmg		nextiv = _mm_loadu_si128((void *)buf);
102257757Sjmg		_mm_storeu_si128((void *)buf,
103257757Sjmg		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
104255187Sjmg		ivreg = nextiv;
105255187Sjmg		buf += AES_BLOCK_LEN;
106255187Sjmg	}
107255187Sjmg}
108255187Sjmg
109255187Sjmgvoid
110210409Skibaesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
111255187Sjmg    const uint8_t *from, uint8_t *to)
112210409Skib{
113255187Sjmg	__m128i tot;
114257757Sjmg	__m128i tout[8];
115257757Sjmg	struct blocks8 *top;
116257757Sjmg	const struct blocks8 *blks;
117255187Sjmg	size_t i, cnt;
118210409Skib
119255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
120255187Sjmg	for (i = 0; i < cnt; i++) {
121257757Sjmg		blks = (const struct blocks8 *)from;
122257757Sjmg		top = (struct blocks8 *)to;
123257757Sjmg		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
124257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
125257757Sjmg		    blks->blk[6], blks->blk[7], tout);
126257757Sjmg		top->blk[0] = tout[0];
127257757Sjmg		top->blk[1] = tout[1];
128257757Sjmg		top->blk[2] = tout[2];
129257757Sjmg		top->blk[3] = tout[3];
130257757Sjmg		top->blk[4] = tout[4];
131257757Sjmg		top->blk[5] = tout[5];
132257757Sjmg		top->blk[6] = tout[6];
133257757Sjmg		top->blk[7] = tout[7];
134255187Sjmg		from += AES_BLOCK_LEN * 8;
135255187Sjmg		to += AES_BLOCK_LEN * 8;
136255187Sjmg	}
137255187Sjmg	i *= 8;
138255187Sjmg	cnt = len / AES_BLOCK_LEN;
139255187Sjmg	for (; i < cnt; i++) {
140255187Sjmg		tot = aesni_enc(rounds - 1, key_schedule,
141255187Sjmg		    _mm_loadu_si128((const __m128i *)from));
142255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
143210409Skib		from += AES_BLOCK_LEN;
144210409Skib		to += AES_BLOCK_LEN;
145210409Skib	}
146210409Skib}
147210409Skib
148210409Skibvoid
149210409Skibaesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
150210409Skib    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
151210409Skib{
152255187Sjmg	__m128i tot;
153257757Sjmg	__m128i tout[8];
154257757Sjmg	const struct blocks8 *blks;
155257757Sjmg	struct blocks8 *top;
156255187Sjmg	size_t i, cnt;
157210409Skib
158255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
159255187Sjmg	for (i = 0; i < cnt; i++) {
160257757Sjmg		blks = (const struct blocks8 *)from;
161257757Sjmg		top = (struct blocks8 *)to;
162257757Sjmg		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
163257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
164257757Sjmg		    blks->blk[6], blks->blk[7], tout);
165257757Sjmg		top->blk[0] = tout[0];
166257757Sjmg		top->blk[1] = tout[1];
167257757Sjmg		top->blk[2] = tout[2];
168257757Sjmg		top->blk[3] = tout[3];
169257757Sjmg		top->blk[4] = tout[4];
170257757Sjmg		top->blk[5] = tout[5];
171257757Sjmg		top->blk[6] = tout[6];
172257757Sjmg		top->blk[7] = tout[7];
173255187Sjmg		from += AES_BLOCK_LEN * 8;
174255187Sjmg		to += AES_BLOCK_LEN * 8;
175255187Sjmg	}
176255187Sjmg	i *= 8;
177255187Sjmg	cnt = len / AES_BLOCK_LEN;
178255187Sjmg	for (; i < cnt; i++) {
179255187Sjmg		tot = aesni_dec(rounds - 1, key_schedule,
180255187Sjmg		    _mm_loadu_si128((const __m128i *)from));
181255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
182210409Skib		from += AES_BLOCK_LEN;
183210409Skib		to += AES_BLOCK_LEN;
184210409Skib	}
185210409Skib}
186210409Skib
/*
 * Advance a counter held in "mixed endian" form by one.
 *
 * The low 64 bits of the counter live in the upper 64-bit lane of the
 * register and the high 64 bits in the lower lane, which is the layout the
 * byte swap in aesni_encrypt_icm() produces.  One is added to the upper
 * lane; if that lane wrapped around to zero, the carry is added to the lower
 * lane.
 */
static inline __m128i
nextc(__m128i x)
{
	const __m128i zero = _mm_setzero_si128();
	/* A one in the upper 64-bit lane, zero in the lower lane. */
	const __m128i upper_one = _mm_set_epi32(0, 1, 0, 0);
	__m128i wrapped, carry;

	x = _mm_add_epi64(x, upper_one);

	/* Each lane becomes all-ones (-1) where x is now zero. */
	wrapped = _mm_cmpeq_epi64(x, zero);
	/* Move the upper lane's flag into the lower lane; clear the upper. */
	carry = _mm_unpackhi_epi64(wrapped, zero);

	/* Subtracting -1 adds the carry to the lower lane. */
	return (_mm_sub_epi64(x, carry));
}
204275732Sjmg
205275732Sjmgvoid
206275732Sjmgaesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
207300773Scem    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
208275732Sjmg{
209275732Sjmg	__m128i tot;
210275732Sjmg	__m128i tmp1, tmp2, tmp3, tmp4;
211275732Sjmg	__m128i tmp5, tmp6, tmp7, tmp8;
212275732Sjmg	__m128i ctr1, ctr2, ctr3, ctr4;
213275732Sjmg	__m128i ctr5, ctr6, ctr7, ctr8;
214275732Sjmg	__m128i BSWAP_EPI64;
215275732Sjmg	__m128i tout[8];
216275732Sjmg	struct blocks8 *top;
217275732Sjmg	const struct blocks8 *blks;
218275732Sjmg	size_t i, cnt;
219275732Sjmg
220275732Sjmg	BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
221275732Sjmg
222275732Sjmg	ctr1 = _mm_loadu_si128((__m128i*)iv);
223275732Sjmg	ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
224275732Sjmg
225275732Sjmg	cnt = len / AES_BLOCK_LEN / 8;
226275732Sjmg	for (i = 0; i < cnt; i++) {
227275732Sjmg		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
228275732Sjmg		ctr2 = nextc(ctr1);
229275732Sjmg		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
230275732Sjmg		ctr3 = nextc(ctr2);
231275732Sjmg		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
232275732Sjmg		ctr4 = nextc(ctr3);
233275732Sjmg		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
234275732Sjmg		ctr5 = nextc(ctr4);
235275732Sjmg		tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
236275732Sjmg		ctr6 = nextc(ctr5);
237275732Sjmg		tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
238275732Sjmg		ctr7 = nextc(ctr6);
239275732Sjmg		tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
240275732Sjmg		ctr8 = nextc(ctr7);
241275732Sjmg		tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
242275732Sjmg		ctr1 = nextc(ctr8);
243275732Sjmg
244275732Sjmg		blks = (const struct blocks8 *)from;
245275732Sjmg		top = (struct blocks8 *)to;
246275732Sjmg		aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
247275732Sjmg		    tmp5, tmp6, tmp7, tmp8, tout);
248275732Sjmg
249275732Sjmg		top->blk[0] = blks->blk[0] ^ tout[0];
250275732Sjmg		top->blk[1] = blks->blk[1] ^ tout[1];
251275732Sjmg		top->blk[2] = blks->blk[2] ^ tout[2];
252275732Sjmg		top->blk[3] = blks->blk[3] ^ tout[3];
253275732Sjmg		top->blk[4] = blks->blk[4] ^ tout[4];
254275732Sjmg		top->blk[5] = blks->blk[5] ^ tout[5];
255275732Sjmg		top->blk[6] = blks->blk[6] ^ tout[6];
256275732Sjmg		top->blk[7] = blks->blk[7] ^ tout[7];
257275732Sjmg
258275732Sjmg		from += AES_BLOCK_LEN * 8;
259275732Sjmg		to += AES_BLOCK_LEN * 8;
260275732Sjmg	}
261275732Sjmg	i *= 8;
262275732Sjmg	cnt = len / AES_BLOCK_LEN;
263275732Sjmg	for (; i < cnt; i++) {
264275732Sjmg		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
265275732Sjmg		ctr1 = nextc(ctr1);
266275732Sjmg
267275732Sjmg		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
268275732Sjmg
269275732Sjmg		tot = tot ^ _mm_loadu_si128((const __m128i *)from);
270275732Sjmg		_mm_storeu_si128((__m128i *)to, tot);
271275732Sjmg
272275732Sjmg		from += AES_BLOCK_LEN;
273275732Sjmg		to += AES_BLOCK_LEN;
274275732Sjmg	}
275275732Sjmg
276275732Sjmg	/* handle remaining partial round */
277275732Sjmg	if (len % AES_BLOCK_LEN != 0) {
278275732Sjmg		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
279275732Sjmg		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
280275732Sjmg		tot = tot ^ _mm_loadu_si128((const __m128i *)from);
281275732Sjmg		memcpy(to, &tot, len % AES_BLOCK_LEN);
282275732Sjmg	}
283275732Sjmg}
284275732Sjmg
285213069Spjd#define	AES_XTS_BLOCKSIZE	16
286213069Spjd#define	AES_XTS_IVSIZE		8
287213069Spjd#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
288213069Spjd
289255187Sjmgstatic inline __m128i
290255187Sjmgxts_crank_lfsr(__m128i inp)
291255187Sjmg{
292255187Sjmg	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
293255187Sjmg	__m128i xtweak, ret;
294255187Sjmg
295255187Sjmg	/* set up xor mask */
296255187Sjmg	xtweak = _mm_shuffle_epi32(inp, 0x93);
297255187Sjmg	xtweak = _mm_srai_epi32(xtweak, 31);
298255187Sjmg	xtweak &= alphamask;
299255187Sjmg
300255187Sjmg	/* next term */
301255187Sjmg	ret = _mm_slli_epi32(inp, 1);
302255187Sjmg	ret ^= xtweak;
303255187Sjmg
304255187Sjmg	return ret;
305255187Sjmg}
306255187Sjmg
307213069Spjdstatic void
308257757Sjmgaesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
309257757Sjmg    const uint8_t *from, uint8_t *to, int do_encrypt)
310213069Spjd{
311255187Sjmg	__m128i block;
312213069Spjd
313257757Sjmg	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
314213069Spjd
315213069Spjd	if (do_encrypt)
316255187Sjmg		block = aesni_enc(rounds - 1, key_schedule, block);
317213069Spjd	else
318255187Sjmg		block = aesni_dec(rounds - 1, key_schedule, block);
319213069Spjd
320257757Sjmg	_mm_storeu_si128((__m128i *)to, block ^ *tweak);
321213069Spjd
322255187Sjmg	*tweak = xts_crank_lfsr(*tweak);
323255187Sjmg}
324226837Spjd
325255187Sjmgstatic void
326257757Sjmgaesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
327257757Sjmg    const uint8_t *from, uint8_t *to, int do_encrypt)
328255187Sjmg{
329255187Sjmg	__m128i tmptweak;
330255187Sjmg	__m128i a, b, c, d, e, f, g, h;
331255187Sjmg	__m128i tweaks[8];
332255187Sjmg	__m128i tmp[8];
333257757Sjmg	__m128i *top;
334257757Sjmg	const __m128i *fromp;
335255187Sjmg
336255187Sjmg	tmptweak = *tweak;
337255187Sjmg
338255187Sjmg	/*
339255187Sjmg	 * unroll the loop.  This lets gcc put values directly in the
340255187Sjmg	 * register and saves memory accesses.
341255187Sjmg	 */
342257757Sjmg	fromp = (const __m128i *)from;
343255187Sjmg#define PREPINP(v, pos) 					\
344255187Sjmg		do {						\
345255187Sjmg			tweaks[(pos)] = tmptweak;		\
346257757Sjmg			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
347257757Sjmg			    tmptweak;				\
348255187Sjmg			tmptweak = xts_crank_lfsr(tmptweak);	\
349255187Sjmg		} while (0)
350255187Sjmg	PREPINP(a, 0);
351255187Sjmg	PREPINP(b, 1);
352255187Sjmg	PREPINP(c, 2);
353255187Sjmg	PREPINP(d, 3);
354255187Sjmg	PREPINP(e, 4);
355255187Sjmg	PREPINP(f, 5);
356255187Sjmg	PREPINP(g, 6);
357255187Sjmg	PREPINP(h, 7);
358255187Sjmg	*tweak = tmptweak;
359255187Sjmg
360255187Sjmg	if (do_encrypt)
361255187Sjmg		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
362255187Sjmg		    tmp);
363255187Sjmg	else
364255187Sjmg		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
365255187Sjmg		    tmp);
366255187Sjmg
367257757Sjmg	top = (__m128i *)to;
368257757Sjmg	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
369257757Sjmg	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
370257757Sjmg	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
371257757Sjmg	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
372257757Sjmg	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
373257757Sjmg	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
374257757Sjmg	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
375257757Sjmg	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
376213069Spjd}
377213069Spjd
378213069Spjdstatic void
379257757Sjmgaesni_crypt_xts(int rounds, const __m128i *data_schedule,
380257757Sjmg    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
381300773Scem    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
382213069Spjd{
383255187Sjmg	__m128i tweakreg;
384255187Sjmg	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
385255187Sjmg	size_t i, cnt;
386213069Spjd
387213069Spjd	/*
388213069Spjd	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
389213069Spjd	 * of a 64-bit block number which we allow to be passed in directly.
390213069Spjd	 */
391226837Spjd#if BYTE_ORDER == LITTLE_ENDIAN
392226837Spjd	bcopy(iv, tweak, AES_XTS_IVSIZE);
393213069Spjd	/* Last 64 bits of IV are always zero. */
394213069Spjd	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
395226837Spjd#else
396226837Spjd#error Only LITTLE_ENDIAN architectures are supported.
397226837Spjd#endif
398255187Sjmg	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
399255187Sjmg	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
400213069Spjd
401255187Sjmg	cnt = len / AES_XTS_BLOCKSIZE / 8;
402255187Sjmg	for (i = 0; i < cnt; i++) {
403255187Sjmg		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
404257757Sjmg		    from, to, do_encrypt);
405255187Sjmg		from += AES_XTS_BLOCKSIZE * 8;
406255187Sjmg		to += AES_XTS_BLOCKSIZE * 8;
407255187Sjmg	}
408255187Sjmg	i *= 8;
409255187Sjmg	cnt = len / AES_XTS_BLOCKSIZE;
410255187Sjmg	for (; i < cnt; i++) {
411255187Sjmg		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
412257757Sjmg		    from, to, do_encrypt);
413213069Spjd		from += AES_XTS_BLOCKSIZE;
414213069Spjd		to += AES_XTS_BLOCKSIZE;
415213069Spjd	}
416213069Spjd}
417213069Spjd
418255187Sjmgvoid
419213069Spjdaesni_encrypt_xts(int rounds, const void *data_schedule,
420213069Spjd    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
421300773Scem    const uint8_t iv[static AES_BLOCK_LEN])
422213069Spjd{
423213069Spjd
424213069Spjd	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
425213069Spjd	    iv, 1);
426213069Spjd}
427213069Spjd
428255187Sjmgvoid
429213069Spjdaesni_decrypt_xts(int rounds, const void *data_schedule,
430213069Spjd    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
431300773Scem    const uint8_t iv[static AES_BLOCK_LEN])
432213069Spjd{
433213069Spjd
434213069Spjd	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
435213069Spjd	    iv, 0);
436213069Spjd}
437213069Spjd
438267815Skibint
439213066Spjdaesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
440213066Spjd    int keylen)
441210409Skib{
442275732Sjmg	int decsched;
443210409Skib
444275732Sjmg	decsched = 1;
445275732Sjmg
446213069Spjd	switch (ses->algo) {
447275732Sjmg	case CRYPTO_AES_ICM:
448275732Sjmg	case CRYPTO_AES_NIST_GCM_16:
449275732Sjmg		decsched = 0;
450275732Sjmg		/* FALLTHROUGH */
451213069Spjd	case CRYPTO_AES_CBC:
452213069Spjd		switch (keylen) {
453213069Spjd		case 128:
454213069Spjd			ses->rounds = AES128_ROUNDS;
455213069Spjd			break;
456213069Spjd		case 192:
457213069Spjd			ses->rounds = AES192_ROUNDS;
458213069Spjd			break;
459213069Spjd		case 256:
460213069Spjd			ses->rounds = AES256_ROUNDS;
461213069Spjd			break;
462213069Spjd		default:
463275732Sjmg			CRYPTDEB("invalid CBC/ICM/GCM key length");
464213069Spjd			return (EINVAL);
465213069Spjd		}
466210409Skib		break;
467213069Spjd	case CRYPTO_AES_XTS:
468213069Spjd		switch (keylen) {
469213069Spjd		case 256:
470213069Spjd			ses->rounds = AES128_ROUNDS;
471213069Spjd			break;
472213069Spjd		case 512:
473213069Spjd			ses->rounds = AES256_ROUNDS;
474213069Spjd			break;
475213069Spjd		default:
476275732Sjmg			CRYPTDEB("invalid XTS key length");
477213069Spjd			return (EINVAL);
478213069Spjd		}
479210409Skib		break;
480210409Skib	default:
481210409Skib		return (EINVAL);
482210409Skib	}
483213069Spjd
484213066Spjd	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
485275732Sjmg	if (decsched)
486275732Sjmg		aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
487275732Sjmg		    ses->rounds);
488275732Sjmg
489275732Sjmg	if (ses->algo == CRYPTO_AES_XTS)
490213069Spjd		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
491213069Spjd		    ses->rounds);
492210409Skib
493213066Spjd	return (0);
494210409Skib}
495