/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/crypto/aesni/aesni_wrap.c 268034 2014-06-30 09:51:27Z kib $");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);

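/*
 * Eight AES blocks are grouped into one structure so that the 8-way
 * aesni_enc8()/aesni_dec8() helpers (see aesencdec.h) can load and store
 * them as a unit; interleaving eight independent blocks is presumably
 * what hides the latency of the AESENC/AESDEC instructions.
 */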
struct blocks8 {
	__m128i	blk[8];
} __packed;

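/*
 * CBC encryption: C[0] = E_k(P[0] ^ IV) and C[i] = E_k(P[i] ^ C[i-1]).
 * Because every block depends on the previous ciphertext block, the loop
 * below is inherently serial and encrypts one block per iteration.
 */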
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

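/*
 * CBC decryption: P[i] = D_k(C[i]) ^ C[i-1].  Unlike encryption, every
 * block depends only on ciphertext that is already available, so eight
 * blocks at a time are pushed through the interleaved aesni_dec8() and
 * then XORed with the saved previous ciphertexts; the second loop handles
 * the tail when the length is not a multiple of eight blocks.
 */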
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i blocks[8];
	struct blocks8 *blks;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (struct blocks8 *)buf;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = blks->blk[j];
			blks->blk[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		nextiv = _mm_loadu_si128((void *)buf);
		_mm_storeu_si128((void *)buf,
		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

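/*
 * ECB has no chaining, so both directions are trivially parallel: the main
 * loop pushes eight independent blocks through the interleaved 8-way
 * helpers and the second loop finishes any remaining blocks one at a time.
 * aesni_decrypt_ecb() below is the mirror image using the decryption
 * schedule.
 */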
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
	__m128i tot;
	__m128i tout[8];
	const struct blocks8 *blks;
	struct blocks8 *top;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

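/*
 * xts_crank_lfsr() advances the XTS tweak, i.e. it multiplies the tweak by
 * x (alpha) in GF(2^128) with the reduction polynomial
 * x^128 + x^7 + x^2 + x + 1.  The SSE sequence below shifts each 32-bit
 * lane left by one and then re-injects the bits that fell off: the lane
 * rotation plus arithmetic shift builds a mask from the old top bits, and
 * ANDing it with (1, 1, 1, 0x87) turns a carry out of lanes 0-2 into the
 * low bit of the next lane and a carry out of the top lane into the 0x87
 * reduction in the low byte.
 *
 * For reference only (not used by the driver), a plain byte-oriented
 * version of the same update could look like the sketch below; the name
 * xts_mul_alpha is made up for the illustration:
 *
 *	static void
 *	xts_mul_alpha(uint8_t tweak[AES_XTS_BLOCKSIZE])
 *	{
 *		u_int carry, msb, i;
 *
 *		carry = 0;
 *		for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
 *			msb = tweak[i] >> 7;
 *			tweak[i] = (tweak[i] << 1) | carry;
 *			carry = msb;
 *		}
 *		if (carry != 0)
 *			tweak[0] ^= AES_XTS_ALPHA;
 *	}
 */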
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return ret;
}

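/*
 * One XTS block: C = E_k1(P ^ T) ^ T for encryption (D_k1 for decryption),
 * after which the tweak T is advanced to T * alpha for the next block.
 */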
static void
aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i block;

	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	_mm_storeu_si128((__m128i *)to, block ^ *tweak);

	*tweak = xts_crank_lfsr(*tweak);
}

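/*
 * Eight XTS blocks at a time: the eight tweak values are computed and saved
 * up front, the whitened inputs are run through the interleaved 8-way AES
 * helpers, and the outputs are then unwhitened with the saved tweaks.
 */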
static void
aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];
	__m128i *top;
	const __m128i *fromp;

	tmptweak = *tweak;

	/*
	 * Unroll the loop.  This lets the compiler keep the values
	 * directly in registers and saves memory accesses.
	 */
	fromp = (const __m128i *)from;
#define PREPINP(v, pos) 					\
		do {						\
			tweaks[(pos)] = tmptweak;		\
			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
			    tmptweak;				\
			tmptweak = xts_crank_lfsr(tmptweak);	\
		} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	top = (__m128i *)to;
	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
}

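/*
 * The IV carries the 64-bit little-endian block (sector) number; the
 * initial tweak is its zero-padded block encrypted with the tweak key,
 * T0 = E_k2(IV).  Note that len is processed in whole 16-byte blocks only;
 * this routine does not implement ciphertext stealing for partial final
 * blocks.
 */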
static void
aesni_crypt_xts(int rounds, const __m128i *data_schedule,
    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}

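/*
 * keylen is given in bits.  For CBC it selects AES-128/192/256 directly.
 * For XTS the supplied key is double length, two AES keys back to back
 * (data key followed by tweak key), so 256 bits selects AES-128-XTS and
 * 512 bits selects AES-256-XTS; "key + keylen / 16" is the byte offset of
 * the second half (bits / 8 / 2), which is expanded into xts_schedule.
 */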
int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

	switch (ses->algo) {
	case CRYPTO_AES_CBC:
		switch (keylen) {
		case 128:
			ses->rounds = AES128_ROUNDS;
			break;
		case 192:
			ses->rounds = AES192_ROUNDS;
			break;
		case 256:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	case CRYPTO_AES_XTS:
		switch (keylen) {
		case 256:
			ses->rounds = AES128_ROUNDS;
			break;
		case 512:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	default:
		return (EINVAL);
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
	if (ses->algo == CRYPTO_AES_CBC)
		arc4rand(ses->iv, sizeof(ses->iv), 0);
	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
		    ses->rounds);
	}

	return (0);
}