aesni_wrap.c revision 267815
/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29210409Skib
30210409Skib#include <sys/cdefs.h>
31210409Skib__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 267815 2014-06-24 06:55:49Z kib $");
32255187Sjmg
33210409Skib#include <sys/param.h>
34210409Skib#include <sys/libkern.h>
35210409Skib#include <sys/malloc.h>
36210409Skib#include <sys/proc.h>
37210409Skib#include <sys/systm.h>
38210409Skib#include <crypto/aesni/aesni.h>
39255187Sjmg
40255187Sjmg#include "aesencdec.h"
41210409Skib
42210409SkibMALLOC_DECLARE(M_AESNI);
43210409Skib
44257757Sjmgstruct blocks8 {
45257757Sjmg	__m128i	blk[8];
46257757Sjmg} __packed;
47257757Sjmg
48210409Skibvoid
49210409Skibaesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
50210409Skib    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
51210409Skib{
52255187Sjmg	__m128i tot, ivreg;
53210409Skib	size_t i;
54210409Skib
55210409Skib	len /= AES_BLOCK_LEN;
56255187Sjmg	ivreg = _mm_loadu_si128((const __m128i *)iv);
57210409Skib	for (i = 0; i < len; i++) {
58255187Sjmg		tot = aesni_enc(rounds - 1, key_schedule,
59255187Sjmg		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
60255187Sjmg		ivreg = tot;
61255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
62210409Skib		from += AES_BLOCK_LEN;
63210409Skib		to += AES_BLOCK_LEN;
64210409Skib	}
65210409Skib}
66210409Skib
67210409Skibvoid
68255187Sjmgaesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
69255187Sjmg    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
70255187Sjmg{
71255187Sjmg	__m128i blocks[8];
72257757Sjmg	struct blocks8 *blks;
73255187Sjmg	__m128i ivreg, nextiv;
74255187Sjmg	size_t i, j, cnt;
75255187Sjmg
76255187Sjmg	ivreg = _mm_loadu_si128((const __m128i *)iv);
77255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
78255187Sjmg	for (i = 0; i < cnt; i++) {
79257757Sjmg		blks = (struct blocks8 *)buf;
80257757Sjmg		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
81257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
82257757Sjmg		    blks->blk[6], blks->blk[7], &blocks[0]);
83255187Sjmg		for (j = 0; j < 8; j++) {
84257757Sjmg			nextiv = blks->blk[j];
85257757Sjmg			blks->blk[j] = blocks[j] ^ ivreg;
86255187Sjmg			ivreg = nextiv;
87255187Sjmg		}
88255187Sjmg		buf += AES_BLOCK_LEN * 8;
89255187Sjmg	}
90255187Sjmg	i *= 8;
91255187Sjmg	cnt = len / AES_BLOCK_LEN;
92255187Sjmg	for (; i < cnt; i++) {
93257757Sjmg		nextiv = _mm_loadu_si128((void *)buf);
94257757Sjmg		_mm_storeu_si128((void *)buf,
95257757Sjmg		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
96255187Sjmg		ivreg = nextiv;
97255187Sjmg		buf += AES_BLOCK_LEN;
98255187Sjmg	}
99255187Sjmg}
100255187Sjmg
101255187Sjmgvoid
102210409Skibaesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
103255187Sjmg    const uint8_t *from, uint8_t *to)
104210409Skib{
105255187Sjmg	__m128i tot;
106257757Sjmg	__m128i tout[8];
107257757Sjmg	struct blocks8 *top;
108257757Sjmg	const struct blocks8 *blks;
109255187Sjmg	size_t i, cnt;
110210409Skib
111255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
112255187Sjmg	for (i = 0; i < cnt; i++) {
113257757Sjmg		blks = (const struct blocks8 *)from;
114257757Sjmg		top = (struct blocks8 *)to;
115257757Sjmg		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
116257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
117257757Sjmg		    blks->blk[6], blks->blk[7], tout);
118257757Sjmg		top->blk[0] = tout[0];
119257757Sjmg		top->blk[1] = tout[1];
120257757Sjmg		top->blk[2] = tout[2];
121257757Sjmg		top->blk[3] = tout[3];
122257757Sjmg		top->blk[4] = tout[4];
123257757Sjmg		top->blk[5] = tout[5];
124257757Sjmg		top->blk[6] = tout[6];
125257757Sjmg		top->blk[7] = tout[7];
126255187Sjmg		from += AES_BLOCK_LEN * 8;
127255187Sjmg		to += AES_BLOCK_LEN * 8;
128255187Sjmg	}
129255187Sjmg	i *= 8;
130255187Sjmg	cnt = len / AES_BLOCK_LEN;
131255187Sjmg	for (; i < cnt; i++) {
132255187Sjmg		tot = aesni_enc(rounds - 1, key_schedule,
133255187Sjmg		    _mm_loadu_si128((const __m128i *)from));
134255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
135210409Skib		from += AES_BLOCK_LEN;
136210409Skib		to += AES_BLOCK_LEN;
137210409Skib	}
138210409Skib}
139210409Skib
140210409Skibvoid
141210409Skibaesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
142210409Skib    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
143210409Skib{
144255187Sjmg	__m128i tot;
145257757Sjmg	__m128i tout[8];
146257757Sjmg	const struct blocks8 *blks;
147257757Sjmg	struct blocks8 *top;
148255187Sjmg	size_t i, cnt;
149210409Skib
150255187Sjmg	cnt = len / AES_BLOCK_LEN / 8;
151255187Sjmg	for (i = 0; i < cnt; i++) {
152257757Sjmg		blks = (const struct blocks8 *)from;
153257757Sjmg		top = (struct blocks8 *)to;
154257757Sjmg		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
155257757Sjmg		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
156257757Sjmg		    blks->blk[6], blks->blk[7], tout);
157257757Sjmg		top->blk[0] = tout[0];
158257757Sjmg		top->blk[1] = tout[1];
159257757Sjmg		top->blk[2] = tout[2];
160257757Sjmg		top->blk[3] = tout[3];
161257757Sjmg		top->blk[4] = tout[4];
162257757Sjmg		top->blk[5] = tout[5];
163257757Sjmg		top->blk[6] = tout[6];
164257757Sjmg		top->blk[7] = tout[7];
165255187Sjmg		from += AES_BLOCK_LEN * 8;
166255187Sjmg		to += AES_BLOCK_LEN * 8;
167255187Sjmg	}
168255187Sjmg	i *= 8;
169255187Sjmg	cnt = len / AES_BLOCK_LEN;
170255187Sjmg	for (; i < cnt; i++) {
171255187Sjmg		tot = aesni_dec(rounds - 1, key_schedule,
172255187Sjmg		    _mm_loadu_si128((const __m128i *)from));
173255187Sjmg		_mm_storeu_si128((__m128i *)to, tot);
174210409Skib		from += AES_BLOCK_LEN;
175210409Skib		to += AES_BLOCK_LEN;
176210409Skib	}
177210409Skib}
178210409Skib
/* Size in bytes of one XTS data unit block and of the tweak buffer. */
#define	AES_XTS_BLOCKSIZE	16
/* Number of IV bytes copied into the tweak; the rest of it is zeroed. */
#define	AES_XTS_IVSIZE		8
/* Feedback byte x^7 + x^2 + x + 1 used when stepping the tweak in GF(2^128). */
#define	AES_XTS_ALPHA		0x87
182213069Spjd
183255187Sjmgstatic inline __m128i
184255187Sjmgxts_crank_lfsr(__m128i inp)
185255187Sjmg{
186255187Sjmg	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
187255187Sjmg	__m128i xtweak, ret;
188255187Sjmg
189255187Sjmg	/* set up xor mask */
190255187Sjmg	xtweak = _mm_shuffle_epi32(inp, 0x93);
191255187Sjmg	xtweak = _mm_srai_epi32(xtweak, 31);
192255187Sjmg	xtweak &= alphamask;
193255187Sjmg
194255187Sjmg	/* next term */
195255187Sjmg	ret = _mm_slli_epi32(inp, 1);
196255187Sjmg	ret ^= xtweak;
197255187Sjmg
198255187Sjmg	return ret;
199255187Sjmg}
200255187Sjmg
201213069Spjdstatic void
202257757Sjmgaesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
203257757Sjmg    const uint8_t *from, uint8_t *to, int do_encrypt)
204213069Spjd{
205255187Sjmg	__m128i block;
206213069Spjd
207257757Sjmg	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
208213069Spjd
209213069Spjd	if (do_encrypt)
210255187Sjmg		block = aesni_enc(rounds - 1, key_schedule, block);
211213069Spjd	else
212255187Sjmg		block = aesni_dec(rounds - 1, key_schedule, block);
213213069Spjd
214257757Sjmg	_mm_storeu_si128((__m128i *)to, block ^ *tweak);
215213069Spjd
216255187Sjmg	*tweak = xts_crank_lfsr(*tweak);
217255187Sjmg}
218226837Spjd
219255187Sjmgstatic void
220257757Sjmgaesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
221257757Sjmg    const uint8_t *from, uint8_t *to, int do_encrypt)
222255187Sjmg{
223255187Sjmg	__m128i tmptweak;
224255187Sjmg	__m128i a, b, c, d, e, f, g, h;
225255187Sjmg	__m128i tweaks[8];
226255187Sjmg	__m128i tmp[8];
227257757Sjmg	__m128i *top;
228257757Sjmg	const __m128i *fromp;
229255187Sjmg
230255187Sjmg	tmptweak = *tweak;
231255187Sjmg
232255187Sjmg	/*
233255187Sjmg	 * unroll the loop.  This lets gcc put values directly in the
234255187Sjmg	 * register and saves memory accesses.
235255187Sjmg	 */
236257757Sjmg	fromp = (const __m128i *)from;
237255187Sjmg#define PREPINP(v, pos) 					\
238255187Sjmg		do {						\
239255187Sjmg			tweaks[(pos)] = tmptweak;		\
240257757Sjmg			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
241257757Sjmg			    tmptweak;				\
242255187Sjmg			tmptweak = xts_crank_lfsr(tmptweak);	\
243255187Sjmg		} while (0)
244255187Sjmg	PREPINP(a, 0);
245255187Sjmg	PREPINP(b, 1);
246255187Sjmg	PREPINP(c, 2);
247255187Sjmg	PREPINP(d, 3);
248255187Sjmg	PREPINP(e, 4);
249255187Sjmg	PREPINP(f, 5);
250255187Sjmg	PREPINP(g, 6);
251255187Sjmg	PREPINP(h, 7);
252255187Sjmg	*tweak = tmptweak;
253255187Sjmg
254255187Sjmg	if (do_encrypt)
255255187Sjmg		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
256255187Sjmg		    tmp);
257255187Sjmg	else
258255187Sjmg		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
259255187Sjmg		    tmp);
260255187Sjmg
261257757Sjmg	top = (__m128i *)to;
262257757Sjmg	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
263257757Sjmg	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
264257757Sjmg	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
265257757Sjmg	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
266257757Sjmg	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
267257757Sjmg	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
268257757Sjmg	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
269257757Sjmg	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
270213069Spjd}
271213069Spjd
272213069Spjdstatic void
273257757Sjmgaesni_crypt_xts(int rounds, const __m128i *data_schedule,
274257757Sjmg    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
275257757Sjmg    uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
276213069Spjd{
277255187Sjmg	__m128i tweakreg;
278255187Sjmg	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
279255187Sjmg	size_t i, cnt;
280213069Spjd
281213069Spjd	/*
282213069Spjd	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
283213069Spjd	 * of a 64-bit block number which we allow to be passed in directly.
284213069Spjd	 */
285226837Spjd#if BYTE_ORDER == LITTLE_ENDIAN
286226837Spjd	bcopy(iv, tweak, AES_XTS_IVSIZE);
287213069Spjd	/* Last 64 bits of IV are always zero. */
288213069Spjd	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
289226837Spjd#else
290226837Spjd#error Only LITTLE_ENDIAN architectures are supported.
291226837Spjd#endif
292255187Sjmg	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
293255187Sjmg	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
294213069Spjd
295255187Sjmg	cnt = len / AES_XTS_BLOCKSIZE / 8;
296255187Sjmg	for (i = 0; i < cnt; i++) {
297255187Sjmg		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
298257757Sjmg		    from, to, do_encrypt);
299255187Sjmg		from += AES_XTS_BLOCKSIZE * 8;
300255187Sjmg		to += AES_XTS_BLOCKSIZE * 8;
301255187Sjmg	}
302255187Sjmg	i *= 8;
303255187Sjmg	cnt = len / AES_XTS_BLOCKSIZE;
304255187Sjmg	for (; i < cnt; i++) {
305255187Sjmg		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
306257757Sjmg		    from, to, do_encrypt);
307213069Spjd		from += AES_XTS_BLOCKSIZE;
308213069Spjd		to += AES_XTS_BLOCKSIZE;
309213069Spjd	}
310213069Spjd}
311213069Spjd
312255187Sjmgvoid
313213069Spjdaesni_encrypt_xts(int rounds, const void *data_schedule,
314213069Spjd    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
315213069Spjd    const uint8_t iv[AES_BLOCK_LEN])
316213069Spjd{
317213069Spjd
318213069Spjd	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
319213069Spjd	    iv, 1);
320213069Spjd}
321213069Spjd
322255187Sjmgvoid
323213069Spjdaesni_decrypt_xts(int rounds, const void *data_schedule,
324213069Spjd    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
325213069Spjd    const uint8_t iv[AES_BLOCK_LEN])
326213069Spjd{
327213069Spjd
328213069Spjd	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
329213069Spjd	    iv, 0);
330213069Spjd}
331213069Spjd
332267815Skibint
333213066Spjdaesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
334213066Spjd    int keylen)
335210409Skib{
336210409Skib
337213069Spjd	switch (ses->algo) {
338213069Spjd	case CRYPTO_AES_CBC:
339213069Spjd		switch (keylen) {
340213069Spjd		case 128:
341213069Spjd			ses->rounds = AES128_ROUNDS;
342213069Spjd			break;
343213069Spjd		case 192:
344213069Spjd			ses->rounds = AES192_ROUNDS;
345213069Spjd			break;
346213069Spjd		case 256:
347213069Spjd			ses->rounds = AES256_ROUNDS;
348213069Spjd			break;
349213069Spjd		default:
350213069Spjd			return (EINVAL);
351213069Spjd		}
352210409Skib		break;
353213069Spjd	case CRYPTO_AES_XTS:
354213069Spjd		switch (keylen) {
355213069Spjd		case 256:
356213069Spjd			ses->rounds = AES128_ROUNDS;
357213069Spjd			break;
358213069Spjd		case 512:
359213069Spjd			ses->rounds = AES256_ROUNDS;
360213069Spjd			break;
361213069Spjd		default:
362213069Spjd			return (EINVAL);
363213069Spjd		}
364210409Skib		break;
365210409Skib	default:
366210409Skib		return (EINVAL);
367210409Skib	}
368213069Spjd
369213066Spjd	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
370213066Spjd	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
371213166Spjd	if (ses->algo == CRYPTO_AES_CBC)
372213069Spjd		arc4rand(ses->iv, sizeof(ses->iv), 0);
373213069Spjd	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
374213069Spjd		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
375213069Spjd		    ses->rounds);
376213069Spjd	}
377210409Skib
378213066Spjd	return (0);
379210409Skib}
380