aesni_wrap.c (247061) -> aesni_wrap.c (255187)
1/*-
2 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
1/*-
2 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden ---

22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden (view full) ---

23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 247061 2013-02-20 22:59:53Z pjd $");
31
31__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 255187 2013-09-03 18:31:23Z jmg $");
32
32#include <sys/param.h>
33#include <sys/libkern.h>
34#include <sys/malloc.h>
35#include <sys/proc.h>
36#include <sys/systm.h>
37#include <crypto/aesni/aesni.h>
33#include <sys/param.h>
34#include <sys/libkern.h>
35#include <sys/malloc.h>
36#include <sys/proc.h>
37#include <sys/systm.h>
38#include <crypto/aesni/aesni.h>
39
40#include "aesencdec.h"
38
39MALLOC_DECLARE(M_AESNI);
40
41void
42aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
43 const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
44{
41
42MALLOC_DECLARE(M_AESNI);
43
44void
45aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
46 const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
47{
45 const uint8_t *ivp;
48 __m128i tot, ivreg;
46 size_t i;
47
48 len /= AES_BLOCK_LEN;
49 size_t i;
50
51 len /= AES_BLOCK_LEN;
49 ivp = iv;
52 ivreg = _mm_loadu_si128((const __m128i *)iv);
50 for (i = 0; i < len; i++) {
53 for (i = 0; i < len; i++) {
51 aesni_enc(rounds - 1, key_schedule, from, to, ivp);
52 ivp = to;
54 tot = aesni_enc(rounds - 1, key_schedule,
55 _mm_loadu_si128((const __m128i *)from) ^ ivreg);
56 ivreg = tot;
57 _mm_storeu_si128((__m128i *)to, tot);
53 from += AES_BLOCK_LEN;
54 to += AES_BLOCK_LEN;
55 }
56}
57
58void
58 from += AES_BLOCK_LEN;
59 to += AES_BLOCK_LEN;
60 }
61}
62
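CBC encryption is inherently serial: block i's cipher input is P[i] xor C[i-1], so there is nothing to batch. The rewrite instead keeps the running ciphertext in an XMM register (ivreg) rather than chaining through memory via the old ivp pointer, using the new aesni_enc() that takes and returns __m128i values. A minimal scalar sketch of the same chaining, with block_encrypt() as a hypothetical stand-in for a one-block AES call:

	static void
	cbc_encrypt_ref(const void *key_schedule, size_t len,
	    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
	{
		uint8_t chain[AES_BLOCK_LEN];
		size_t i, j;

		/* C[i] = E_k(P[i] ^ C[i-1]), with C[-1] = IV. */
		memcpy(chain, iv, AES_BLOCK_LEN);
		for (i = 0; i < len / AES_BLOCK_LEN; i++) {
			for (j = 0; j < AES_BLOCK_LEN; j++)
				chain[j] ^= from[i * AES_BLOCK_LEN + j];
			/* hypothetical one-block AES primitive */
			block_encrypt(key_schedule, chain, chain);
			memcpy(to + i * AES_BLOCK_LEN, chain, AES_BLOCK_LEN);
		}
	}
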
63void
64aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
65 uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
66{
67 __m128i blocks[8];
68 __m128i *bufs;
69 __m128i ivreg, nextiv;
70 size_t i, j, cnt;
71
72 ivreg = _mm_loadu_si128((const __m128i *)iv);
73 cnt = len / AES_BLOCK_LEN / 8;
74 for (i = 0; i < cnt; i++) {
75 bufs = (__m128i *)buf;
76 aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
77 bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
78 bufs[7], &blocks[0]);
79 for (j = 0; j < 8; j++) {
80 nextiv = bufs[j];
81 bufs[j] = blocks[j] ^ ivreg;
82 ivreg = nextiv;
83 }
84 buf += AES_BLOCK_LEN * 8;
85 }
86 i *= 8;
87 cnt = len / AES_BLOCK_LEN;
88 for (; i < cnt; i++) {
89 bufs = (__m128i *)buf;
90 nextiv = bufs[0];
91 bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
92 ivreg = nextiv;
93 buf += AES_BLOCK_LEN;
94 }
95}
96
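Decryption, unlike encryption, parallelizes: P[i] = D_k(C[i]) xor C[i-1], and every D_k(C[i]) is independent of the others, which is what lets the new code push eight ciphertext blocks through aesni_dec8() per iteration and apply the chaining xors afterwards. Since buf is decrypted in place, each ciphertext block is saved (nextiv) before being overwritten. A scalar reference for the in-place transform, again with a hypothetical block_decrypt():

	static void
	cbc_decrypt_ref(const void *key_schedule, size_t len, uint8_t *buf,
	    const uint8_t iv[AES_BLOCK_LEN])
	{
		uint8_t prev[AES_BLOCK_LEN], cur[AES_BLOCK_LEN];
		size_t i, j;

		/* P[i] = D_k(C[i]) ^ C[i-1], with C[-1] = IV. */
		memcpy(prev, iv, AES_BLOCK_LEN);
		for (i = 0; i < len / AES_BLOCK_LEN; i++) {
			/* save C[i]: it is the next block's chaining value */
			memcpy(cur, buf + i * AES_BLOCK_LEN, AES_BLOCK_LEN);
			/* hypothetical one-block AES primitive */
			block_decrypt(key_schedule, cur, buf + i * AES_BLOCK_LEN);
			for (j = 0; j < AES_BLOCK_LEN; j++)
				buf[i * AES_BLOCK_LEN + j] ^= prev[j];
			memcpy(prev, cur, AES_BLOCK_LEN);
		}
	}
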
97void
59aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
98aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
60 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
99 const uint8_t *from, uint8_t *to)
61{
100{
62 size_t i;
101 __m128i tot;
102 const __m128i *blocks;
103 size_t i, cnt;
63
104
64 len /= AES_BLOCK_LEN;
65 for (i = 0; i < len; i++) {
66 aesni_enc(rounds - 1, key_schedule, from, to, NULL);
105 cnt = len / AES_BLOCK_LEN / 8;
106 for (i = 0; i < cnt; i++) {
107 blocks = (const __m128i *)from;
108 aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
109 blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
110 blocks[7], (__m128i *)to);
111 from += AES_BLOCK_LEN * 8;
112 to += AES_BLOCK_LEN * 8;
113 }
114 i *= 8;
115 cnt = len / AES_BLOCK_LEN;
116 for (; i < cnt; i++) {
117 tot = aesni_enc(rounds - 1, key_schedule,
118 _mm_loadu_si128((const __m128i *)from));
119 _mm_storeu_si128((__m128i *)to, tot);
67 from += AES_BLOCK_LEN;
68 to += AES_BLOCK_LEN;
69 }
70}
71
72void
73aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
74 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
75{
120 from += AES_BLOCK_LEN;
121 to += AES_BLOCK_LEN;
122 }
123}
124
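The loop shape introduced here recurs in every batched routine (ECB in both directions, CBC decryption, and the XTS paths further down): a wide pass handing eight blocks at a time to the aesni_*8() helpers, then a scalar tail for whatever remains. Reduced to its skeleton:

	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;	/* full 8-block batches */
	for (i = 0; i < cnt; i++) {
		/* wide path: one aesni_enc8()/aesni_dec8() call */
	}
	i *= 8;				/* batch count -> block index */
	cnt = len / AES_BLOCK_LEN;	/* total blocks */
	for (; i < cnt; i++) {
		/* narrow path: one block per iteration */
	}

The i *= 8 step converts the batch count into a block index, so the tail loop resumes exactly at the first unprocessed block without a separate remainder variable.
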
125void
126aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
127 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
128{
76 size_t i;
129 __m128i tot;
130 const __m128i *blocks;
131 size_t i, cnt;
77
132
78 len /= AES_BLOCK_LEN;
79 for (i = 0; i < len; i++) {
80 aesni_dec(rounds - 1, key_schedule, from, to, NULL);
133 cnt = len / AES_BLOCK_LEN / 8;
134 for (i = 0; i < cnt; i++) {
135 blocks = (const __m128i *)from;
136 aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
137 blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
138 blocks[7], (__m128i *)to);
139 from += AES_BLOCK_LEN * 8;
140 to += AES_BLOCK_LEN * 8;
141 }
142 i *= 8;
143 cnt = len / AES_BLOCK_LEN;
144 for (; i < cnt; i++) {
145 tot = aesni_dec(rounds - 1, key_schedule,
146 _mm_loadu_si128((const __m128i *)from));
147 _mm_storeu_si128((__m128i *)to, tot);
81 from += AES_BLOCK_LEN;
82 to += AES_BLOCK_LEN;
83 }
84}
85
86#define AES_XTS_BLOCKSIZE 16
87#define AES_XTS_IVSIZE 8
88#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
89
148 from += AES_BLOCK_LEN;
149 to += AES_BLOCK_LEN;
150 }
151}
152
153#define AES_XTS_BLOCKSIZE 16
154#define AES_XTS_IVSIZE 8
155#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
156
157static inline __m128i
158xts_crank_lfsr(__m128i inp)
159{
160 const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
161 __m128i xtweak, ret;
162
163 /* set up xor mask */
164 xtweak = _mm_shuffle_epi32(inp, 0x93);
165 xtweak = _mm_srai_epi32(xtweak, 31);
166 xtweak &= alphamask;
167
168 /* next term */
169 ret = _mm_slli_epi32(inp, 1);
170 ret ^= xtweak;
171
172 return ret;
173}
174
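xts_crank_lfsr() is a vectorized multiply-by-x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, whose low feedback byte is AES_XTS_ALPHA (0x87). The shuffle with 0x93 rotates the 32-bit lanes so each lane receives the lane whose top bit it needs (the top lane wrapping around to the bottom), the arithmetic shift by 31 smears that top bit across the whole lane, and alphamask reduces it to a carry-in of 1, or the 0x87 feedback in lane 0, before it is xored into the left-shifted tweak. The scalar code this diff deletes further down computes the same update; as a standalone helper (xts_mul_x() is a name invented for this sketch) it would read:

	static inline void
	xts_mul_x(uint64_t t[2])
	{
		uint64_t carry = t[1] >> 63;	/* bit 127 falls off: feed back */

		t[1] = (t[1] << 1) | (t[0] >> 63);	/* bit 63 carries up */
		t[0] = (t[0] << 1) ^ (carry ? AES_XTS_ALPHA : 0);
	}
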
90static void
175static void
91aesni_crypt_xts_block(int rounds, const void *key_schedule, uint64_t *tweak,
92 const uint64_t *from, uint64_t *to, uint64_t *block, int do_encrypt)
176aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
177 const __m128i *from, __m128i *to, int do_encrypt)
93{
178{
94 int carry;
179 __m128i block;
95
180
96 block[0] = from[0] ^ tweak[0];
97 block[1] = from[1] ^ tweak[1];
181 block = *from ^ *tweak;
98
99 if (do_encrypt)
182
183 if (do_encrypt)
100 aesni_enc(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
184 block = aesni_enc(rounds - 1, key_schedule, block);
101 else
185 else
102 aesni_dec(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
186 block = aesni_dec(rounds - 1, key_schedule, block);
103
187
104 to[0] ^= tweak[0];
105 to[1] ^= tweak[1];
188 *to = block ^ *tweak;
106
189
107 /* Exponentiate tweak. */
108 carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
109 tweak[0] <<= 1;
110 if (tweak[1] & 0x8000000000000000ULL) {
111 uint8_t *twk = (uint8_t *)tweak;
190 *tweak = xts_crank_lfsr(*tweak);
191}
112
192
113 twk[0] ^= AES_XTS_ALPHA;
114 }
115 tweak[1] <<= 1;
116 if (carry)
117 tweak[1] |= 1;
193static void
194aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
195 const __m128i *from, __m128i *to, int do_encrypt)
196{
197 __m128i tmptweak;
198 __m128i a, b, c, d, e, f, g, h;
199 __m128i tweaks[8];
200 __m128i tmp[8];
201
202 tmptweak = *tweak;
203
204 /*
205 * unroll the loop. This lets gcc put values directly in the
206 * register and saves memory accesses.
207 */
208#define PREPINP(v, pos) \
209 do { \
210 tweaks[(pos)] = tmptweak; \
211 (v) = from[(pos)] ^ tmptweak; \
212 tmptweak = xts_crank_lfsr(tmptweak); \
213 } while (0)
214 PREPINP(a, 0);
215 PREPINP(b, 1);
216 PREPINP(c, 2);
217 PREPINP(d, 3);
218 PREPINP(e, 4);
219 PREPINP(f, 5);
220 PREPINP(g, 6);
221 PREPINP(h, 7);
222 *tweak = tmptweak;
223
224 if (do_encrypt)
225 aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
226 tmp);
227 else
228 aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
229 tmp);
230
231 to[0] = tmp[0] ^ tweaks[0];
232 to[1] = tmp[1] ^ tweaks[1];
233 to[2] = tmp[2] ^ tweaks[2];
234 to[3] = tmp[3] ^ tweaks[3];
235 to[4] = tmp[4] ^ tweaks[4];
236 to[5] = tmp[5] ^ tweaks[5];
237 to[6] = tmp[6] ^ tweaks[6];
238 to[7] = tmp[7] ^ tweaks[7];
118}
119
120static void
121aesni_crypt_xts(int rounds, const void *data_schedule,
122 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
123 const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
124{
239}
240
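The PREPINP() macro unrolls what is conceptually the following loop; spelling out the eight invocations by hand encourages the compiler to keep the working blocks a through h in XMM registers, as the comment in the code notes. With in[] standing in for a..h:

	__m128i in[8];
	int j;

	for (j = 0; j < 8; j++) {
		tweaks[j] = tmptweak;		/* remember each block's mask */
		in[j] = from[j] ^ tmptweak;	/* pre-whitening */
		tmptweak = xts_crank_lfsr(tmptweak);
	}

Each block's tweak is saved in tweaks[] because the same mask must be xored back onto the cipher output once aesni_enc8()/aesni_dec8() returns.
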
241static void
242aesni_crypt_xts(int rounds, const void *data_schedule,
243 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
244 const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
245{
125 uint64_t block[AES_XTS_BLOCKSIZE / 8];
126 uint8_t tweak[AES_XTS_BLOCKSIZE];
127 size_t i;
246 __m128i tweakreg;
247 uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
248 size_t i, cnt;
128
129 /*
130 * Prepare tweak as E_k2(IV). IV is specified as LE representation
131 * of a 64-bit block number which we allow to be passed in directly.
132 */
133#if BYTE_ORDER == LITTLE_ENDIAN
134 bcopy(iv, tweak, AES_XTS_IVSIZE);
135 /* Last 64 bits of IV are always zero. */
136 bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
137#else
138#error Only LITTLE_ENDIAN architectures are supported.
139#endif
249
250 /*
251 * Prepare tweak as E_k2(IV). IV is specified as LE representation
252 * of a 64-bit block number which we allow to be passed in directly.
253 */
254#if BYTE_ORDER == LITTLE_ENDIAN
255 bcopy(iv, tweak, AES_XTS_IVSIZE);
256 /* Last 64 bits of IV are always zero. */
257 bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
258#else
259#error Only LITTLE_ENDIAN architectures are supported.
260#endif
140 aesni_enc(rounds - 1, tweak_schedule, tweak, tweak, NULL);
261 tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
262 tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
141
263
142 len /= AES_XTS_BLOCKSIZE;
143 for (i = 0; i < len; i++) {
144 aesni_crypt_xts_block(rounds, data_schedule, (uint64_t *)tweak,
145 (const uint64_t *)from, (uint64_t *)to, block, do_encrypt);
264 cnt = len / AES_XTS_BLOCKSIZE / 8;
265 for (i = 0; i < cnt; i++) {
266 aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
267 (const __m128i *)from, (__m128i *)to, do_encrypt);
268 from += AES_XTS_BLOCKSIZE * 8;
269 to += AES_XTS_BLOCKSIZE * 8;
270 }
271 i *= 8;
272 cnt = len / AES_XTS_BLOCKSIZE;
273 for (; i < cnt; i++) {
274 aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
275 (const __m128i *)from, (__m128i *)to, do_encrypt);
146 from += AES_XTS_BLOCKSIZE;
147 to += AES_XTS_BLOCKSIZE;
148 }
276 from += AES_XTS_BLOCKSIZE;
277 to += AES_XTS_BLOCKSIZE;
278 }
149
150 bzero(tweak, sizeof(tweak));
151 bzero(block, sizeof(block));
152}
153
279}
280
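Taken together this is standard XTS: the tweak key encrypts the 64-bit little-endian block number, zero-padded to 128 bits, and every data block is masked with the tweak before and after the data-key AES pass. In outline:

	/*
	 * T = E_k2(blocknum || 0^64)		(tweak-key pass, done once)
	 * for each 16-byte block j:
	 *	C[j] = E_k1(P[j] ^ T) ^ T	(D_k1 when decrypting)
	 *	T = T * x in GF(2^128)		(xts_crank_lfsr())
	 */

One behavioral difference worth noting: the old version scrubbed its stack copies with bzero() before returning, while the rewrite leaves the tweak[] staging buffer as-is once it has been loaded into tweakreg.
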
154static void
281void
155aesni_encrypt_xts(int rounds, const void *data_schedule,
156 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
157 const uint8_t iv[AES_BLOCK_LEN])
158{
159
160 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
161 iv, 1);
162}
163
282aesni_encrypt_xts(int rounds, const void *data_schedule,
283 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
284 const uint8_t iv[AES_BLOCK_LEN])
285{
286
287 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
288 iv, 1);
289}
290
164static void
291void
165aesni_decrypt_xts(int rounds, const void *data_schedule,
166 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
167 const uint8_t iv[AES_BLOCK_LEN])
168{
169
170 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
171 iv, 0);
172}

292aesni_decrypt_xts(int rounds, const void *data_schedule,
293 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
294 const uint8_t iv[AES_BLOCK_LEN])
295{
296
297 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
298 iv, 0);
299}

--- 148 unchanged lines hidden ---