Deleted Added
full compact
aesni_wrap.c (255187) aesni_wrap.c (257757)
1/*-
2 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 14 unchanged lines hidden (view full) ---

23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
1/*-
2 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 14 unchanged lines hidden (view full) ---

23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 255187 2013-09-03 18:31:23Z jmg $");
31__FBSDID("$FreeBSD: head/sys/crypto/aesni/aesni_wrap.c 257757 2013-11-06 19:14:49Z jmg $");
32
33#include <sys/param.h>
34#include <sys/libkern.h>
35#include <sys/malloc.h>
36#include <sys/proc.h>
37#include <sys/systm.h>
38#include <crypto/aesni/aesni.h>
39
40#include "aesencdec.h"
41
42MALLOC_DECLARE(M_AESNI);
43
32
33#include <sys/param.h>
34#include <sys/libkern.h>
35#include <sys/malloc.h>
36#include <sys/proc.h>
37#include <sys/systm.h>
38#include <crypto/aesni/aesni.h>
39
40#include "aesencdec.h"
41
42MALLOC_DECLARE(M_AESNI);
43
44struct blocks8 {
45 __m128i blk[8];
46} __packed;
47
44void
45aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
46 const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
47{
48 __m128i tot, ivreg;
49 size_t i;
50
51 len /= AES_BLOCK_LEN;

--- 8 unchanged lines hidden (view full) ---

60 }
61}
62
63void
64aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
65 uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
66{
67 __m128i blocks[8];
48void
49aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
50 const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
51{
52 __m128i tot, ivreg;
53 size_t i;
54
55 len /= AES_BLOCK_LEN;

--- 8 unchanged lines hidden (view full) ---

64 }
65}
66
67void
68aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
69 uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
70{
71 __m128i blocks[8];
68 __m128i *bufs;
72 struct blocks8 *blks;
69 __m128i ivreg, nextiv;
70 size_t i, j, cnt;
71
72 ivreg = _mm_loadu_si128((const __m128i *)iv);
73 cnt = len / AES_BLOCK_LEN / 8;
74 for (i = 0; i < cnt; i++) {
73 __m128i ivreg, nextiv;
74 size_t i, j, cnt;
75
76 ivreg = _mm_loadu_si128((const __m128i *)iv);
77 cnt = len / AES_BLOCK_LEN / 8;
78 for (i = 0; i < cnt; i++) {
75 bufs = (__m128i *)buf;
76 aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
77 bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
78 bufs[7], &blocks[0]);
79 blks = (struct blocks8 *)buf;
80 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
81 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
82 blks->blk[6], blks->blk[7], &blocks[0]);
79 for (j = 0; j < 8; j++) {
83 for (j = 0; j < 8; j++) {
80 nextiv = bufs[j];
81 bufs[j] = blocks[j] ^ ivreg;
84 nextiv = blks->blk[j];
85 blks->blk[j] = blocks[j] ^ ivreg;
82 ivreg = nextiv;
83 }
84 buf += AES_BLOCK_LEN * 8;
85 }
86 i *= 8;
87 cnt = len / AES_BLOCK_LEN;
88 for (; i < cnt; i++) {
86 ivreg = nextiv;
87 }
88 buf += AES_BLOCK_LEN * 8;
89 }
90 i *= 8;
91 cnt = len / AES_BLOCK_LEN;
92 for (; i < cnt; i++) {
89 bufs = (__m128i *)buf;
90 nextiv = bufs[0];
91 bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
93 nextiv = _mm_loadu_si128((void *)buf);
94 _mm_storeu_si128((void *)buf,
95 aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
92 ivreg = nextiv;
93 buf += AES_BLOCK_LEN;
94 }
95}
96
97void
98aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
99 const uint8_t *from, uint8_t *to)
100{
101 __m128i tot;
96 ivreg = nextiv;
97 buf += AES_BLOCK_LEN;
98 }
99}
100
101void
102aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
103 const uint8_t *from, uint8_t *to)
104{
105 __m128i tot;
102 const __m128i *blocks;
106 __m128i tout[8];
107 struct blocks8 *top;
108 const struct blocks8 *blks;
103 size_t i, cnt;
104
105 cnt = len / AES_BLOCK_LEN / 8;
106 for (i = 0; i < cnt; i++) {
109 size_t i, cnt;
110
111 cnt = len / AES_BLOCK_LEN / 8;
112 for (i = 0; i < cnt; i++) {
107 blocks = (const __m128i *)from;
108 aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
109 blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
110 blocks[7], (__m128i *)to);
113 blks = (const struct blocks8 *)from;
114 top = (struct blocks8 *)to;
115 aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
116 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
117 blks->blk[6], blks->blk[7], tout);
118 top->blk[0] = tout[0];
119 top->blk[1] = tout[1];
120 top->blk[2] = tout[2];
121 top->blk[3] = tout[3];
122 top->blk[4] = tout[4];
123 top->blk[5] = tout[5];
124 top->blk[6] = tout[6];
125 top->blk[7] = tout[7];
111 from += AES_BLOCK_LEN * 8;
112 to += AES_BLOCK_LEN * 8;
113 }
114 i *= 8;
115 cnt = len / AES_BLOCK_LEN;
116 for (; i < cnt; i++) {
117 tot = aesni_enc(rounds - 1, key_schedule,
118 _mm_loadu_si128((const __m128i *)from));
119 _mm_storeu_si128((__m128i *)to, tot);
120 from += AES_BLOCK_LEN;
121 to += AES_BLOCK_LEN;
122 }
123}
124
125void
126aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
127 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
128{
129 __m128i tot;
126 from += AES_BLOCK_LEN * 8;
127 to += AES_BLOCK_LEN * 8;
128 }
129 i *= 8;
130 cnt = len / AES_BLOCK_LEN;
131 for (; i < cnt; i++) {
132 tot = aesni_enc(rounds - 1, key_schedule,
133 _mm_loadu_si128((const __m128i *)from));
134 _mm_storeu_si128((__m128i *)to, tot);
135 from += AES_BLOCK_LEN;
136 to += AES_BLOCK_LEN;
137 }
138}
139
140void
141aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
142 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
143{
144 __m128i tot;
130 const __m128i *blocks;
145 __m128i tout[8];
146 const struct blocks8 *blks;
147 struct blocks8 *top;
131 size_t i, cnt;
132
133 cnt = len / AES_BLOCK_LEN / 8;
134 for (i = 0; i < cnt; i++) {
148 size_t i, cnt;
149
150 cnt = len / AES_BLOCK_LEN / 8;
151 for (i = 0; i < cnt; i++) {
135 blocks = (const __m128i *)from;
136 aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
137 blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
138 blocks[7], (__m128i *)to);
152 blks = (const struct blocks8 *)from;
153 top = (struct blocks8 *)to;
154 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
155 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
156 blks->blk[6], blks->blk[7], tout);
157 top->blk[0] = tout[0];
158 top->blk[1] = tout[1];
159 top->blk[2] = tout[2];
160 top->blk[3] = tout[3];
161 top->blk[4] = tout[4];
162 top->blk[5] = tout[5];
163 top->blk[6] = tout[6];
164 top->blk[7] = tout[7];
139 from += AES_BLOCK_LEN * 8;
140 to += AES_BLOCK_LEN * 8;
141 }
142 i *= 8;
143 cnt = len / AES_BLOCK_LEN;
144 for (; i < cnt; i++) {
145 tot = aesni_dec(rounds - 1, key_schedule,
146 _mm_loadu_si128((const __m128i *)from));

--- 21 unchanged lines hidden (view full) ---

168 /* next term */
169 ret = _mm_slli_epi32(inp, 1);
170 ret ^= xtweak;
171
172 return ret;
173}
174
175static void
165 from += AES_BLOCK_LEN * 8;
166 to += AES_BLOCK_LEN * 8;
167 }
168 i *= 8;
169 cnt = len / AES_BLOCK_LEN;
170 for (; i < cnt; i++) {
171 tot = aesni_dec(rounds - 1, key_schedule,
172 _mm_loadu_si128((const __m128i *)from));

--- 21 unchanged lines hidden (view full) ---

194 /* next term */
195 ret = _mm_slli_epi32(inp, 1);
196 ret ^= xtweak;
197
198 return ret;
199}
200
201static void
176aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
177 const __m128i *from, __m128i *to, int do_encrypt)
202aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
203 const uint8_t *from, uint8_t *to, int do_encrypt)
178{
179 __m128i block;
180
204{
205 __m128i block;
206
181 block = *from ^ *tweak;
207 block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
182
183 if (do_encrypt)
184 block = aesni_enc(rounds - 1, key_schedule, block);
185 else
186 block = aesni_dec(rounds - 1, key_schedule, block);
187
208
209 if (do_encrypt)
210 block = aesni_enc(rounds - 1, key_schedule, block);
211 else
212 block = aesni_dec(rounds - 1, key_schedule, block);
213
188 *to = block ^ *tweak;
214 _mm_storeu_si128((__m128i *)to, block ^ *tweak);
189
190 *tweak = xts_crank_lfsr(*tweak);
191}
192
193static void
215
216 *tweak = xts_crank_lfsr(*tweak);
217}
218
219static void
194aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
195 const __m128i *from, __m128i *to, int do_encrypt)
220aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
221 const uint8_t *from, uint8_t *to, int do_encrypt)
196{
197 __m128i tmptweak;
198 __m128i a, b, c, d, e, f, g, h;
199 __m128i tweaks[8];
200 __m128i tmp[8];
222{
223 __m128i tmptweak;
224 __m128i a, b, c, d, e, f, g, h;
225 __m128i tweaks[8];
226 __m128i tmp[8];
227 __m128i *top;
228 const __m128i *fromp;
201
202 tmptweak = *tweak;
203
204 /*
205 * unroll the loop. This lets gcc put values directly in the
206 * register and saves memory accesses.
207 */
229
230 tmptweak = *tweak;
231
232 /*
233 * unroll the loop. This lets gcc put values directly in the
234 * register and saves memory accesses.
235 */
236 fromp = (const __m128i *)from;
208#define PREPINP(v, pos) \
209 do { \
210 tweaks[(pos)] = tmptweak; \
237#define PREPINP(v, pos) \
238 do { \
239 tweaks[(pos)] = tmptweak; \
211 (v) = from[(pos)] ^ tmptweak; \
240 (v) = _mm_loadu_si128(&fromp[pos]) ^ \
241 tmptweak; \
212 tmptweak = xts_crank_lfsr(tmptweak); \
213 } while (0)
214 PREPINP(a, 0);
215 PREPINP(b, 1);
216 PREPINP(c, 2);
217 PREPINP(d, 3);
218 PREPINP(e, 4);
219 PREPINP(f, 5);
220 PREPINP(g, 6);
221 PREPINP(h, 7);
222 *tweak = tmptweak;
223
224 if (do_encrypt)
225 aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
226 tmp);
227 else
228 aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
229 tmp);
230
242 tmptweak = xts_crank_lfsr(tmptweak); \
243 } while (0)
244 PREPINP(a, 0);
245 PREPINP(b, 1);
246 PREPINP(c, 2);
247 PREPINP(d, 3);
248 PREPINP(e, 4);
249 PREPINP(f, 5);
250 PREPINP(g, 6);
251 PREPINP(h, 7);
252 *tweak = tmptweak;
253
254 if (do_encrypt)
255 aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
256 tmp);
257 else
258 aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
259 tmp);
260
231 to[0] = tmp[0] ^ tweaks[0];
232 to[1] = tmp[1] ^ tweaks[1];
233 to[2] = tmp[2] ^ tweaks[2];
234 to[3] = tmp[3] ^ tweaks[3];
235 to[4] = tmp[4] ^ tweaks[4];
236 to[5] = tmp[5] ^ tweaks[5];
237 to[6] = tmp[6] ^ tweaks[6];
238 to[7] = tmp[7] ^ tweaks[7];
261 top = (__m128i *)to;
262 _mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
263 _mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
264 _mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
265 _mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
266 _mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
267 _mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
268 _mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
269 _mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
239}
240
241static void
270}
271
272static void
242aesni_crypt_xts(int rounds, const void *data_schedule,
243 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
244 const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
273aesni_crypt_xts(int rounds, const __m128i *data_schedule,
274 const __m128i *tweak_schedule, size_t len, const uint8_t *from,
275 uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
245{
246 __m128i tweakreg;
247 uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
248 size_t i, cnt;
249
250 /*
251 * Prepare tweak as E_k2(IV). IV is specified as LE representation
252 * of a 64-bit block number which we allow to be passed in directly.

--- 6 unchanged lines hidden (view full) ---

259#error Only LITTLE_ENDIAN architectures are supported.
260#endif
261 tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
262 tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
263
264 cnt = len / AES_XTS_BLOCKSIZE / 8;
265 for (i = 0; i < cnt; i++) {
266 aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
276{
277 __m128i tweakreg;
278 uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
279 size_t i, cnt;
280
281 /*
282 * Prepare tweak as E_k2(IV). IV is specified as LE representation
283 * of a 64-bit block number which we allow to be passed in directly.

--- 6 unchanged lines hidden (view full) ---

290#error Only LITTLE_ENDIAN architectures are supported.
291#endif
292 tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
293 tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
294
295 cnt = len / AES_XTS_BLOCKSIZE / 8;
296 for (i = 0; i < cnt; i++) {
297 aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
267 (const __m128i *)from, (__m128i *)to, do_encrypt);
298 from, to, do_encrypt);
268 from += AES_XTS_BLOCKSIZE * 8;
269 to += AES_XTS_BLOCKSIZE * 8;
270 }
271 i *= 8;
272 cnt = len / AES_XTS_BLOCKSIZE;
273 for (; i < cnt; i++) {
274 aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
299 from += AES_XTS_BLOCKSIZE * 8;
300 to += AES_XTS_BLOCKSIZE * 8;
301 }
302 i *= 8;
303 cnt = len / AES_XTS_BLOCKSIZE;
304 for (; i < cnt; i++) {
305 aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
275 (const __m128i *)from, (__m128i *)to, do_encrypt);
306 from, to, do_encrypt);
276 from += AES_XTS_BLOCKSIZE;
277 to += AES_XTS_BLOCKSIZE;
278 }
279}
280
281void
282aesni_encrypt_xts(int rounds, const void *data_schedule,
283 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,

--- 164 unchanged lines hidden ---
307 from += AES_XTS_BLOCKSIZE;
308 to += AES_XTS_BLOCKSIZE;
309 }
310}
311
312void
313aesni_encrypt_xts(int rounds, const void *data_schedule,
314 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,

--- 164 unchanged lines hidden ---