1/*	$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
31
32#ifdef _KERNEL
33#include <sys/systm.h>
34#include <lib/libkern/libkern.h>
35#else
36#include <assert.h>
37#include <inttypes.h>
38#include <stdio.h>
39#define	KASSERT			assert
40#endif
41
42#include "aes_ssse3_impl.h"
43
44static inline __m128i
45loadblock(const void *in)
46{
47	return _mm_loadu_epi8(in);
48}
49
50static inline void
51storeblock(void *out, __m128i block)
52{
53	_mm_storeu_epi8(out, block);
54}
55
56void
57aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16],
58    uint8_t out[static 16], uint32_t nrounds)
59{
60	__m128i block;
61
62	block = loadblock(in);
63	block = aes_ssse3_enc1(enc, block, nrounds);
64	storeblock(out, block);
65}
66
67void
68aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16],
69    uint8_t out[static 16], uint32_t nrounds)
70{
71	__m128i block;
72
73	block = loadblock(in);
74	block = aes_ssse3_dec1(dec, block, nrounds);
75	storeblock(out, block);
76}
77
78void
79aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
80    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
81    uint32_t nrounds)
82{
83	__m128i cv;
84
85	KASSERT(nbytes);
86
87	cv = loadblock(iv);
88	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
89		cv ^= loadblock(in);
90		cv = aes_ssse3_enc1(enc, cv, nrounds);
91		storeblock(out, cv);
92	}
93	storeblock(iv, cv);
94}
95
96void
97aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
98    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
99    uint32_t nrounds)
100{
101	__m128i iv0, cv, b;
102
103	KASSERT(nbytes);
104	KASSERT(nbytes % 16 == 0);
105
106	iv0 = loadblock(iv);
107	cv = loadblock(in + nbytes - 16);
108	storeblock(iv, cv);
109
110	for (;;) {
111		b = aes_ssse3_dec1(dec, cv, nrounds);
112		if ((nbytes -= 16) == 0)
113			break;
114		cv = loadblock(in + nbytes - 16);
115		storeblock(out + nbytes, b ^ cv);
116	}
117	storeblock(out, b ^ iv0);
118}
119
/*
 * aes_ssse3_xts_update(t)
 *
 *	Return the 128-bit value t shifted left by one bit, treating
 *	the two 64-bit lanes as one little-endian 128-bit integer: the
 *	bit shifted out of the low lane is carried into bit 0 of the
 *	high lane, and if a bit is shifted out of the high lane, 0x87
 *	is xored into the low lane.  This is the doubling in GF(2^128)
 *	used to advance an XTS tweak.
 */
static inline __m128i
aes_ssse3_xts_update(__m128i t)
{
	const __m128i ones = _mm_set_epi64x(1, 1);
	/* High lane: carry from the low lane; low lane: reduction.  */
	const __m128i feedback = _mm_set_epi64x(1, 0x87);
	__m128i topbit, keep, doubled;

	/* Per lane: 1 if the lane's top bit is set, else 0.  */
	topbit = _mm_srli_epi64(t, 63);

	/* Per lane: 0 if the lane's top bit is set, else all ones.  */
	keep = _mm_sub_epi64(topbit, ones);

	/* Swap lanes so each mask lines up with the lane it feeds.  */
	keep = _mm_shuffle_epi32(keep, 0x4e);

	/* Shift each lane on its own; cross-lane bits come from feedback.  */
	doubled = _mm_slli_epi64(t, 1);

	return doubled ^ (feedback & ~keep);
}
133
/*
 * aes_ssse3_xts_update_selftest()
 *
 *	Run aes_ssse3_xts_update() over a fixed table of input/output
 *	pairs, each given as four 32-bit words.  Every case is run even
 *	after a failure; each mismatching case prints the words that
 *	were actually computed.  Returns 0 if all cases match, -1 if
 *	any case does not.
 */
static int
aes_ssse3_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		[0] = { {1}, {2} },
		[1] = { {0x80000000U,0,0,0}, {0,1,0,0} },
		[2] = { {0,0x80000000U,0,0}, {0,0,1,0} },
		[3] = { {0,0,0x80000000U,0}, {0,0,0,1} },
		[4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} },
		[5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	const unsigned ncases = sizeof(cases)/sizeof(cases[0]);
	uint32_t t[4];
	unsigned i, j;
	int mismatch;
	int result = 0;

	for (i = 0; i < ncases; i++) {
		/* Work on a scratch copy; the table itself is const.  */
		for (j = 0; j < 4; j++)
			t[j] = cases[i].in[j];

		storeblock(t, aes_ssse3_xts_update(loadblock(t)));

		mismatch = 0;
		for (j = 0; j < 4; j++)
			mismatch |= (t[j] != cases[i].out[j]);
		if (!mismatch)
			continue;

		/* Report what we got and keep going through the table.  */
		printf("%s %u:"
		    " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n",
		    __func__, i, t[0], t[1], t[2], t[3]);
		result = -1;
	}

	return result;
}
170
171void
172aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
173    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
174    uint32_t nrounds)
175{
176	__m128i t, b;
177
178	KASSERT(nbytes);
179	KASSERT(nbytes % 16 == 0);
180
181	t = loadblock(tweak);
182	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
183		b = t ^ loadblock(in);
184		b = aes_ssse3_enc1(enc, b, nrounds);
185		storeblock(out, t ^ b);
186		t = aes_ssse3_xts_update(t);
187	}
188	storeblock(tweak, t);
189}
190
191void
192aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
193    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
194    uint32_t nrounds)
195{
196	__m128i t, b;
197
198	KASSERT(nbytes);
199	KASSERT(nbytes % 16 == 0);
200
201	t = loadblock(tweak);
202	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
203		b = t ^ loadblock(in);
204		b = aes_ssse3_dec1(dec, b, nrounds);
205		storeblock(out, t ^ b);
206		t = aes_ssse3_xts_update(t);
207	}
208	storeblock(tweak, t);
209}
210
211void
212aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
213    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
214{
215	__m128i auth;
216
217	KASSERT(nbytes);
218	KASSERT(nbytes % 16 == 0);
219
220	auth = loadblock(auth0);
221	for (; nbytes; nbytes -= 16, in += 16)
222		auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
223	storeblock(auth0, auth);
224}
225
226void
227aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
228    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
229    uint32_t nrounds)
230{
231	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
232	const __m128i bs32 =
233	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
234	__m128i auth, ctr_be, ctr, ptxt;
235
236	KASSERT(nbytes);
237	KASSERT(nbytes % 16 == 0);
238
239	auth = loadblock(authctr);
240	ctr_be = loadblock(authctr + 16);
241	ctr = _mm_shuffle_epi8(ctr_be, bs32);
242	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
243		ptxt = loadblock(in);
244		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
245		ctr = _mm_add_epi32(ctr, ctr32_inc);
246		ctr_be = _mm_shuffle_epi8(ctr, bs32);
247		storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
248	}
249	storeblock(authctr, auth);
250	storeblock(authctr + 16, ctr_be);
251}
252
253void
254aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
255    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
256    uint32_t nrounds)
257{
258	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
259	const __m128i bs32 =
260	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
261	__m128i auth, ctr_be, ctr, ptxt;
262
263	KASSERT(nbytes);
264	KASSERT(nbytes % 16 == 0);
265
266	auth = loadblock(authctr);
267	ctr_be = loadblock(authctr + 16);
268	ctr = _mm_shuffle_epi8(ctr_be, bs32);
269	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
270		ctr = _mm_add_epi32(ctr, ctr32_inc);
271		ctr_be = _mm_shuffle_epi8(ctr, bs32);
272		ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
273		storeblock(out, ptxt);
274		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
275	}
276	storeblock(authctr, auth);
277	storeblock(authctr + 16, ctr_be);
278}
279
/*
 * aes_ssse3_selftest()
 *
 *	Run the self-tests defined in this file (currently only the
 *	XTS tweak update test).  Returns 0 on success, -1 on failure.
 */
int
aes_ssse3_selftest(void)
{
	const int failed = (aes_ssse3_xts_update_selftest() != 0);

	return failed ? -1 : 0;
}
289