1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27/* see bearssl_block.h */
28void
29br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
30	const void *key, size_t len)
31{
32	ctx->vtable = &br_aes_ct_ctrcbc_vtable;
33	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
34}
35
/*
 * XOR the first len bytes of src into dst (dst[i] ^= src[i]), walking
 * both buffers from index 0 upward. A length of zero leaves dst
 * untouched.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t u;

	for (u = 0; u < len; u ++) {
		out[u] ^= in[u];
	}
}
48
49/* see bearssl_block.h */
50void
51br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
52	void *ctr, void *data, size_t len)
53{
54	unsigned char *buf;
55	unsigned char *ivbuf;
56	uint32_t iv0, iv1, iv2, iv3;
57	uint32_t sk_exp[120];
58
59	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
60
61	/*
62	 * We keep the counter as four 32-bit values, with big-endian
63	 * convention, because that's what is expected for purposes of
64	 * incrementing the counter value.
65	 */
66	ivbuf = ctr;
67	iv0 = br_dec32be(ivbuf +  0);
68	iv1 = br_dec32be(ivbuf +  4);
69	iv2 = br_dec32be(ivbuf +  8);
70	iv3 = br_dec32be(ivbuf + 12);
71
72	buf = data;
73	while (len > 0) {
74		uint32_t q[8], carry;
75		unsigned char tmp[32];
76
77		/*
78		 * The bitslice implementation expects values in
79		 * little-endian convention, so we have to byteswap them.
80		 */
81		q[0] = br_swap32(iv0);
82		q[2] = br_swap32(iv1);
83		q[4] = br_swap32(iv2);
84		q[6] = br_swap32(iv3);
85		iv3 ++;
86		carry = ~(iv3 | -iv3) >> 31;
87		iv2 += carry;
88		carry &= -(~(iv2 | -iv2) >> 31);
89		iv1 += carry;
90		carry &= -(~(iv1 | -iv1) >> 31);
91		iv0 += carry;
92		q[1] = br_swap32(iv0);
93		q[3] = br_swap32(iv1);
94		q[5] = br_swap32(iv2);
95		q[7] = br_swap32(iv3);
96		if (len > 16) {
97			iv3 ++;
98			carry = ~(iv3 | -iv3) >> 31;
99			iv2 += carry;
100			carry &= -(~(iv2 | -iv2) >> 31);
101			iv1 += carry;
102			carry &= -(~(iv1 | -iv1) >> 31);
103			iv0 += carry;
104		}
105
106		br_aes_ct_ortho(q);
107		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
108		br_aes_ct_ortho(q);
109
110		br_enc32le(tmp, q[0]);
111		br_enc32le(tmp + 4, q[2]);
112		br_enc32le(tmp + 8, q[4]);
113		br_enc32le(tmp + 12, q[6]);
114		br_enc32le(tmp + 16, q[1]);
115		br_enc32le(tmp + 20, q[3]);
116		br_enc32le(tmp + 24, q[5]);
117		br_enc32le(tmp + 28, q[7]);
118
119		if (len <= 32) {
120			xorbuf(buf, tmp, len);
121			break;
122		}
123		xorbuf(buf, tmp, 32);
124		buf += 32;
125		len -= 32;
126	}
127	br_enc32be(ivbuf +  0, iv0);
128	br_enc32be(ivbuf +  4, iv1);
129	br_enc32be(ivbuf +  8, iv2);
130	br_enc32be(ivbuf + 12, iv3);
131}
132
133/* see bearssl_block.h */
134void
135br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
136	void *cbcmac, const void *data, size_t len)
137{
138	const unsigned char *buf;
139	uint32_t cm0, cm1, cm2, cm3;
140	uint32_t q[8];
141	uint32_t sk_exp[120];
142
143	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
144
145	buf = data;
146	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
147	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
148	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
149	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
150	q[1] = 0;
151	q[3] = 0;
152	q[5] = 0;
153	q[7] = 0;
154
155	while (len > 0) {
156		q[0] = cm0 ^ br_dec32le(buf +  0);
157		q[2] = cm1 ^ br_dec32le(buf +  4);
158		q[4] = cm2 ^ br_dec32le(buf +  8);
159		q[6] = cm3 ^ br_dec32le(buf + 12);
160
161		br_aes_ct_ortho(q);
162		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
163		br_aes_ct_ortho(q);
164
165		cm0 = q[0];
166		cm1 = q[2];
167		cm2 = q[4];
168		cm3 = q[6];
169		buf += 16;
170		len -= 16;
171	}
172
173	br_enc32le((unsigned char *)cbcmac +  0, cm0);
174	br_enc32le((unsigned char *)cbcmac +  4, cm1);
175	br_enc32le((unsigned char *)cbcmac +  8, cm2);
176	br_enc32le((unsigned char *)cbcmac + 12, cm3);
177}
178
179/* see bearssl_block.h */
180void
181br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
182	void *ctr, void *cbcmac, void *data, size_t len)
183{
184	/*
185	 * When encrypting, the CBC-MAC processing must be lagging by
186	 * one block, since it operates on the encrypted values, so
187	 * it must wait for that encryption to complete.
188	 */
189
190	unsigned char *buf;
191	unsigned char *ivbuf;
192	uint32_t iv0, iv1, iv2, iv3;
193	uint32_t cm0, cm1, cm2, cm3;
194	uint32_t sk_exp[120];
195	int first_iter;
196
197	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
198
199	/*
200	 * We keep the counter as four 32-bit values, with big-endian
201	 * convention, because that's what is expected for purposes of
202	 * incrementing the counter value.
203	 */
204	ivbuf = ctr;
205	iv0 = br_dec32be(ivbuf +  0);
206	iv1 = br_dec32be(ivbuf +  4);
207	iv2 = br_dec32be(ivbuf +  8);
208	iv3 = br_dec32be(ivbuf + 12);
209
210	/*
211	 * The current CBC-MAC value is kept in little-endian convention.
212	 */
213	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
214	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
215	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
216	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
217
218	buf = data;
219	first_iter = 1;
220	while (len > 0) {
221		uint32_t q[8], carry;
222
223		/*
224		 * The bitslice implementation expects values in
225		 * little-endian convention, so we have to byteswap them.
226		 */
227		q[0] = br_swap32(iv0);
228		q[2] = br_swap32(iv1);
229		q[4] = br_swap32(iv2);
230		q[6] = br_swap32(iv3);
231		iv3 ++;
232		carry = ~(iv3 | -iv3) >> 31;
233		iv2 += carry;
234		carry &= -(~(iv2 | -iv2) >> 31);
235		iv1 += carry;
236		carry &= -(~(iv1 | -iv1) >> 31);
237		iv0 += carry;
238
239		/*
240		 * The odd values are used for CBC-MAC.
241		 */
242		q[1] = cm0;
243		q[3] = cm1;
244		q[5] = cm2;
245		q[7] = cm3;
246
247		br_aes_ct_ortho(q);
248		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
249		br_aes_ct_ortho(q);
250
251		/*
252		 * We do the XOR with the plaintext in 32-bit registers,
253		 * so that the value are available for CBC-MAC processing
254		 * as well.
255		 */
256		q[0] ^= br_dec32le(buf +  0);
257		q[2] ^= br_dec32le(buf +  4);
258		q[4] ^= br_dec32le(buf +  8);
259		q[6] ^= br_dec32le(buf + 12);
260		br_enc32le(buf +  0, q[0]);
261		br_enc32le(buf +  4, q[2]);
262		br_enc32le(buf +  8, q[4]);
263		br_enc32le(buf + 12, q[6]);
264
265		buf += 16;
266		len -= 16;
267
268		/*
269		 * We set the cm* values to the block to encrypt in the
270		 * next iteration.
271		 */
272		if (first_iter) {
273			first_iter = 0;
274			cm0 ^= q[0];
275			cm1 ^= q[2];
276			cm2 ^= q[4];
277			cm3 ^= q[6];
278		} else {
279			cm0 = q[0] ^ q[1];
280			cm1 = q[2] ^ q[3];
281			cm2 = q[4] ^ q[5];
282			cm3 = q[6] ^ q[7];
283		}
284
285		/*
286		 * If this was the last iteration, then compute the
287		 * extra block encryption to complete CBC-MAC.
288		 */
289		if (len == 0) {
290			q[0] = cm0;
291			q[2] = cm1;
292			q[4] = cm2;
293			q[6] = cm3;
294			br_aes_ct_ortho(q);
295			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
296			br_aes_ct_ortho(q);
297			cm0 = q[0];
298			cm1 = q[2];
299			cm2 = q[4];
300			cm3 = q[6];
301			break;
302		}
303	}
304
305	br_enc32be(ivbuf +  0, iv0);
306	br_enc32be(ivbuf +  4, iv1);
307	br_enc32be(ivbuf +  8, iv2);
308	br_enc32be(ivbuf + 12, iv3);
309	br_enc32le((unsigned char *)cbcmac +  0, cm0);
310	br_enc32le((unsigned char *)cbcmac +  4, cm1);
311	br_enc32le((unsigned char *)cbcmac +  8, cm2);
312	br_enc32le((unsigned char *)cbcmac + 12, cm3);
313}
314
315/* see bearssl_block.h */
316void
317br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
318	void *ctr, void *cbcmac, void *data, size_t len)
319{
320	unsigned char *buf;
321	unsigned char *ivbuf;
322	uint32_t iv0, iv1, iv2, iv3;
323	uint32_t cm0, cm1, cm2, cm3;
324	uint32_t sk_exp[120];
325
326	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
327
328	/*
329	 * We keep the counter as four 32-bit values, with big-endian
330	 * convention, because that's what is expected for purposes of
331	 * incrementing the counter value.
332	 */
333	ivbuf = ctr;
334	iv0 = br_dec32be(ivbuf +  0);
335	iv1 = br_dec32be(ivbuf +  4);
336	iv2 = br_dec32be(ivbuf +  8);
337	iv3 = br_dec32be(ivbuf + 12);
338
339	/*
340	 * The current CBC-MAC value is kept in little-endian convention.
341	 */
342	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
343	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
344	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
345	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
346
347	buf = data;
348	while (len > 0) {
349		uint32_t q[8], carry;
350		unsigned char tmp[16];
351
352		/*
353		 * The bitslice implementation expects values in
354		 * little-endian convention, so we have to byteswap them.
355		 */
356		q[0] = br_swap32(iv0);
357		q[2] = br_swap32(iv1);
358		q[4] = br_swap32(iv2);
359		q[6] = br_swap32(iv3);
360		iv3 ++;
361		carry = ~(iv3 | -iv3) >> 31;
362		iv2 += carry;
363		carry &= -(~(iv2 | -iv2) >> 31);
364		iv1 += carry;
365		carry &= -(~(iv1 | -iv1) >> 31);
366		iv0 += carry;
367
368		/*
369		 * The odd values are used for CBC-MAC.
370		 */
371		q[1] = cm0 ^ br_dec32le(buf +  0);
372		q[3] = cm1 ^ br_dec32le(buf +  4);
373		q[5] = cm2 ^ br_dec32le(buf +  8);
374		q[7] = cm3 ^ br_dec32le(buf + 12);
375
376		br_aes_ct_ortho(q);
377		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
378		br_aes_ct_ortho(q);
379
380		br_enc32le(tmp +  0, q[0]);
381		br_enc32le(tmp +  4, q[2]);
382		br_enc32le(tmp +  8, q[4]);
383		br_enc32le(tmp + 12, q[6]);
384		xorbuf(buf, tmp, 16);
385		cm0 = q[1];
386		cm1 = q[3];
387		cm2 = q[5];
388		cm3 = q[7];
389		buf += 16;
390		len -= 16;
391	}
392
393	br_enc32be(ivbuf +  0, iv0);
394	br_enc32be(ivbuf +  4, iv1);
395	br_enc32be(ivbuf +  8, iv2);
396	br_enc32be(ivbuf + 12, iv3);
397	br_enc32le((unsigned char *)cbcmac +  0, cm0);
398	br_enc32le((unsigned char *)cbcmac +  4, cm1);
399	br_enc32le((unsigned char *)cbcmac +  8, cm2);
400	br_enc32le((unsigned char *)cbcmac + 12, cm3);
401}
402
403/* see bearssl_block.h */
404const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
405	sizeof(br_aes_ct_ctrcbc_keys),
406	16,
407	4,
408	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
409		&br_aes_ct_ctrcbc_init,
410	(void (*)(const br_block_ctrcbc_class *const *,
411		void *, void *, void *, size_t))
412		&br_aes_ct_ctrcbc_encrypt,
413	(void (*)(const br_block_ctrcbc_class *const *,
414		void *, void *, void *, size_t))
415		&br_aes_ct_ctrcbc_decrypt,
416	(void (*)(const br_block_ctrcbc_class *const *,
417		void *, void *, size_t))
418		&br_aes_ct_ctrcbc_ctr,
419	(void (*)(const br_block_ctrcbc_class *const *,
420		void *, const void *, size_t))
421		&br_aes_ct_ctrcbc_mac
422};
423