1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27/* see bearssl_block.h */
28void
29br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
30	const void *key, size_t len)
31{
32	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
33	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
34}
35
/*
 * XOR the first len bytes of src[] into dst[], in place:
 * dst[k] ^= src[k] for every k in 0..len-1, in increasing index order.
 * Nothing is touched when len is zero.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < len; k ++) {
		out[k] ^= in[k];
	}
}
48
49/* see bearssl_block.h */
50void
51br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
52	void *ctr, void *data, size_t len)
53{
54	unsigned char *buf;
55	unsigned char *ivbuf;
56	uint32_t iv0, iv1, iv2, iv3;
57	uint64_t sk_exp[120];
58
59	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
60
61	/*
62	 * We keep the counter as four 32-bit values, with big-endian
63	 * convention, because that's what is expected for purposes of
64	 * incrementing the counter value.
65	 */
66	ivbuf = ctr;
67	iv0 = br_dec32be(ivbuf +  0);
68	iv1 = br_dec32be(ivbuf +  4);
69	iv2 = br_dec32be(ivbuf +  8);
70	iv3 = br_dec32be(ivbuf + 12);
71
72	buf = data;
73	while (len > 0) {
74		uint64_t q[8];
75		uint32_t w[16];
76		unsigned char tmp[64];
77		int i, j;
78
79		/*
80		 * The bitslice implementation expects values in
81		 * little-endian convention, so we have to byteswap them.
82		 */
83		j = (len >= 64) ? 16 : (int)(len >> 2);
84		for (i = 0; i < j; i += 4) {
85			uint32_t carry;
86
87			w[i + 0] = br_swap32(iv0);
88			w[i + 1] = br_swap32(iv1);
89			w[i + 2] = br_swap32(iv2);
90			w[i + 3] = br_swap32(iv3);
91			iv3 ++;
92			carry = ~(iv3 | -iv3) >> 31;
93			iv2 += carry;
94			carry &= -(~(iv2 | -iv2) >> 31);
95			iv1 += carry;
96			carry &= -(~(iv1 | -iv1) >> 31);
97			iv0 += carry;
98		}
99		memset(w + i, 0, (16 - i) * sizeof(uint32_t));
100
101		for (i = 0; i < 4; i ++) {
102			br_aes_ct64_interleave_in(
103				&q[i], &q[i + 4], w + (i << 2));
104		}
105		br_aes_ct64_ortho(q);
106		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
107		br_aes_ct64_ortho(q);
108		for (i = 0; i < 4; i ++) {
109			br_aes_ct64_interleave_out(
110				w + (i << 2), q[i], q[i + 4]);
111		}
112
113		br_range_enc32le(tmp, w, 16);
114		if (len <= 64) {
115			xorbuf(buf, tmp, len);
116			break;
117		}
118		xorbuf(buf, tmp, 64);
119		buf += 64;
120		len -= 64;
121	}
122	br_enc32be(ivbuf +  0, iv0);
123	br_enc32be(ivbuf +  4, iv1);
124	br_enc32be(ivbuf +  8, iv2);
125	br_enc32be(ivbuf + 12, iv3);
126}
127
128/* see bearssl_block.h */
129void
130br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
131	void *cbcmac, const void *data, size_t len)
132{
133	const unsigned char *buf;
134	uint32_t cm0, cm1, cm2, cm3;
135	uint64_t q[8];
136	uint64_t sk_exp[120];
137
138	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
139
140	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
141	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
142	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
143	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
144
145	buf = data;
146	memset(q, 0, sizeof q);
147	while (len > 0) {
148		uint32_t w[4];
149
150		w[0] = cm0 ^ br_dec32le(buf +  0);
151		w[1] = cm1 ^ br_dec32le(buf +  4);
152		w[2] = cm2 ^ br_dec32le(buf +  8);
153		w[3] = cm3 ^ br_dec32le(buf + 12);
154
155		br_aes_ct64_interleave_in(&q[0], &q[4], w);
156		br_aes_ct64_ortho(q);
157		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
158		br_aes_ct64_ortho(q);
159		br_aes_ct64_interleave_out(w, q[0], q[4]);
160
161		cm0 = w[0];
162		cm1 = w[1];
163		cm2 = w[2];
164		cm3 = w[3];
165		buf += 16;
166		len -= 16;
167	}
168
169	br_enc32le((unsigned char *)cbcmac +  0, cm0);
170	br_enc32le((unsigned char *)cbcmac +  4, cm1);
171	br_enc32le((unsigned char *)cbcmac +  8, cm2);
172	br_enc32le((unsigned char *)cbcmac + 12, cm3);
173}
174
175/* see bearssl_block.h */
176void
177br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
178	void *ctr, void *cbcmac, void *data, size_t len)
179{
180	/*
181	 * When encrypting, the CBC-MAC processing must be lagging by
182	 * one block, since it operates on the encrypted values, so
183	 * it must wait for that encryption to complete.
184	 */
185
186	unsigned char *buf;
187	unsigned char *ivbuf;
188	uint32_t iv0, iv1, iv2, iv3;
189	uint32_t cm0, cm1, cm2, cm3;
190	uint64_t sk_exp[120];
191	uint64_t q[8];
192	int first_iter;
193
194	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
195
196	/*
197	 * We keep the counter as four 32-bit values, with big-endian
198	 * convention, because that's what is expected for purposes of
199	 * incrementing the counter value.
200	 */
201	ivbuf = ctr;
202	iv0 = br_dec32be(ivbuf +  0);
203	iv1 = br_dec32be(ivbuf +  4);
204	iv2 = br_dec32be(ivbuf +  8);
205	iv3 = br_dec32be(ivbuf + 12);
206
207	/*
208	 * The current CBC-MAC value is kept in little-endian convention.
209	 */
210	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
211	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
212	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
213	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
214
215	buf = data;
216	first_iter = 1;
217	memset(q, 0, sizeof q);
218	while (len > 0) {
219		uint32_t w[8], carry;
220
221		/*
222		 * The bitslice implementation expects values in
223		 * little-endian convention, so we have to byteswap them.
224		 */
225		w[0] = br_swap32(iv0);
226		w[1] = br_swap32(iv1);
227		w[2] = br_swap32(iv2);
228		w[3] = br_swap32(iv3);
229		iv3 ++;
230		carry = ~(iv3 | -iv3) >> 31;
231		iv2 += carry;
232		carry &= -(~(iv2 | -iv2) >> 31);
233		iv1 += carry;
234		carry &= -(~(iv1 | -iv1) >> 31);
235		iv0 += carry;
236
237		/*
238		 * The block for CBC-MAC.
239		 */
240		w[4] = cm0;
241		w[5] = cm1;
242		w[6] = cm2;
243		w[7] = cm3;
244
245		br_aes_ct64_interleave_in(&q[0], &q[4], w);
246		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
247		br_aes_ct64_ortho(q);
248		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
249		br_aes_ct64_ortho(q);
250		br_aes_ct64_interleave_out(w, q[0], q[4]);
251		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
252
253		/*
254		 * We do the XOR with the plaintext in 32-bit registers,
255		 * so that the value are available for CBC-MAC processing
256		 * as well.
257		 */
258		w[0] ^= br_dec32le(buf +  0);
259		w[1] ^= br_dec32le(buf +  4);
260		w[2] ^= br_dec32le(buf +  8);
261		w[3] ^= br_dec32le(buf + 12);
262		br_enc32le(buf +  0, w[0]);
263		br_enc32le(buf +  4, w[1]);
264		br_enc32le(buf +  8, w[2]);
265		br_enc32le(buf + 12, w[3]);
266
267		buf += 16;
268		len -= 16;
269
270		/*
271		 * We set the cm* values to the block to encrypt in the
272		 * next iteration.
273		 */
274		if (first_iter) {
275			first_iter = 0;
276			cm0 ^= w[0];
277			cm1 ^= w[1];
278			cm2 ^= w[2];
279			cm3 ^= w[3];
280		} else {
281			cm0 = w[0] ^ w[4];
282			cm1 = w[1] ^ w[5];
283			cm2 = w[2] ^ w[6];
284			cm3 = w[3] ^ w[7];
285		}
286
287		/*
288		 * If this was the last iteration, then compute the
289		 * extra block encryption to complete CBC-MAC.
290		 */
291		if (len == 0) {
292			w[0] = cm0;
293			w[1] = cm1;
294			w[2] = cm2;
295			w[3] = cm3;
296			br_aes_ct64_interleave_in(&q[0], &q[4], w);
297			br_aes_ct64_ortho(q);
298			br_aes_ct64_bitslice_encrypt(
299				ctx->num_rounds, sk_exp, q);
300			br_aes_ct64_ortho(q);
301			br_aes_ct64_interleave_out(w, q[0], q[4]);
302			cm0 = w[0];
303			cm1 = w[1];
304			cm2 = w[2];
305			cm3 = w[3];
306			break;
307		}
308	}
309
310	br_enc32be(ivbuf +  0, iv0);
311	br_enc32be(ivbuf +  4, iv1);
312	br_enc32be(ivbuf +  8, iv2);
313	br_enc32be(ivbuf + 12, iv3);
314	br_enc32le((unsigned char *)cbcmac +  0, cm0);
315	br_enc32le((unsigned char *)cbcmac +  4, cm1);
316	br_enc32le((unsigned char *)cbcmac +  8, cm2);
317	br_enc32le((unsigned char *)cbcmac + 12, cm3);
318}
319
320/* see bearssl_block.h */
321void
322br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
323	void *ctr, void *cbcmac, void *data, size_t len)
324{
325	unsigned char *buf;
326	unsigned char *ivbuf;
327	uint32_t iv0, iv1, iv2, iv3;
328	uint32_t cm0, cm1, cm2, cm3;
329	uint64_t sk_exp[120];
330	uint64_t q[8];
331
332	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
333
334	/*
335	 * We keep the counter as four 32-bit values, with big-endian
336	 * convention, because that's what is expected for purposes of
337	 * incrementing the counter value.
338	 */
339	ivbuf = ctr;
340	iv0 = br_dec32be(ivbuf +  0);
341	iv1 = br_dec32be(ivbuf +  4);
342	iv2 = br_dec32be(ivbuf +  8);
343	iv3 = br_dec32be(ivbuf + 12);
344
345	/*
346	 * The current CBC-MAC value is kept in little-endian convention.
347	 */
348	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
349	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
350	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
351	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
352
353	buf = data;
354	memset(q, 0, sizeof q);
355	while (len > 0) {
356		uint32_t w[8], carry;
357		unsigned char tmp[16];
358
359		/*
360		 * The bitslice implementation expects values in
361		 * little-endian convention, so we have to byteswap them.
362		 */
363		w[0] = br_swap32(iv0);
364		w[1] = br_swap32(iv1);
365		w[2] = br_swap32(iv2);
366		w[3] = br_swap32(iv3);
367		iv3 ++;
368		carry = ~(iv3 | -iv3) >> 31;
369		iv2 += carry;
370		carry &= -(~(iv2 | -iv2) >> 31);
371		iv1 += carry;
372		carry &= -(~(iv1 | -iv1) >> 31);
373		iv0 += carry;
374
375		/*
376		 * The block for CBC-MAC.
377		 */
378		w[4] = cm0 ^ br_dec32le(buf +  0);
379		w[5] = cm1 ^ br_dec32le(buf +  4);
380		w[6] = cm2 ^ br_dec32le(buf +  8);
381		w[7] = cm3 ^ br_dec32le(buf + 12);
382
383		br_aes_ct64_interleave_in(&q[0], &q[4], w);
384		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
385		br_aes_ct64_ortho(q);
386		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
387		br_aes_ct64_ortho(q);
388		br_aes_ct64_interleave_out(w, q[0], q[4]);
389		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
390
391		br_enc32le(tmp +  0, w[0]);
392		br_enc32le(tmp +  4, w[1]);
393		br_enc32le(tmp +  8, w[2]);
394		br_enc32le(tmp + 12, w[3]);
395		xorbuf(buf, tmp, 16);
396		cm0 = w[4];
397		cm1 = w[5];
398		cm2 = w[6];
399		cm3 = w[7];
400		buf += 16;
401		len -= 16;
402	}
403
404	br_enc32be(ivbuf +  0, iv0);
405	br_enc32be(ivbuf +  4, iv1);
406	br_enc32be(ivbuf +  8, iv2);
407	br_enc32be(ivbuf + 12, iv3);
408	br_enc32le((unsigned char *)cbcmac +  0, cm0);
409	br_enc32le((unsigned char *)cbcmac +  4, cm1);
410	br_enc32le((unsigned char *)cbcmac +  8, cm2);
411	br_enc32le((unsigned char *)cbcmac + 12, cm3);
412}
413
414/* see bearssl_block.h */
415const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
416	sizeof(br_aes_ct64_ctrcbc_keys),
417	16,
418	4,
419	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
420		&br_aes_ct64_ctrcbc_init,
421	(void (*)(const br_block_ctrcbc_class *const *,
422		void *, void *, void *, size_t))
423		&br_aes_ct64_ctrcbc_encrypt,
424	(void (*)(const br_block_ctrcbc_class *const *,
425		void *, void *, void *, size_t))
426		&br_aes_ct64_ctrcbc_decrypt,
427	(void (*)(const br_block_ctrcbc_class *const *,
428		void *, void *, size_t))
429		&br_aes_ct64_ctrcbc_ctr,
430	(void (*)(const br_block_ctrcbc_class *const *,
431		void *, const void *, size_t))
432		&br_aes_ct64_ctrcbc_mac
433};
434