1/*
2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27static void
28in_cbc_init(br_sslrec_in_cbc_context *cc,
29	const br_block_cbcdec_class *bc_impl,
30	const void *bc_key, size_t bc_key_len,
31	const br_hash_class *dig_impl,
32	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
33	const void *iv)
34{
35	cc->vtable = &br_sslrec_in_cbc_vtable;
36	cc->seq = 0;
37	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
38	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
39	cc->mac_len = mac_out_len;
40	if (iv == NULL) {
41		memset(cc->iv, 0, sizeof cc->iv);
42		cc->explicit_IV = 1;
43	} else {
44		memcpy(cc->iv, iv, bc_impl->block_size);
45		cc->explicit_IV = 0;
46	}
47}
48
49static int
50cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen)
51{
52	/*
53	 * Plaintext size: at most 16384 bytes
54	 * Padding: at most 256 bytes
55	 * MAC: mac_len extra bytes
56	 * TLS 1.1+: each record has an explicit IV
57	 *
58	 * Minimum length includes at least one byte of padding, and the
59	 * MAC.
60	 *
61	 * Total length must be a multiple of the block size.
62	 */
63	size_t blen;
64	size_t min_len, max_len;
65
66	blen = cc->bc.vtable->block_size;
67	min_len = (blen + cc->mac_len) & ~(blen - 1);
68	max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1);
69	if (cc->explicit_IV) {
70		min_len += blen;
71		max_len += blen;
72	}
73	return min_len <= rlen && rlen <= max_len;
74}
75
/*
 * Rotate array buf[] of length 'len' to the left (towards low indices)
 * by 'num' bytes if ctl is 1; otherwise, leave it unchanged. This is
 * constant-time with regards to ctl and to the array contents; 'len'
 * and 'num' are NOT considered secret.
 *
 * A rotation can be requested (ctl is 1) only if 'num' is lower than
 * 'len', and 'len' is lower than or equal to 64. If 'num' is greater
 * than or equal to 'len', or if 'len' exceeds 64, then this function
 * does nothing, whatever the value of ctl: no rotation can be
 * meaningfully applied, and this avoids reading bytes beyond
 * buf[len - 1] or overflowing the temporary buffer.
 */
static void
cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
{
	unsigned char tmp[64];
	size_t u, v;

	/*
	 * This test is on public values only (array length and
	 * rotation amount), hence it does not leak secret information.
	 */
	if (len > sizeof tmp || num >= len) {
		return;
	}

	/*
	 * v is the source index for destination index u; it wraps
	 * around at the end of the array.
	 */
	for (u = 0, v = num; u < len; u ++) {
		tmp[u] = MUX(ctl, buf[v], buf[u]);
		if (++ v == len) {
			v = 0;
		}
	}
	memcpy(buf, tmp, len);
}
96
/*
 * Decrypt and verify one incoming CBC record, in place.
 *
 * cc            decryption context (cipher, IV, MAC key, sequence number)
 * record_type   record type; it is part of the MAC input
 * version       protocol version; it is part of the MAC input
 * data          encrypted record payload; it is decrypted in place
 * data_len      on input, the encrypted payload length; on success, it
 *               is set to the plaintext length (explicit IV, MAC and
 *               padding excluded)
 *
 * The encrypted length is expected to have been validated by the
 * caller beforehand (this function does not check it again).
 *
 * Returned value is a pointer to the first plaintext byte (within the
 * data[] buffer, after the explicit IV if there is one), or 0 if the
 * padding, the MAC value or the plaintext length is invalid. On
 * failure, *data_len is not modified. The sequence number is
 * incremented in all cases.
 *
 * Everything between the decryption and the final test on 'good' is
 * meant to be constant-time with regards to the padding length and the
 * decrypted bytes: statements there must not be turned into
 * conditional jumps or data-dependent memory accesses.
 */
static unsigned char *
cbc_decrypt(br_sslrec_in_cbc_context *cc,
	int record_type, unsigned version, void *data, size_t *data_len)
{
	/*
	 * We represent all lengths on 32-bit integers, because:
	 * -- SSL record lengths always fit in 32 bits;
	 * -- our constant-time primitives operate on 32-bit integers.
	 */
	unsigned char *buf;
	uint32_t u, v, len, blen, min_len, max_len;
	uint32_t good, pad_len, rot_count, len_withmac, len_nomac;
	unsigned char tmp1[64], tmp2[64];
	int i;
	br_hmac_context hc;

	buf = data;
	len = *data_len;
	blen = cc->bc.vtable->block_size;

	/*
	 * Decrypt data, and skip the explicit IV (if applicable). Note
	 * that the total length is supposed to have been verified by
	 * the caller. If there is an explicit IV, then we actually
	 * "decrypt" it using the implicit IV (from previous record),
	 * which is useless but harmless.
	 */
	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, data, len);
	if (cc->explicit_IV) {
		buf += blen;
		len -= blen;
	}

	/*
	 * Compute minimum and maximum length of plaintext + MAC. These
	 * lengths can be inferred from the outside: they are not secret.
	 * The maximum corresponds to a single byte of padding; the
	 * minimum corresponds to 256 bytes of padding, or to an empty
	 * plaintext (MAC only) if the record is too short for that.
	 */
	min_len = (cc->mac_len + 256 < len) ? len - 256 : cc->mac_len;
	max_len = len - 1;

	/*
	 * Use the last decrypted byte to compute the actual payload
	 * length. Take care not to underflow (we use unsigned types).
	 * If the padding length is out of range, 'good' is cleared and
	 * min_len is used as a substitute length, so that processing
	 * goes on with the same memory access pattern.
	 */
	pad_len = buf[max_len];
	good = LE(pad_len, (uint32_t)(max_len - min_len));
	len = MUX(good, (uint32_t)(max_len - pad_len), min_len);

	/*
	 * Check padding contents: all padding bytes must be equal to
	 * the value of pad_len. All bytes from offset min_len are
	 * read; bytes located before the padding (u < len) are
	 * accepted regardless of their value.
	 */
	for (u = min_len; u < max_len; u ++) {
		good &= LT(u, len) | EQ(buf[u], pad_len);
	}

	/*
	 * Extract the MAC value. This is done in one pass, but results
	 * in a "rotated" MAC value depending on where it actually
	 * occurs. The 'rot_count' value is set to the offset of the
	 * first MAC byte within tmp1[].
	 *
	 * min_len and max_len are also adjusted to the minimum and
	 * maximum lengths of the plaintext alone (without the MAC).
	 */
	len_withmac = (uint32_t)len;
	len_nomac = len_withmac - cc->mac_len;
	min_len -= cc->mac_len;
	rot_count = 0;
	memset(tmp1, 0, cc->mac_len);
	v = 0;
	for (u = min_len; u < max_len; u ++) {
		tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac),
			buf[u], 0x00);
		rot_count = MUX(EQ(u, len_nomac), v, rot_count);
		if (++ v == cc->mac_len) {
			v = 0;
		}
	}
	max_len -= cc->mac_len;

	/*
	 * Rotate back the MAC value. The loop below does the constant-time
	 * rotation in time n*log n for a MAC output of length n. We assume
	 * that the MAC output length is no more than 64 bytes, so the
	 * rotation count fits on 6 bits.
	 *
	 * Bits of rot_count are processed from most to least significant,
	 * and each bit is cleared once handled, so that 'rot_count >> i'
	 * is always 0 or 1. Note that for the larger values of i, rc may
	 * be greater than or equal to cc->mac_len; in these iterations
	 * the control bit is necessarily 0 (rot_count is lower than
	 * cc->mac_len), hence no rotation is requested.
	 */
	for (i = 5; i >= 0; i --) {
		uint32_t rc;

		rc = (uint32_t)1 << i;
		cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc);
		rot_count &= ~rc;
	}

	/*
	 * Recompute the HMAC value. The input is the concatenation of
	 * the sequence number (8 bytes), the record header (5 bytes),
	 * and the payload.
	 *
	 * At that point, min_len and max_len are the minimum and
	 * maximum plaintext lengths (both were reduced by the MAC
	 * length above).
	 */
	br_enc64be(tmp2, cc->seq ++);
	tmp2[8] = (unsigned char)record_type;
	br_enc16be(tmp2 + 9, version);
	br_enc16be(tmp2 + 11, len_nomac);
	br_hmac_init(&hc, &cc->mac, cc->mac_len);
	br_hmac_update(&hc, tmp2, 13);
	br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2);

	/*
	 * Compare the extracted and recomputed MAC values. All bytes
	 * are compared; there is no early exit on mismatch.
	 */
	for (u = 0; u < cc->mac_len; u ++) {
		good &= EQ0(tmp1[u] ^ tmp2[u]);
	}

	/*
	 * Check that the plaintext length is valid. The previous
	 * check was on the encrypted length, but the padding may have
	 * turned shorter than expected.
	 *
	 * Once this final test is done, the critical "constant-time"
	 * section ends and we can make conditional jumps again.
	 */
	good &= LE(len_nomac, 16384);

	if (!good) {
		return 0;
	}
	*data_len = len_nomac;
	return buf;
}
231
/*
 * see bearssl_ssl.h
 *
 * Method table for incoming CBC records. The inner structure contains,
 * in that order, the context size, the record length verification
 * function and the decryption function; it is followed by the
 * CBC-specific initialisation function. The functions defined in this
 * file use the concrete context type for their first parameter, hence
 * the casts to the generic prototypes.
 */
const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = {
	{
		sizeof(br_sslrec_in_cbc_context),
		(int (*)(const br_sslrec_in_class *const *, size_t))
			&cbc_check_length,
		(unsigned char *(*)(const br_sslrec_in_class **,
			int, unsigned, void *, size_t *))
			&cbc_decrypt
	},
	(void (*)(const br_sslrec_in_cbc_class **,
		const br_block_cbcdec_class *, const void *, size_t,
		const br_hash_class *, const void *, size_t, size_t,
		const void *))
		&in_cbc_init
};
248
249/*
250 * For CBC output:
251 *
252 * -- With TLS 1.1+, there is an explicit IV. Generation method uses
253 * HMAC, computed over the current sequence number, and the current MAC
254 * key. The resulting value is truncated to the size of a block, and
255 * added at the head of the plaintext; it will get encrypted along with
256 * the data. This custom generation mechanism is "safe" under the
257 * assumption that HMAC behaves like a random oracle; since the MAC for
258 * a record is computed over the concatenation of the sequence number,
259 * the record header and the plaintext, the HMAC-for-IV will not collide
260 * with the normal HMAC.
261 *
262 * -- With TLS 1.0, for application data, we want to enforce a 1/n-1
263 * split, as a countermeasure against chosen-plaintext attacks. We thus
264 * need to leave some room in the buffer for that extra record.
265 */
266
267static void
268out_cbc_init(br_sslrec_out_cbc_context *cc,
269	const br_block_cbcenc_class *bc_impl,
270	const void *bc_key, size_t bc_key_len,
271	const br_hash_class *dig_impl,
272	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
273	const void *iv)
274{
275	cc->vtable = &br_sslrec_out_cbc_vtable;
276	cc->seq = 0;
277	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
278	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
279	cc->mac_len = mac_out_len;
280	if (iv == NULL) {
281		memset(cc->iv, 0, sizeof cc->iv);
282		cc->explicit_IV = 1;
283	} else {
284		memcpy(cc->iv, iv, bc_impl->block_size);
285		cc->explicit_IV = 0;
286	}
287}
288
289static void
290cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
291	size_t *start, size_t *end)
292{
293	size_t blen, len;
294
295	blen = cc->bc.vtable->block_size;
296	if (cc->explicit_IV) {
297		*start += blen;
298	} else {
299		*start += 4 + ((cc->mac_len + blen + 1) & ~(blen - 1));
300	}
301	len = (*end - *start) & ~(blen - 1);
302	len -= 1 + cc->mac_len;
303	if (len > 16384) {
304		len = 16384;
305	}
306	*end = *start + len;
307}
308
309static unsigned char *
310cbc_encrypt(br_sslrec_out_cbc_context *cc,
311	int record_type, unsigned version, void *data, size_t *data_len)
312{
313	unsigned char *buf, *rbuf;
314	size_t len, blen, plen;
315	unsigned char tmp[13];
316	br_hmac_context hc;
317
318	buf = data;
319	len = *data_len;
320	blen = cc->bc.vtable->block_size;
321
322	/*
323	 * If using TLS 1.0, with more than one byte of plaintext, and
324	 * the record is application data, then we need to compute
325	 * a "split". We do not perform the split on other record types
326	 * because it turned out that some existing, deployed
327	 * implementations of SSL/TLS do not tolerate the splitting of
328	 * some message types (in particular the Finished message).
329	 *
330	 * If using TLS 1.1+, then there is an explicit IV. We produce
331	 * that IV by adding an extra initial plaintext block, whose
332	 * value is computed with HMAC over the record sequence number.
333	 */
334	if (cc->explicit_IV) {
335		/*
336		 * We use here the fact that all the HMAC variants we
337		 * support can produce at least 16 bytes, while all the
338		 * block ciphers we support have blocks of no more than
339		 * 16 bytes. Thus, we can always truncate the HMAC output
340		 * down to the block size.
341		 */
342		br_enc64be(tmp, cc->seq);
343		br_hmac_init(&hc, &cc->mac, blen);
344		br_hmac_update(&hc, tmp, 8);
345		br_hmac_out(&hc, buf - blen);
346		rbuf = buf - blen - 5;
347	} else {
348		if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
349			/*
350			 * To do the split, we use a recursive invocation;
351			 * since we only give one byte to the inner call,
352			 * the recursion stops there.
353			 *
354			 * We need to compute the exact size of the extra
355			 * record, so that the two resulting records end up
356			 * being sequential in RAM.
357			 *
358			 * We use here the fact that cbc_max_plaintext()
359			 * adjusted the start offset to leave room for the
360			 * initial fragment.
361			 */
362			size_t xlen;
363
364			rbuf = buf - 4
365				- ((cc->mac_len + blen + 1) & ~(blen - 1));
366			rbuf[0] = buf[0];
367			xlen = 1;
368			rbuf = cbc_encrypt(cc, record_type,
369				version, rbuf, &xlen);
370			buf ++;
371			len --;
372		} else {
373			rbuf = buf - 5;
374		}
375	}
376
377	/*
378	 * Compute MAC.
379	 */
380	br_enc64be(tmp, cc->seq ++);
381	tmp[8] = record_type;
382	br_enc16be(tmp + 9, version);
383	br_enc16be(tmp + 11, len);
384	br_hmac_init(&hc, &cc->mac, cc->mac_len);
385	br_hmac_update(&hc, tmp, 13);
386	br_hmac_update(&hc, buf, len);
387	br_hmac_out(&hc, buf + len);
388	len += cc->mac_len;
389
390	/*
391	 * Add padding.
392	 */
393	plen = blen - (len & (blen - 1));
394	memset(buf + len, (unsigned)plen - 1, plen);
395	len += plen;
396
397	/*
398	 * If an explicit IV is used, the corresponding extra block was
399	 * already put in place earlier; we just have to account for it
400	 * here.
401	 */
402	if (cc->explicit_IV) {
403		buf -= blen;
404		len += blen;
405	}
406
407	/*
408	 * Encrypt the whole thing. If there is an explicit IV, we also
409	 * encrypt it, which is fine (encryption of a uniformly random
410	 * block is still a uniformly random block).
411	 */
412	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);
413
414	/*
415	 * Add the header and return.
416	 */
417	buf[-5] = record_type;
418	br_enc16be(buf - 4, version);
419	br_enc16be(buf - 2, len);
420	*data_len = (size_t)((buf + len) - rbuf);
421	return rbuf;
422}
423
/*
 * see bearssl_ssl.h
 *
 * Method table for outgoing CBC records. The inner structure contains,
 * in that order, the context size, the function which computes the
 * plaintext area and the encryption function; it is followed by the
 * CBC-specific initialisation function. The functions defined in this
 * file use the concrete context type for their first parameter, hence
 * the casts to the generic prototypes.
 */
const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
	{
		sizeof(br_sslrec_out_cbc_context),
		(void (*)(const br_sslrec_out_class *const *,
			size_t *, size_t *))
			&cbc_max_plaintext,
		(unsigned char *(*)(const br_sslrec_out_class **,
			int, unsigned, void *, size_t *))
			&cbc_encrypt
	},
	(void (*)(const br_sslrec_out_cbc_class **,
		const br_block_cbcenc_class *, const void *, size_t,
		const br_hash_class *, const void *, size_t, size_t,
		const void *))
		&out_cbc_init
};
441