1// SPDX-License-Identifier: GPL-2.0+ and MIT
2/*
3 * RSA library - generate parameters for a public key
4 *
5 * Copyright (c) 2019 Linaro Limited
6 * Author: AKASHI Takahiro
7 *
8 * Big number routines in this file come from BearSSL:
9 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
10 */
11
12#include <image.h>
13#include <malloc.h>
14#include <crypto/internal/rsa.h>
15#include <u-boot/rsa-mod-exp.h>
16#include <asm/unaligned.h>
17
18/**
19 * br_dec16be() - Convert 16-bit big-endian integer to native
20 * @src:	Pointer to data
21 * Return:	Native-endian integer
22 */
23static unsigned br_dec16be(const void *src)
24{
25	return get_unaligned_be16(src);
26}
27
28/**
29 * br_dec32be() - Convert 32-bit big-endian integer to native
30 * @src:	Pointer to data
31 * Return:	Native-endian integer
32 */
33static uint32_t br_dec32be(const void *src)
34{
35	return get_unaligned_be32(src);
36}
37
38/**
39 * br_enc32be() - Convert native 32-bit integer to big-endian
40 * @dst:	Pointer to buffer to store big-endian integer in
41 * @x:		Native 32-bit integer
42 */
43static void br_enc32be(void *dst, uint32_t x)
44{
45	__be32 tmp;
46
47	tmp = cpu_to_be32(x);
48	memcpy(dst, &tmp, sizeof(tmp));
49}
50
51/* from BearSSL's src/inner.h */
52
53/*
54 * Negate a boolean.
55 */
56static uint32_t NOT(uint32_t ctl)
57{
58	return ctl ^ 1;
59}
60
61/*
62 * Multiplexer: returns x if ctl == 1, y if ctl == 0.
63 */
64static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
65{
66	return y ^ (-ctl & (x ^ y));
67}
68
69/*
70 * Equality check: returns 1 if x == y, 0 otherwise.
71 */
72static uint32_t EQ(uint32_t x, uint32_t y)
73{
74	uint32_t q;
75
76	q = x ^ y;
77	return NOT((q | -q) >> 31);
78}
79
80/*
81 * Inequality check: returns 1 if x != y, 0 otherwise.
82 */
83static uint32_t NEQ(uint32_t x, uint32_t y)
84{
85	uint32_t q;
86
87	q = x ^ y;
88	return (q | -q) >> 31;
89}
90
91/*
92 * Comparison: returns 1 if x > y, 0 otherwise.
93 */
94static uint32_t GT(uint32_t x, uint32_t y)
95{
96	/*
97	 * If both x < 2^31 and y < 2^31, then y-x will have its high
98	 * bit set if x > y, cleared otherwise.
99	 *
100	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
101	 * result is the high bit of x.
102	 *
103	 * If both x >= 2^31 and y >= 2^31, then we can virtually
104	 * subtract 2^31 from both, and we are back to the first case.
105	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
106	 * fine.
107	 */
108	uint32_t z;
109
110	z = y - x;
111	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
112}
113
114/*
115 * Compute the bit length of a 32-bit integer. Returned value is between 0
116 * and 32 (inclusive).
117 */
118static uint32_t BIT_LENGTH(uint32_t x)
119{
120	uint32_t k, c;
121
122	k = NEQ(x, 0);
123	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
124	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
125	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
126	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
127	k += GT(x, 0x0001);
128	return k;
129}
130
131#define GE(x, y)   NOT(GT(y, x))
132#define LT(x, y)   GT(y, x)
133#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
134
135/*
136 * Integers 'i32'
137 * --------------
138 *
139 * The 'i32' functions implement computations on big integers using
140 * an internal representation as an array of 32-bit integers. For
141 * an array x[]:
142 *  -- x[0] contains the "announced bit length" of the integer
143 *  -- x[1], x[2]... contain the value in little-endian order (x[1]
144 *     contains the least significant 32 bits)
145 *
146 * Multiplications rely on the elementary 32x32->64 multiplication.
147 *
148 * The announced bit length specifies the number of bits that are
149 * significant in the subsequent 32-bit words. Unused bits in the
150 * last (most significant) word are set to 0; subsequent words are
151 * uninitialized and need not exist at all.
152 *
153 * The execution time and memory access patterns of all computations
154 * depend on the announced bit length, but not on the actual word
155 * values. For modular integers, the announced bit length of any integer
156 * modulo n is equal to the actual bit length of n; thus, computations
157 * on modular integers are "constant-time" (only the modulus length may
158 * leak).
159 */
160
161/*
162 * Extract one word from an integer. The offset is counted in bits.
163 * The word MUST entirely fit within the word elements corresponding
164 * to the announced bit length of a[].
165 */
166static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
167{
168	size_t u;
169	unsigned j;
170
171	u = (size_t)(off >> 5) + 1;
172	j = (unsigned)off & 31;
173	if (j == 0) {
174		return a[u];
175	} else {
176		return (a[u] >> j) | (a[u + 1] << (32 - j));
177	}
178}
179
180/* from BearSSL's src/int/i32_bitlen.c */
181
182/*
183 * Compute the actual bit length of an integer. The argument x should
184 * point to the first (least significant) value word of the integer.
185 * The len 'xlen' contains the number of 32-bit words to access.
186 *
187 * CT: value or length of x does not leak.
188 */
189static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
190{
191	uint32_t tw, twk;
192
193	tw = 0;
194	twk = 0;
195	while (xlen -- > 0) {
196		uint32_t w, c;
197
198		c = EQ(tw, 0);
199		w = x[xlen];
200		tw = MUX(c, w, tw);
201		twk = MUX(c, (uint32_t)xlen, twk);
202	}
203	return (twk << 5) + BIT_LENGTH(tw);
204}
205
206/* from BearSSL's src/int/i32_decode.c */
207
208/*
209 * Decode an integer from its big-endian unsigned representation. The
210 * "true" bit length of the integer is computed, but all words of x[]
211 * corresponding to the full 'len' bytes of the source are set.
212 *
213 * CT: value or length of x does not leak.
214 */
215static void br_i32_decode(uint32_t *x, const void *src, size_t len)
216{
217	const unsigned char *buf;
218	size_t u, v;
219
220	buf = src;
221	u = len;
222	v = 1;
223	for (;;) {
224		if (u < 4) {
225			uint32_t w;
226
227			if (u < 2) {
228				if (u == 0) {
229					break;
230				} else {
231					w = buf[0];
232				}
233			} else {
234				if (u == 2) {
235					w = br_dec16be(buf);
236				} else {
237					w = ((uint32_t)buf[0] << 16)
238						| br_dec16be(buf + 1);
239				}
240			}
241			x[v ++] = w;
242			break;
243		} else {
244			u -= 4;
245			x[v ++] = br_dec32be(buf + u);
246		}
247	}
248	x[0] = br_i32_bit_length(x + 1, v - 1);
249}
250
251/* from BearSSL's src/int/i32_encode.c */
252
253/*
254 * Encode an integer into its big-endian unsigned representation. The
255 * output length in bytes is provided (parameter 'len'); if the length
256 * is too short then the integer is appropriately truncated; if it is
257 * too long then the extra bytes are set to 0.
258 */
259static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
260{
261	unsigned char *buf;
262	size_t k;
263
264	buf = dst;
265
266	/*
267	 * Compute the announced size of x in bytes; extra bytes are
268	 * filled with zeros.
269	 */
270	k = (x[0] + 7) >> 3;
271	while (len > k) {
272		*buf ++ = 0;
273		len --;
274	}
275
276	/*
277	 * Now we use k as index within x[]. That index starts at 1;
278	 * we initialize it to the topmost complete word, and process
279	 * any remaining incomplete word.
280	 */
281	k = (len + 3) >> 2;
282	switch (len & 3) {
283	case 3:
284		*buf ++ = x[k] >> 16;
285		/* fall through */
286	case 2:
287		*buf ++ = x[k] >> 8;
288		/* fall through */
289	case 1:
290		*buf ++ = x[k];
291		k --;
292	}
293
294	/*
295	 * Encode all complete words.
296	 */
297	while (k > 0) {
298		br_enc32be(buf, x[k]);
299		k --;
300		buf += 4;
301	}
302}
303
304/* from BearSSL's src/int/i32_ninv32.c */
305
306/*
307 * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
308 */
309static uint32_t br_i32_ninv32(uint32_t x)
310{
311	uint32_t y;
312
313	y = 2 - x;
314	y *= 2 - y * x;
315	y *= 2 - y * x;
316	y *= 2 - y * x;
317	y *= 2 - y * x;
318	return MUX(x & 1, -y, 0);
319}
320
321/* from BearSSL's src/int/i32_add.c */
322
323/*
324 * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
325 * is unmodified, but the carry is still computed and returned. The
326 * arrays a[] and b[] MUST have the same announced bit length.
327 *
328 * a[] and b[] MAY be the same array, but partial overlap is not allowed.
329 */
330static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
331{
332	uint32_t cc;
333	size_t u, m;
334
335	cc = 0;
336	m = (a[0] + 63) >> 5;
337	for (u = 1; u < m; u ++) {
338		uint32_t aw, bw, naw;
339
340		aw = a[u];
341		bw = b[u];
342		naw = aw + bw + cc;
343
344		/*
345		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
346		 * AND the carry was already 1.
347		 */
348		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
349		a[u] = MUX(ctl, naw, aw);
350	}
351	return cc;
352}
353
354/* from BearSSL's src/int/i32_sub.c */
355
356/*
357 * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
358 * then a[] is unmodified, but the carry is still computed and returned.
359 * The arrays a[] and b[] MUST have the same announced bit length.
360 *
361 * a[] and b[] MAY be the same array, but partial overlap is not allowed.
362 */
363static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
364{
365	uint32_t cc;
366	size_t u, m;
367
368	cc = 0;
369	m = (a[0] + 63) >> 5;
370	for (u = 1; u < m; u ++) {
371		uint32_t aw, bw, naw;
372
373		aw = a[u];
374		bw = b[u];
375		naw = aw - bw - cc;
376
377		/*
378		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
379		 * AND the carry was already 1.
380		 */
381		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
382		a[u] = MUX(ctl, naw, aw);
383	}
384	return cc;
385}
386
387/* from BearSSL's src/int/i32_div32.c */
388
389/*
390 * Constant-time division. The dividend hi:lo is divided by the
391 * divisor d; the quotient is returned and the remainder is written
392 * in *r. If hi == d, then the quotient does not fit on 32 bits;
393 * returned value is thus truncated. If hi > d, returned values are
394 * indeterminate.
395 */
396static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
397{
398	/* TODO: optimize this */
399	uint32_t q;
400	uint32_t ch, cf;
401	int k;
402
403	q = 0;
404	ch = EQ(hi, d);
405	hi = MUX(ch, 0, hi);
406	for (k = 31; k > 0; k --) {
407		int j;
408		uint32_t w, ctl, hi2, lo2;
409
410		j = 32 - k;
411		w = (hi << j) | (lo >> k);
412		ctl = GE(w, d) | (hi >> k);
413		hi2 = (w - d) >> j;
414		lo2 = lo - (d << k);
415		hi = MUX(ctl, hi2, hi);
416		lo = MUX(ctl, lo2, lo);
417		q |= ctl << k;
418	}
419	cf = GE(lo, d) | hi;
420	q |= cf;
421	*r = MUX(cf, lo - d, lo);
422	return q;
423}
424
425/*
426 * Wrapper for br_divrem(); the remainder is returned, and the quotient
427 * is discarded.
428 */
429static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
430{
431	uint32_t r;
432
433	br_divrem(hi, lo, d, &r);
434	return r;
435}
436
437/*
438 * Wrapper for br_divrem(); the quotient is returned, and the remainder
439 * is discarded.
440 */
441static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
442{
443	uint32_t r;
444
445	return br_divrem(hi, lo, d, &r);
446}
447
448/* from BearSSL's src/int/i32_muladd.c */
449
450/*
451 * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
452 * function assumes that x[] and m[] have the same announced bit
453 * length, and the announced bit length of m[] matches its true
454 * bit length.
455 *
456 * x[] and m[] MUST be distinct arrays.
457 *
458 * CT: only the common announced bit length of x and m leaks, not
459 * the values of x, z or m.
460 */
461static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
462{
463	uint32_t m_bitlen;
464	size_t u, mlen;
465	uint32_t a0, a1, b0, hi, g, q, tb;
466	uint32_t chf, clow, under, over;
467	uint64_t cc;
468
469	/*
470	 * We can test on the modulus bit length since we accept to
471	 * leak that length.
472	 */
473	m_bitlen = m[0];
474	if (m_bitlen == 0) {
475		return;
476	}
477	if (m_bitlen <= 32) {
478		x[1] = br_rem(x[1], z, m[1]);
479		return;
480	}
481	mlen = (m_bitlen + 31) >> 5;
482
483	/*
484	 * Principle: we estimate the quotient (x*2^32+z)/m by
485	 * doing a 64/32 division with the high words.
486	 *
487	 * Let:
488	 *   w = 2^32
489	 *   a = (w*a0 + a1) * w^N + a2
490	 *   b = b0 * w^N + b2
491	 * such that:
492	 *   0 <= a0 < w
493	 *   0 <= a1 < w
494	 *   0 <= a2 < w^N
495	 *   w/2 <= b0 < w
496	 *   0 <= b2 < w^N
497	 *   a < w*b
498	 * I.e. the two top words of a are a0:a1, the top word of b is
499	 * b0, we ensured that b0 is "full" (high bit set), and a is
500	 * such that the quotient q = a/b fits on one word (0 <= q < w).
501	 *
502	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
503	 * doing an Euclidean division on the top words:
504	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
505	 * Then the following holds:
506	 *   0 <= u <= w
507	 *   u-2 <= q <= u
508	 */
509	a0 = br_i32_word(x, m_bitlen - 32);
510	hi = x[mlen];
511	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
512	x[1] = z;
513	a1 = br_i32_word(x, m_bitlen - 32);
514	b0 = br_i32_word(m, m_bitlen - 32);
515
516	/*
517	 * We estimate a divisor q. If the quotient returned by br_div()
518	 * is g:
519	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
520	 * -- Otherwise:
521	 *    -- if g == 0 then we set q = 0;
522	 *    -- otherwise, we set q = g - 1.
523	 * The properties described above then ensure that the true
524	 * quotient is q-1, q or q+1.
525	 */
526	g = br_div(a0, a1, b0);
527	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
528
529	/*
530	 * We subtract q*m from x (with the extra high word of value 'hi').
531	 * Since q may be off by 1 (in either direction), we may have to
532	 * add or subtract m afterwards.
533	 *
534	 * The 'tb' flag will be true (1) at the end of the loop if the
535	 * result is greater than or equal to the modulus (not counting
536	 * 'hi' or the carry).
537	 */
538	cc = 0;
539	tb = 1;
540	for (u = 1; u <= mlen; u ++) {
541		uint32_t mw, zw, xw, nxw;
542		uint64_t zl;
543
544		mw = m[u];
545		zl = MUL(mw, q) + cc;
546		cc = (uint32_t)(zl >> 32);
547		zw = (uint32_t)zl;
548		xw = x[u];
549		nxw = xw - zw;
550		cc += (uint64_t)GT(nxw, xw);
551		x[u] = nxw;
552		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
553	}
554
555	/*
556	 * If we underestimated q, then either cc < hi (one extra bit
557	 * beyond the top array word), or cc == hi and tb is true (no
558	 * extra bit, but the result is not lower than the modulus). In
559	 * these cases we must subtract m once.
560	 *
561	 * Otherwise, we may have overestimated, which will show as
562	 * cc > hi (thus a negative result). Correction is adding m once.
563	 */
564	chf = (uint32_t)(cc >> 32);
565	clow = (uint32_t)cc;
566	over = chf | GT(clow, hi);
567	under = ~over & (tb | (~chf & LT(clow, hi)));
568	br_i32_add(x, m, over);
569	br_i32_sub(x, m, under);
570}
571
572/* from BearSSL's src/int/i32_reduce.c */
573
574/*
575 * Reduce an integer (a[]) modulo another (m[]). The result is written
576 * in x[] and its announced bit length is set to be equal to that of m[].
577 *
578 * x[] MUST be distinct from a[] and m[].
579 *
580 * CT: only announced bit lengths leak, not values of x, a or m.
581 */
582static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
583{
584	uint32_t m_bitlen, a_bitlen;
585	size_t mlen, alen, u;
586
587	m_bitlen = m[0];
588	mlen = (m_bitlen + 31) >> 5;
589
590	x[0] = m_bitlen;
591	if (m_bitlen == 0) {
592		return;
593	}
594
595	/*
596	 * If the source is shorter, then simply copy all words from a[]
597	 * and zero out the upper words.
598	 */
599	a_bitlen = a[0];
600	alen = (a_bitlen + 31) >> 5;
601	if (a_bitlen < m_bitlen) {
602		memcpy(x + 1, a + 1, alen * sizeof *a);
603		for (u = alen; u < mlen; u ++) {
604			x[u + 1] = 0;
605		}
606		return;
607	}
608
609	/*
610	 * The source length is at least equal to that of the modulus.
611	 * We must thus copy N-1 words, and input the remaining words
612	 * one by one.
613	 */
614	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
615	x[mlen] = 0;
616	for (u = 1 + alen - mlen; u > 0; u --) {
617		br_i32_muladd_small(x, a[u], m);
618	}
619}
620
621/**
622 * rsa_free_key_prop() - Free key properties
623 * @prop:	Pointer to struct key_prop
624 *
625 * This function frees all the memories allocated by rsa_gen_key_prop().
626 */
627void rsa_free_key_prop(struct key_prop *prop)
628{
629	if (!prop)
630		return;
631
632	free((void *)prop->modulus);
633	free((void *)prop->public_exponent);
634	free((void *)prop->rr);
635
636	free(prop);
637}
638
639/**
640 * rsa_gen_key_prop() - Generate key properties of RSA public key
641 * @key:	Specifies key data in DER format
642 * @keylen:	Length of @key
643 * @prop:	Generated key property
644 *
645 * This function takes a blob of encoded RSA public key data in DER
646 * format, parse it and generate all the relevant properties
647 * in key_prop structure.
648 * Return a pointer to struct key_prop in @prop on success.
649 *
650 * Return:	0 on success, negative on error
651 */
652int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop)
653{
654	struct rsa_key rsa_key;
655	uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
656	int rlen, i, ret = 0;
657
658	*prop = calloc(sizeof(**prop), 1);
659	if (!(*prop)) {
660		ret = -ENOMEM;
661		goto out;
662	}
663
664	ret = rsa_parse_pub_key(&rsa_key, key, keylen);
665	if (ret)
666		goto out;
667
668	/* modulus */
669	/* removing leading 0's */
670	for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
671		;
672	(*prop)->num_bits = (rsa_key.n_sz - i) * 8;
673	(*prop)->modulus = malloc(rsa_key.n_sz - i);
674	if (!(*prop)->modulus) {
675		ret = -ENOMEM;
676		goto out;
677	}
678	memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
679
680	n = calloc(sizeof(uint32_t), 1 + ((*prop)->num_bits >> 5));
681	rr = calloc(sizeof(uint32_t), 1 + (((*prop)->num_bits * 2) >> 5));
682	rrtmp = calloc(sizeof(uint32_t), 2 + (((*prop)->num_bits * 2) >> 5));
683	if (!n || !rr || !rrtmp) {
684		ret = -ENOMEM;
685		goto out;
686	}
687
688	/* exponent */
689	(*prop)->public_exponent = calloc(1, sizeof(uint64_t));
690	if (!(*prop)->public_exponent) {
691		ret = -ENOMEM;
692		goto out;
693	}
694	memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
695						- rsa_key.e_sz,
696	       rsa_key.e, rsa_key.e_sz);
697	(*prop)->exp_len = sizeof(uint64_t);
698
699	/* n0 inverse */
700	br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
701	(*prop)->n0inv = br_i32_ninv32(n[1]);
702
703	/* R^2 mod n; R = 2^(num_bits) */
704	rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
705	rr[0] = 0;
706	*(uint8_t *)&rr[0] = (1 << (rlen % 8));
707	for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
708		rr[i] = 0;
709	br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
710	br_i32_reduce(rr, rrtmp, n);
711
712	rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
713	(*prop)->rr = malloc(rlen);
714	if (!(*prop)->rr) {
715		ret = -ENOMEM;
716		goto out;
717	}
718	br_i32_encode((void *)(*prop)->rr, rlen, rr);
719
720out:
721	free(n);
722	free(rr);
723	free(rrtmp);
724	if (ret < 0)
725		rsa_free_key_prop(*prop);
726	return ret;
727}
728