1// SPDX-License-Identifier: GPL-2.0+ and MIT 2/* 3 * RSA library - generate parameters for a public key 4 * 5 * Copyright (c) 2019 Linaro Limited 6 * Author: AKASHI Takahiro 7 * 8 * Big number routines in this file come from BearSSL: 9 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 10 */ 11 12#include <image.h> 13#include <malloc.h> 14#include <crypto/internal/rsa.h> 15#include <u-boot/rsa-mod-exp.h> 16#include <asm/unaligned.h> 17 18/** 19 * br_dec16be() - Convert 16-bit big-endian integer to native 20 * @src: Pointer to data 21 * Return: Native-endian integer 22 */ 23static unsigned br_dec16be(const void *src) 24{ 25 return get_unaligned_be16(src); 26} 27 28/** 29 * br_dec32be() - Convert 32-bit big-endian integer to native 30 * @src: Pointer to data 31 * Return: Native-endian integer 32 */ 33static uint32_t br_dec32be(const void *src) 34{ 35 return get_unaligned_be32(src); 36} 37 38/** 39 * br_enc32be() - Convert native 32-bit integer to big-endian 40 * @dst: Pointer to buffer to store big-endian integer in 41 * @x: Native 32-bit integer 42 */ 43static void br_enc32be(void *dst, uint32_t x) 44{ 45 __be32 tmp; 46 47 tmp = cpu_to_be32(x); 48 memcpy(dst, &tmp, sizeof(tmp)); 49} 50 51/* from BearSSL's src/inner.h */ 52 53/* 54 * Negate a boolean. 55 */ 56static uint32_t NOT(uint32_t ctl) 57{ 58 return ctl ^ 1; 59} 60 61/* 62 * Multiplexer: returns x if ctl == 1, y if ctl == 0. 63 */ 64static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y) 65{ 66 return y ^ (-ctl & (x ^ y)); 67} 68 69/* 70 * Equality check: returns 1 if x == y, 0 otherwise. 71 */ 72static uint32_t EQ(uint32_t x, uint32_t y) 73{ 74 uint32_t q; 75 76 q = x ^ y; 77 return NOT((q | -q) >> 31); 78} 79 80/* 81 * Inequality check: returns 1 if x != y, 0 otherwise. 82 */ 83static uint32_t NEQ(uint32_t x, uint32_t y) 84{ 85 uint32_t q; 86 87 q = x ^ y; 88 return (q | -q) >> 31; 89} 90 91/* 92 * Comparison: returns 1 if x > y, 0 otherwise. 93 */ 94static uint32_t GT(uint32_t x, uint32_t y) 95{ 96 /* 97 * If both x < 2^31 and y < 2^31, then y-x will have its high 98 * bit set if x > y, cleared otherwise. 99 * 100 * If either x >= 2^31 or y >= 2^31 (but not both), then the 101 * result is the high bit of x. 102 * 103 * If both x >= 2^31 and y >= 2^31, then we can virtually 104 * subtract 2^31 from both, and we are back to the first case. 105 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already 106 * fine. 107 */ 108 uint32_t z; 109 110 z = y - x; 111 return (z ^ ((x ^ y) & (x ^ z))) >> 31; 112} 113 114/* 115 * Compute the bit length of a 32-bit integer. Returned value is between 0 116 * and 32 (inclusive). 117 */ 118static uint32_t BIT_LENGTH(uint32_t x) 119{ 120 uint32_t k, c; 121 122 k = NEQ(x, 0); 123 c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; 124 c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; 125 c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; 126 c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; 127 k += GT(x, 0x0001); 128 return k; 129} 130 131#define GE(x, y) NOT(GT(y, x)) 132#define LT(x, y) GT(y, x) 133#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) 134 135/* 136 * Integers 'i32' 137 * -------------- 138 * 139 * The 'i32' functions implement computations on big integers using 140 * an internal representation as an array of 32-bit integers. For 141 * an array x[]: 142 * -- x[0] contains the "announced bit length" of the integer 143 * -- x[1], x[2]... contain the value in little-endian order (x[1] 144 * contains the least significant 32 bits) 145 * 146 * Multiplications rely on the elementary 32x32->64 multiplication. 147 * 148 * The announced bit length specifies the number of bits that are 149 * significant in the subsequent 32-bit words. Unused bits in the 150 * last (most significant) word are set to 0; subsequent words are 151 * uninitialized and need not exist at all. 152 * 153 * The execution time and memory access patterns of all computations 154 * depend on the announced bit length, but not on the actual word 155 * values. For modular integers, the announced bit length of any integer 156 * modulo n is equal to the actual bit length of n; thus, computations 157 * on modular integers are "constant-time" (only the modulus length may 158 * leak). 159 */ 160 161/* 162 * Extract one word from an integer. The offset is counted in bits. 163 * The word MUST entirely fit within the word elements corresponding 164 * to the announced bit length of a[]. 165 */ 166static uint32_t br_i32_word(const uint32_t *a, uint32_t off) 167{ 168 size_t u; 169 unsigned j; 170 171 u = (size_t)(off >> 5) + 1; 172 j = (unsigned)off & 31; 173 if (j == 0) { 174 return a[u]; 175 } else { 176 return (a[u] >> j) | (a[u + 1] << (32 - j)); 177 } 178} 179 180/* from BearSSL's src/int/i32_bitlen.c */ 181 182/* 183 * Compute the actual bit length of an integer. The argument x should 184 * point to the first (least significant) value word of the integer. 185 * The len 'xlen' contains the number of 32-bit words to access. 186 * 187 * CT: value or length of x does not leak. 188 */ 189static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen) 190{ 191 uint32_t tw, twk; 192 193 tw = 0; 194 twk = 0; 195 while (xlen -- > 0) { 196 uint32_t w, c; 197 198 c = EQ(tw, 0); 199 w = x[xlen]; 200 tw = MUX(c, w, tw); 201 twk = MUX(c, (uint32_t)xlen, twk); 202 } 203 return (twk << 5) + BIT_LENGTH(tw); 204} 205 206/* from BearSSL's src/int/i32_decode.c */ 207 208/* 209 * Decode an integer from its big-endian unsigned representation. The 210 * "true" bit length of the integer is computed, but all words of x[] 211 * corresponding to the full 'len' bytes of the source are set. 212 * 213 * CT: value or length of x does not leak. 214 */ 215static void br_i32_decode(uint32_t *x, const void *src, size_t len) 216{ 217 const unsigned char *buf; 218 size_t u, v; 219 220 buf = src; 221 u = len; 222 v = 1; 223 for (;;) { 224 if (u < 4) { 225 uint32_t w; 226 227 if (u < 2) { 228 if (u == 0) { 229 break; 230 } else { 231 w = buf[0]; 232 } 233 } else { 234 if (u == 2) { 235 w = br_dec16be(buf); 236 } else { 237 w = ((uint32_t)buf[0] << 16) 238 | br_dec16be(buf + 1); 239 } 240 } 241 x[v ++] = w; 242 break; 243 } else { 244 u -= 4; 245 x[v ++] = br_dec32be(buf + u); 246 } 247 } 248 x[0] = br_i32_bit_length(x + 1, v - 1); 249} 250 251/* from BearSSL's src/int/i32_encode.c */ 252 253/* 254 * Encode an integer into its big-endian unsigned representation. The 255 * output length in bytes is provided (parameter 'len'); if the length 256 * is too short then the integer is appropriately truncated; if it is 257 * too long then the extra bytes are set to 0. 258 */ 259static void br_i32_encode(void *dst, size_t len, const uint32_t *x) 260{ 261 unsigned char *buf; 262 size_t k; 263 264 buf = dst; 265 266 /* 267 * Compute the announced size of x in bytes; extra bytes are 268 * filled with zeros. 269 */ 270 k = (x[0] + 7) >> 3; 271 while (len > k) { 272 *buf ++ = 0; 273 len --; 274 } 275 276 /* 277 * Now we use k as index within x[]. That index starts at 1; 278 * we initialize it to the topmost complete word, and process 279 * any remaining incomplete word. 280 */ 281 k = (len + 3) >> 2; 282 switch (len & 3) { 283 case 3: 284 *buf ++ = x[k] >> 16; 285 /* fall through */ 286 case 2: 287 *buf ++ = x[k] >> 8; 288 /* fall through */ 289 case 1: 290 *buf ++ = x[k]; 291 k --; 292 } 293 294 /* 295 * Encode all complete words. 296 */ 297 while (k > 0) { 298 br_enc32be(buf, x[k]); 299 k --; 300 buf += 4; 301 } 302} 303 304/* from BearSSL's src/int/i32_ninv32.c */ 305 306/* 307 * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. 308 */ 309static uint32_t br_i32_ninv32(uint32_t x) 310{ 311 uint32_t y; 312 313 y = 2 - x; 314 y *= 2 - y * x; 315 y *= 2 - y * x; 316 y *= 2 - y * x; 317 y *= 2 - y * x; 318 return MUX(x & 1, -y, 0); 319} 320 321/* from BearSSL's src/int/i32_add.c */ 322 323/* 324 * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] 325 * is unmodified, but the carry is still computed and returned. The 326 * arrays a[] and b[] MUST have the same announced bit length. 327 * 328 * a[] and b[] MAY be the same array, but partial overlap is not allowed. 329 */ 330static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl) 331{ 332 uint32_t cc; 333 size_t u, m; 334 335 cc = 0; 336 m = (a[0] + 63) >> 5; 337 for (u = 1; u < m; u ++) { 338 uint32_t aw, bw, naw; 339 340 aw = a[u]; 341 bw = b[u]; 342 naw = aw + bw + cc; 343 344 /* 345 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw 346 * AND the carry was already 1. 347 */ 348 cc = (cc & EQ(naw, aw)) | LT(naw, aw); 349 a[u] = MUX(ctl, naw, aw); 350 } 351 return cc; 352} 353 354/* from BearSSL's src/int/i32_sub.c */ 355 356/* 357 * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, 358 * then a[] is unmodified, but the carry is still computed and returned. 359 * The arrays a[] and b[] MUST have the same announced bit length. 360 * 361 * a[] and b[] MAY be the same array, but partial overlap is not allowed. 362 */ 363static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl) 364{ 365 uint32_t cc; 366 size_t u, m; 367 368 cc = 0; 369 m = (a[0] + 63) >> 5; 370 for (u = 1; u < m; u ++) { 371 uint32_t aw, bw, naw; 372 373 aw = a[u]; 374 bw = b[u]; 375 naw = aw - bw - cc; 376 377 /* 378 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw 379 * AND the carry was already 1. 380 */ 381 cc = (cc & EQ(naw, aw)) | GT(naw, aw); 382 a[u] = MUX(ctl, naw, aw); 383 } 384 return cc; 385} 386 387/* from BearSSL's src/int/i32_div32.c */ 388 389/* 390 * Constant-time division. The dividend hi:lo is divided by the 391 * divisor d; the quotient is returned and the remainder is written 392 * in *r. If hi == d, then the quotient does not fit on 32 bits; 393 * returned value is thus truncated. If hi > d, returned values are 394 * indeterminate. 395 */ 396static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r) 397{ 398 /* TODO: optimize this */ 399 uint32_t q; 400 uint32_t ch, cf; 401 int k; 402 403 q = 0; 404 ch = EQ(hi, d); 405 hi = MUX(ch, 0, hi); 406 for (k = 31; k > 0; k --) { 407 int j; 408 uint32_t w, ctl, hi2, lo2; 409 410 j = 32 - k; 411 w = (hi << j) | (lo >> k); 412 ctl = GE(w, d) | (hi >> k); 413 hi2 = (w - d) >> j; 414 lo2 = lo - (d << k); 415 hi = MUX(ctl, hi2, hi); 416 lo = MUX(ctl, lo2, lo); 417 q |= ctl << k; 418 } 419 cf = GE(lo, d) | hi; 420 q |= cf; 421 *r = MUX(cf, lo - d, lo); 422 return q; 423} 424 425/* 426 * Wrapper for br_divrem(); the remainder is returned, and the quotient 427 * is discarded. 428 */ 429static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d) 430{ 431 uint32_t r; 432 433 br_divrem(hi, lo, d, &r); 434 return r; 435} 436 437/* 438 * Wrapper for br_divrem(); the quotient is returned, and the remainder 439 * is discarded. 440 */ 441static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d) 442{ 443 uint32_t r; 444 445 return br_divrem(hi, lo, d, &r); 446} 447 448/* from BearSSL's src/int/i32_muladd.c */ 449 450/* 451 * Multiply x[] by 2^32 and then add integer z, modulo m[]. This 452 * function assumes that x[] and m[] have the same announced bit 453 * length, and the announced bit length of m[] matches its true 454 * bit length. 455 * 456 * x[] and m[] MUST be distinct arrays. 457 * 458 * CT: only the common announced bit length of x and m leaks, not 459 * the values of x, z or m. 460 */ 461static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) 462{ 463 uint32_t m_bitlen; 464 size_t u, mlen; 465 uint32_t a0, a1, b0, hi, g, q, tb; 466 uint32_t chf, clow, under, over; 467 uint64_t cc; 468 469 /* 470 * We can test on the modulus bit length since we accept to 471 * leak that length. 472 */ 473 m_bitlen = m[0]; 474 if (m_bitlen == 0) { 475 return; 476 } 477 if (m_bitlen <= 32) { 478 x[1] = br_rem(x[1], z, m[1]); 479 return; 480 } 481 mlen = (m_bitlen + 31) >> 5; 482 483 /* 484 * Principle: we estimate the quotient (x*2^32+z)/m by 485 * doing a 64/32 division with the high words. 486 * 487 * Let: 488 * w = 2^32 489 * a = (w*a0 + a1) * w^N + a2 490 * b = b0 * w^N + b2 491 * such that: 492 * 0 <= a0 < w 493 * 0 <= a1 < w 494 * 0 <= a2 < w^N 495 * w/2 <= b0 < w 496 * 0 <= b2 < w^N 497 * a < w*b 498 * I.e. the two top words of a are a0:a1, the top word of b is 499 * b0, we ensured that b0 is "full" (high bit set), and a is 500 * such that the quotient q = a/b fits on one word (0 <= q < w). 501 * 502 * If a = b*q + r (with 0 <= r < q), we can estimate q by 503 * doing an Euclidean division on the top words: 504 * a0*w+a1 = b0*u + v (with 0 <= v < w) 505 * Then the following holds: 506 * 0 <= u <= w 507 * u-2 <= q <= u 508 */ 509 a0 = br_i32_word(x, m_bitlen - 32); 510 hi = x[mlen]; 511 memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); 512 x[1] = z; 513 a1 = br_i32_word(x, m_bitlen - 32); 514 b0 = br_i32_word(m, m_bitlen - 32); 515 516 /* 517 * We estimate a divisor q. If the quotient returned by br_div() 518 * is g: 519 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. 520 * -- Otherwise: 521 * -- if g == 0 then we set q = 0; 522 * -- otherwise, we set q = g - 1. 523 * The properties described above then ensure that the true 524 * quotient is q-1, q or q+1. 525 */ 526 g = br_div(a0, a1, b0); 527 q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); 528 529 /* 530 * We subtract q*m from x (with the extra high word of value 'hi'). 531 * Since q may be off by 1 (in either direction), we may have to 532 * add or subtract m afterwards. 533 * 534 * The 'tb' flag will be true (1) at the end of the loop if the 535 * result is greater than or equal to the modulus (not counting 536 * 'hi' or the carry). 537 */ 538 cc = 0; 539 tb = 1; 540 for (u = 1; u <= mlen; u ++) { 541 uint32_t mw, zw, xw, nxw; 542 uint64_t zl; 543 544 mw = m[u]; 545 zl = MUL(mw, q) + cc; 546 cc = (uint32_t)(zl >> 32); 547 zw = (uint32_t)zl; 548 xw = x[u]; 549 nxw = xw - zw; 550 cc += (uint64_t)GT(nxw, xw); 551 x[u] = nxw; 552 tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); 553 } 554 555 /* 556 * If we underestimated q, then either cc < hi (one extra bit 557 * beyond the top array word), or cc == hi and tb is true (no 558 * extra bit, but the result is not lower than the modulus). In 559 * these cases we must subtract m once. 560 * 561 * Otherwise, we may have overestimated, which will show as 562 * cc > hi (thus a negative result). Correction is adding m once. 563 */ 564 chf = (uint32_t)(cc >> 32); 565 clow = (uint32_t)cc; 566 over = chf | GT(clow, hi); 567 under = ~over & (tb | (~chf & LT(clow, hi))); 568 br_i32_add(x, m, over); 569 br_i32_sub(x, m, under); 570} 571 572/* from BearSSL's src/int/i32_reduce.c */ 573 574/* 575 * Reduce an integer (a[]) modulo another (m[]). The result is written 576 * in x[] and its announced bit length is set to be equal to that of m[]. 577 * 578 * x[] MUST be distinct from a[] and m[]. 579 * 580 * CT: only announced bit lengths leak, not values of x, a or m. 581 */ 582static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m) 583{ 584 uint32_t m_bitlen, a_bitlen; 585 size_t mlen, alen, u; 586 587 m_bitlen = m[0]; 588 mlen = (m_bitlen + 31) >> 5; 589 590 x[0] = m_bitlen; 591 if (m_bitlen == 0) { 592 return; 593 } 594 595 /* 596 * If the source is shorter, then simply copy all words from a[] 597 * and zero out the upper words. 598 */ 599 a_bitlen = a[0]; 600 alen = (a_bitlen + 31) >> 5; 601 if (a_bitlen < m_bitlen) { 602 memcpy(x + 1, a + 1, alen * sizeof *a); 603 for (u = alen; u < mlen; u ++) { 604 x[u + 1] = 0; 605 } 606 return; 607 } 608 609 /* 610 * The source length is at least equal to that of the modulus. 611 * We must thus copy N-1 words, and input the remaining words 612 * one by one. 613 */ 614 memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); 615 x[mlen] = 0; 616 for (u = 1 + alen - mlen; u > 0; u --) { 617 br_i32_muladd_small(x, a[u], m); 618 } 619} 620 621/** 622 * rsa_free_key_prop() - Free key properties 623 * @prop: Pointer to struct key_prop 624 * 625 * This function frees all the memories allocated by rsa_gen_key_prop(). 626 */ 627void rsa_free_key_prop(struct key_prop *prop) 628{ 629 if (!prop) 630 return; 631 632 free((void *)prop->modulus); 633 free((void *)prop->public_exponent); 634 free((void *)prop->rr); 635 636 free(prop); 637} 638 639/** 640 * rsa_gen_key_prop() - Generate key properties of RSA public key 641 * @key: Specifies key data in DER format 642 * @keylen: Length of @key 643 * @prop: Generated key property 644 * 645 * This function takes a blob of encoded RSA public key data in DER 646 * format, parse it and generate all the relevant properties 647 * in key_prop structure. 648 * Return a pointer to struct key_prop in @prop on success. 649 * 650 * Return: 0 on success, negative on error 651 */ 652int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop) 653{ 654 struct rsa_key rsa_key; 655 uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL; 656 int rlen, i, ret = 0; 657 658 *prop = calloc(sizeof(**prop), 1); 659 if (!(*prop)) { 660 ret = -ENOMEM; 661 goto out; 662 } 663 664 ret = rsa_parse_pub_key(&rsa_key, key, keylen); 665 if (ret) 666 goto out; 667 668 /* modulus */ 669 /* removing leading 0's */ 670 for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++) 671 ; 672 (*prop)->num_bits = (rsa_key.n_sz - i) * 8; 673 (*prop)->modulus = malloc(rsa_key.n_sz - i); 674 if (!(*prop)->modulus) { 675 ret = -ENOMEM; 676 goto out; 677 } 678 memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i); 679 680 n = calloc(sizeof(uint32_t), 1 + ((*prop)->num_bits >> 5)); 681 rr = calloc(sizeof(uint32_t), 1 + (((*prop)->num_bits * 2) >> 5)); 682 rrtmp = calloc(sizeof(uint32_t), 2 + (((*prop)->num_bits * 2) >> 5)); 683 if (!n || !rr || !rrtmp) { 684 ret = -ENOMEM; 685 goto out; 686 } 687 688 /* exponent */ 689 (*prop)->public_exponent = calloc(1, sizeof(uint64_t)); 690 if (!(*prop)->public_exponent) { 691 ret = -ENOMEM; 692 goto out; 693 } 694 memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t) 695 - rsa_key.e_sz, 696 rsa_key.e, rsa_key.e_sz); 697 (*prop)->exp_len = sizeof(uint64_t); 698 699 /* n0 inverse */ 700 br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i); 701 (*prop)->n0inv = br_i32_ninv32(n[1]); 702 703 /* R^2 mod n; R = 2^(num_bits) */ 704 rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */ 705 rr[0] = 0; 706 *(uint8_t *)&rr[0] = (1 << (rlen % 8)); 707 for (i = 1; i < (((rlen + 31) >> 5) + 1); i++) 708 rr[i] = 0; 709 br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1); 710 br_i32_reduce(rr, rrtmp, n); 711 712 rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */ 713 (*prop)->rr = malloc(rlen); 714 if (!(*prop)->rr) { 715 ret = -ENOMEM; 716 goto out; 717 } 718 br_i32_encode((void *)(*prop)->rr, rlen, rr); 719 720out: 721 free(n); 722 free(rr); 723 free(rrtmp); 724 if (ret < 0) 725 rsa_free_key_prop(*prop); 726 return ret; 727} 728