ec_mult.c revision 340704
1/* crypto/ec/ec_mult.c */ 2/* 3 * Originally written by Bodo Moeller and Nils Larsch for the OpenSSL project. 4 */ 5/* ==================================================================== 6 * Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * 3. All advertising materials mentioning features or use of this 21 * software must display the following acknowledgment: 22 * "This product includes software developed by the OpenSSL Project 23 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 24 * 25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 26 * endorse or promote products derived from this software without 27 * prior written permission. For written permission, please contact 28 * openssl-core@openssl.org. 29 * 30 * 5. Products derived from this software may not be called "OpenSSL" 31 * nor may "OpenSSL" appear in their names without prior written 32 * permission of the OpenSSL Project. 33 * 34 * 6. Redistributions of any form whatsoever must retain the following 35 * acknowledgment: 36 * "This product includes software developed by the OpenSSL Project 37 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 38 * 39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 50 * OF THE POSSIBILITY OF SUCH DAMAGE. 51 * ==================================================================== 52 * 53 * This product includes cryptographic software written by Eric Young 54 * (eay@cryptsoft.com). This product includes software written by Tim 55 * Hudson (tjh@cryptsoft.com). 56 * 57 */ 58/* ==================================================================== 59 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. 60 * Portions of this software developed by SUN MICROSYSTEMS, INC., 61 * and contributed to the OpenSSL project. 62 */ 63 64#include <string.h> 65 66#include <openssl/err.h> 67 68#include "ec_lcl.h" 69 70/* 71 * This file implements the wNAF-based interleaving multi-exponentiation method 72 * Formerly at: 73 * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp 74 * You might now find it here: 75 * http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 76 * http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf 77 * For multiplication with precomputation, we use wNAF splitting, formerly at: 78 * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp 79 */ 80 81/* structure for precomputed multiples of the generator */ 82typedef struct ec_pre_comp_st { 83 const EC_GROUP *group; /* parent EC_GROUP object */ 84 size_t blocksize; /* block size for wNAF splitting */ 85 size_t numblocks; /* max. number of blocks for which we have 86 * precomputation */ 87 size_t w; /* window size */ 88 EC_POINT **points; /* array with pre-calculated multiples of 89 * generator: 'num' pointers to EC_POINT 90 * objects followed by a NULL */ 91 size_t num; /* numblocks * 2^(w-1) */ 92 int references; 93} EC_PRE_COMP; 94 95/* functions to manage EC_PRE_COMP within the EC_GROUP extra_data framework */ 96static void *ec_pre_comp_dup(void *); 97static void ec_pre_comp_free(void *); 98static void ec_pre_comp_clear_free(void *); 99 100static EC_PRE_COMP *ec_pre_comp_new(const EC_GROUP *group) 101{ 102 EC_PRE_COMP *ret = NULL; 103 104 if (!group) 105 return NULL; 106 107 ret = (EC_PRE_COMP *)OPENSSL_malloc(sizeof(EC_PRE_COMP)); 108 if (!ret) { 109 ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 110 return ret; 111 } 112 ret->group = group; 113 ret->blocksize = 8; /* default */ 114 ret->numblocks = 0; 115 ret->w = 4; /* default */ 116 ret->points = NULL; 117 ret->num = 0; 118 ret->references = 1; 119 return ret; 120} 121 122static void *ec_pre_comp_dup(void *src_) 123{ 124 EC_PRE_COMP *src = src_; 125 126 /* no need to actually copy, these objects never change! */ 127 128 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 129 130 return src_; 131} 132 133static void ec_pre_comp_free(void *pre_) 134{ 135 int i; 136 EC_PRE_COMP *pre = pre_; 137 138 if (!pre) 139 return; 140 141 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 142 if (i > 0) 143 return; 144 145 if (pre->points) { 146 EC_POINT **p; 147 148 for (p = pre->points; *p != NULL; p++) 149 EC_POINT_free(*p); 150 OPENSSL_free(pre->points); 151 } 152 OPENSSL_free(pre); 153} 154 155static void ec_pre_comp_clear_free(void *pre_) 156{ 157 int i; 158 EC_PRE_COMP *pre = pre_; 159 160 if (!pre) 161 return; 162 163 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 164 if (i > 0) 165 return; 166 167 if (pre->points) { 168 EC_POINT **p; 169 170 for (p = pre->points; *p != NULL; p++) { 171 EC_POINT_clear_free(*p); 172 OPENSSL_cleanse(p, sizeof(*p)); 173 } 174 OPENSSL_free(pre->points); 175 } 176 OPENSSL_cleanse(pre, sizeof(*pre)); 177 OPENSSL_free(pre); 178} 179 180/*- 181 * Determine the modified width-(w+1) Non-Adjacent Form (wNAF) of 'scalar'. 182 * This is an array r[] of values that are either zero or odd with an 183 * absolute value less than 2^w satisfying 184 * scalar = \sum_j r[j]*2^j 185 * where at most one of any w+1 consecutive digits is non-zero 186 * with the exception that the most significant digit may be only 187 * w-1 zeros away from that next non-zero digit. 188 */ 189static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) 190{ 191 int window_val; 192 int ok = 0; 193 signed char *r = NULL; 194 int sign = 1; 195 int bit, next_bit, mask; 196 size_t len = 0, j; 197 198 if (BN_is_zero(scalar)) { 199 r = OPENSSL_malloc(1); 200 if (!r) { 201 ECerr(EC_F_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE); 202 goto err; 203 } 204 r[0] = 0; 205 *ret_len = 1; 206 return r; 207 } 208 209 if (w <= 0 || w > 7) { /* 'signed char' can represent integers with 210 * absolute values less than 2^7 */ 211 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 212 goto err; 213 } 214 bit = 1 << w; /* at most 128 */ 215 next_bit = bit << 1; /* at most 256 */ 216 mask = next_bit - 1; /* at most 255 */ 217 218 if (BN_is_negative(scalar)) { 219 sign = -1; 220 } 221 222 if (scalar->d == NULL || scalar->top == 0) { 223 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 224 goto err; 225 } 226 227 len = BN_num_bits(scalar); 228 r = OPENSSL_malloc(len + 1); /* modified wNAF may be one digit longer 229 * than binary representation (*ret_len will 230 * be set to the actual length, i.e. at most 231 * BN_num_bits(scalar) + 1) */ 232 if (r == NULL) { 233 ECerr(EC_F_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE); 234 goto err; 235 } 236 window_val = scalar->d[0] & mask; 237 j = 0; 238 while ((window_val != 0) || (j + w + 1 < len)) { /* if j+w+1 >= len, 239 * window_val will not 240 * increase */ 241 int digit = 0; 242 243 /* 0 <= window_val <= 2^(w+1) */ 244 245 if (window_val & 1) { 246 /* 0 < window_val < 2^(w+1) */ 247 248 if (window_val & bit) { 249 digit = window_val - next_bit; /* -2^w < digit < 0 */ 250 251#if 1 /* modified wNAF */ 252 if (j + w + 1 >= len) { 253 /* 254 * special case for generating modified wNAFs: no new 255 * bits will be added into window_val, so using a 256 * positive digit here will decrease the total length of 257 * the representation 258 */ 259 260 digit = window_val & (mask >> 1); /* 0 < digit < 2^w */ 261 } 262#endif 263 } else { 264 digit = window_val; /* 0 < digit < 2^w */ 265 } 266 267 if (digit <= -bit || digit >= bit || !(digit & 1)) { 268 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 269 goto err; 270 } 271 272 window_val -= digit; 273 274 /* 275 * now window_val is 0 or 2^(w+1) in standard wNAF generation; 276 * for modified window NAFs, it may also be 2^w 277 */ 278 if (window_val != 0 && window_val != next_bit 279 && window_val != bit) { 280 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 281 goto err; 282 } 283 } 284 285 r[j++] = sign * digit; 286 287 window_val >>= 1; 288 window_val += bit * BN_is_bit_set(scalar, j + w); 289 290 if (window_val > next_bit) { 291 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 292 goto err; 293 } 294 } 295 296 if (j > len + 1) { 297 ECerr(EC_F_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); 298 goto err; 299 } 300 len = j; 301 ok = 1; 302 303 err: 304 if (!ok) { 305 OPENSSL_free(r); 306 r = NULL; 307 } 308 if (ok) 309 *ret_len = len; 310 return r; 311} 312 313#define EC_POINT_BN_set_flags(P, flags) do { \ 314 BN_set_flags(&(P)->X, (flags)); \ 315 BN_set_flags(&(P)->Y, (flags)); \ 316 BN_set_flags(&(P)->Z, (flags)); \ 317} while(0) 318 319/*- 320 * This functions computes (in constant time) a point multiplication over the 321 * EC group. 322 * 323 * At a high level, it is Montgomery ladder with conditional swaps. 324 * 325 * It performs either a fixed scalar point multiplication 326 * (scalar * generator) 327 * when point is NULL, or a generic scalar point multiplication 328 * (scalar * point) 329 * when point is not NULL. 330 * 331 * scalar should be in the range [0,n) otherwise all constant time bets are off. 332 * 333 * NB: This says nothing about EC_POINT_add and EC_POINT_dbl, 334 * which of course are not constant time themselves. 335 * 336 * The product is stored in r. 337 * 338 * Returns 1 on success, 0 otherwise. 339 */ 340static int ec_mul_consttime(const EC_GROUP *group, EC_POINT *r, 341 const BIGNUM *scalar, const EC_POINT *point, 342 BN_CTX *ctx) 343{ 344 int i, cardinality_bits, group_top, kbit, pbit, Z_is_one; 345 EC_POINT *s = NULL; 346 BIGNUM *k = NULL; 347 BIGNUM *lambda = NULL; 348 BIGNUM *cardinality = NULL; 349 BN_CTX *new_ctx = NULL; 350 int ret = 0; 351 352 if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL) 353 return 0; 354 355 BN_CTX_start(ctx); 356 357 s = EC_POINT_new(group); 358 if (s == NULL) 359 goto err; 360 361 if (point == NULL) { 362 if (!EC_POINT_copy(s, group->generator)) 363 goto err; 364 } else { 365 if (!EC_POINT_copy(s, point)) 366 goto err; 367 } 368 369 EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME); 370 371 cardinality = BN_CTX_get(ctx); 372 lambda = BN_CTX_get(ctx); 373 k = BN_CTX_get(ctx); 374 if (k == NULL || !BN_mul(cardinality, &group->order, &group->cofactor, ctx)) 375 goto err; 376 377 /* 378 * Group cardinalities are often on a word boundary. 379 * So when we pad the scalar, some timing diff might 380 * pop if it needs to be expanded due to carries. 381 * So expand ahead of time. 382 */ 383 cardinality_bits = BN_num_bits(cardinality); 384 group_top = cardinality->top; 385 if ((bn_wexpand(k, group_top + 2) == NULL) 386 || (bn_wexpand(lambda, group_top + 2) == NULL)) 387 goto err; 388 389 if (!BN_copy(k, scalar)) 390 goto err; 391 392 BN_set_flags(k, BN_FLG_CONSTTIME); 393 394 if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) { 395 /*- 396 * this is an unusual input, and we don't guarantee 397 * constant-timeness 398 */ 399 if (!BN_nnmod(k, k, cardinality, ctx)) 400 goto err; 401 } 402 403 if (!BN_add(lambda, k, cardinality)) 404 goto err; 405 BN_set_flags(lambda, BN_FLG_CONSTTIME); 406 if (!BN_add(k, lambda, cardinality)) 407 goto err; 408 /* 409 * lambda := scalar + cardinality 410 * k := scalar + 2*cardinality 411 */ 412 kbit = BN_is_bit_set(lambda, cardinality_bits); 413 BN_consttime_swap(kbit, k, lambda, group_top + 2); 414 415 group_top = group->field.top; 416 if ((bn_wexpand(&s->X, group_top) == NULL) 417 || (bn_wexpand(&s->Y, group_top) == NULL) 418 || (bn_wexpand(&s->Z, group_top) == NULL) 419 || (bn_wexpand(&r->X, group_top) == NULL) 420 || (bn_wexpand(&r->Y, group_top) == NULL) 421 || (bn_wexpand(&r->Z, group_top) == NULL)) 422 goto err; 423 424 /* top bit is a 1, in a fixed pos */ 425 if (!EC_POINT_copy(r, s)) 426 goto err; 427 428 EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME); 429 430 if (!EC_POINT_dbl(group, s, s, ctx)) 431 goto err; 432 433 pbit = 0; 434 435#define EC_POINT_CSWAP(c, a, b, w, t) do { \ 436 BN_consttime_swap(c, &(a)->X, &(b)->X, w); \ 437 BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \ 438 BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \ 439 t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \ 440 (a)->Z_is_one ^= (t); \ 441 (b)->Z_is_one ^= (t); \ 442} while(0) 443 444 /*- 445 * The ladder step, with branches, is 446 * 447 * k[i] == 0: S = add(R, S), R = dbl(R) 448 * k[i] == 1: R = add(S, R), S = dbl(S) 449 * 450 * Swapping R, S conditionally on k[i] leaves you with state 451 * 452 * k[i] == 0: T, U = R, S 453 * k[i] == 1: T, U = S, R 454 * 455 * Then perform the ECC ops. 456 * 457 * U = add(T, U) 458 * T = dbl(T) 459 * 460 * Which leaves you with state 461 * 462 * k[i] == 0: U = add(R, S), T = dbl(R) 463 * k[i] == 1: U = add(S, R), T = dbl(S) 464 * 465 * Swapping T, U conditionally on k[i] leaves you with state 466 * 467 * k[i] == 0: R, S = T, U 468 * k[i] == 1: R, S = U, T 469 * 470 * Which leaves you with state 471 * 472 * k[i] == 0: S = add(R, S), R = dbl(R) 473 * k[i] == 1: R = add(S, R), S = dbl(S) 474 * 475 * So we get the same logic, but instead of a branch it's a 476 * conditional swap, followed by ECC ops, then another conditional swap. 477 * 478 * Optimization: The end of iteration i and start of i-1 looks like 479 * 480 * ... 481 * CSWAP(k[i], R, S) 482 * ECC 483 * CSWAP(k[i], R, S) 484 * (next iteration) 485 * CSWAP(k[i-1], R, S) 486 * ECC 487 * CSWAP(k[i-1], R, S) 488 * ... 489 * 490 * So instead of two contiguous swaps, you can merge the condition 491 * bits and do a single swap. 492 * 493 * k[i] k[i-1] Outcome 494 * 0 0 No Swap 495 * 0 1 Swap 496 * 1 0 Swap 497 * 1 1 No Swap 498 * 499 * This is XOR. pbit tracks the previous bit of k. 500 */ 501 502 for (i = cardinality_bits - 1; i >= 0; i--) { 503 kbit = BN_is_bit_set(k, i) ^ pbit; 504 EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one); 505 if (!EC_POINT_add(group, s, r, s, ctx)) 506 goto err; 507 if (!EC_POINT_dbl(group, r, r, ctx)) 508 goto err; 509 /* 510 * pbit logic merges this cswap with that of the 511 * next iteration 512 */ 513 pbit ^= kbit; 514 } 515 /* one final cswap to move the right value into r */ 516 EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one); 517#undef EC_POINT_CSWAP 518 519 ret = 1; 520 521 err: 522 EC_POINT_free(s); 523 BN_CTX_end(ctx); 524 BN_CTX_free(new_ctx); 525 526 return ret; 527} 528 529#undef EC_POINT_BN_set_flags 530 531/* 532 * TODO: table should be optimised for the wNAF-based implementation, 533 * sometimes smaller windows will give better performance (thus the 534 * boundaries should be increased) 535 */ 536#define EC_window_bits_for_scalar_size(b) \ 537 ((size_t) \ 538 ((b) >= 2000 ? 6 : \ 539 (b) >= 800 ? 5 : \ 540 (b) >= 300 ? 4 : \ 541 (b) >= 70 ? 3 : \ 542 (b) >= 20 ? 2 : \ 543 1)) 544 545/*- 546 * Compute 547 * \sum scalars[i]*points[i], 548 * also including 549 * scalar*generator 550 * in the addition if scalar != NULL 551 */ 552int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, 553 size_t num, const EC_POINT *points[], const BIGNUM *scalars[], 554 BN_CTX *ctx) 555{ 556 BN_CTX *new_ctx = NULL; 557 const EC_POINT *generator = NULL; 558 EC_POINT *tmp = NULL; 559 size_t totalnum; 560 size_t blocksize = 0, numblocks = 0; /* for wNAF splitting */ 561 size_t pre_points_per_block = 0; 562 size_t i, j; 563 int k; 564 int r_is_inverted = 0; 565 int r_is_at_infinity = 1; 566 size_t *wsize = NULL; /* individual window sizes */ 567 signed char **wNAF = NULL; /* individual wNAFs */ 568 size_t *wNAF_len = NULL; 569 size_t max_len = 0; 570 size_t num_val; 571 EC_POINT **val = NULL; /* precomputation */ 572 EC_POINT **v; 573 EC_POINT ***val_sub = NULL; /* pointers to sub-arrays of 'val' or 574 * 'pre_comp->points' */ 575 const EC_PRE_COMP *pre_comp = NULL; 576 int num_scalar = 0; /* flag: will be set to 1 if 'scalar' must be 577 * treated like other scalars, i.e. 578 * precomputation is not available */ 579 int ret = 0; 580 581 if (group->meth != r->meth) { 582 ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS); 583 return 0; 584 } 585 586 if ((scalar == NULL) && (num == 0)) { 587 return EC_POINT_set_to_infinity(group, r); 588 } 589 590 if (!BN_is_zero(&group->order) && !BN_is_zero(&group->cofactor)) { 591 /*- 592 * Handle the common cases where the scalar is secret, enforcing a constant 593 * time scalar multiplication algorithm. 594 */ 595 if ((scalar != NULL) && (num == 0)) { 596 /*- 597 * In this case we want to compute scalar * GeneratorPoint: this 598 * codepath is reached most prominently by (ephemeral) key generation 599 * of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH 600 * keygen/first half), where the scalar is always secret. This is why 601 * we ignore if BN_FLG_CONSTTIME is actually set and we always call the 602 * constant time version. 603 */ 604 return ec_mul_consttime(group, r, scalar, NULL, ctx); 605 } 606 if ((scalar == NULL) && (num == 1)) { 607 /*- 608 * In this case we want to compute scalar * GenericPoint: this codepath 609 * is reached most prominently by the second half of ECDH, where the 610 * secret scalar is multiplied by the peer's public point. To protect 611 * the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and 612 * we always call the constant time version. 613 */ 614 return ec_mul_consttime(group, r, scalars[0], points[0], ctx); 615 } 616 } 617 618 for (i = 0; i < num; i++) { 619 if (group->meth != points[i]->meth) { 620 ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS); 621 return 0; 622 } 623 } 624 625 if (ctx == NULL) { 626 ctx = new_ctx = BN_CTX_new(); 627 if (ctx == NULL) 628 goto err; 629 } 630 631 if (scalar != NULL) { 632 generator = EC_GROUP_get0_generator(group); 633 if (generator == NULL) { 634 ECerr(EC_F_EC_WNAF_MUL, EC_R_UNDEFINED_GENERATOR); 635 goto err; 636 } 637 638 /* look if we can use precomputed multiples of generator */ 639 640 pre_comp = 641 EC_EX_DATA_get_data(group->extra_data, ec_pre_comp_dup, 642 ec_pre_comp_free, ec_pre_comp_clear_free); 643 644 if (pre_comp && pre_comp->numblocks 645 && (EC_POINT_cmp(group, generator, pre_comp->points[0], ctx) == 646 0)) { 647 blocksize = pre_comp->blocksize; 648 649 /* 650 * determine maximum number of blocks that wNAF splitting may 651 * yield (NB: maximum wNAF length is bit length plus one) 652 */ 653 numblocks = (BN_num_bits(scalar) / blocksize) + 1; 654 655 /* 656 * we cannot use more blocks than we have precomputation for 657 */ 658 if (numblocks > pre_comp->numblocks) 659 numblocks = pre_comp->numblocks; 660 661 pre_points_per_block = (size_t)1 << (pre_comp->w - 1); 662 663 /* check that pre_comp looks sane */ 664 if (pre_comp->num != (pre_comp->numblocks * pre_points_per_block)) { 665 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 666 goto err; 667 } 668 } else { 669 /* can't use precomputation */ 670 pre_comp = NULL; 671 numblocks = 1; 672 num_scalar = 1; /* treat 'scalar' like 'num'-th element of 673 * 'scalars' */ 674 } 675 } 676 677 totalnum = num + numblocks; 678 679 wsize = OPENSSL_malloc(totalnum * sizeof(wsize[0])); 680 wNAF_len = OPENSSL_malloc(totalnum * sizeof(wNAF_len[0])); 681 /* include space for pivot */ 682 wNAF = OPENSSL_malloc((totalnum + 1) * sizeof(wNAF[0])); 683 val_sub = OPENSSL_malloc(totalnum * sizeof(val_sub[0])); 684 685 /* Ensure wNAF is initialised in case we end up going to err */ 686 if (wNAF) 687 wNAF[0] = NULL; /* preliminary pivot */ 688 689 if (!wsize || !wNAF_len || !wNAF || !val_sub) { 690 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 691 goto err; 692 } 693 694 /* 695 * num_val will be the total number of temporarily precomputed points 696 */ 697 num_val = 0; 698 699 for (i = 0; i < num + num_scalar; i++) { 700 size_t bits; 701 702 bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(scalar); 703 wsize[i] = EC_window_bits_for_scalar_size(bits); 704 num_val += (size_t)1 << (wsize[i] - 1); 705 wNAF[i + 1] = NULL; /* make sure we always have a pivot */ 706 wNAF[i] = 707 compute_wNAF((i < num ? scalars[i] : scalar), wsize[i], 708 &wNAF_len[i]); 709 if (wNAF[i] == NULL) 710 goto err; 711 if (wNAF_len[i] > max_len) 712 max_len = wNAF_len[i]; 713 } 714 715 if (numblocks) { 716 /* we go here iff scalar != NULL */ 717 718 if (pre_comp == NULL) { 719 if (num_scalar != 1) { 720 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 721 goto err; 722 } 723 /* we have already generated a wNAF for 'scalar' */ 724 } else { 725 signed char *tmp_wNAF = NULL; 726 size_t tmp_len = 0; 727 728 if (num_scalar != 0) { 729 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 730 goto err; 731 } 732 733 /* 734 * use the window size for which we have precomputation 735 */ 736 wsize[num] = pre_comp->w; 737 tmp_wNAF = compute_wNAF(scalar, wsize[num], &tmp_len); 738 if (!tmp_wNAF) 739 goto err; 740 741 if (tmp_len <= max_len) { 742 /* 743 * One of the other wNAFs is at least as long as the wNAF 744 * belonging to the generator, so wNAF splitting will not buy 745 * us anything. 746 */ 747 748 numblocks = 1; 749 totalnum = num + 1; /* don't use wNAF splitting */ 750 wNAF[num] = tmp_wNAF; 751 wNAF[num + 1] = NULL; 752 wNAF_len[num] = tmp_len; 753 if (tmp_len > max_len) 754 max_len = tmp_len; 755 /* 756 * pre_comp->points starts with the points that we need here: 757 */ 758 val_sub[num] = pre_comp->points; 759 } else { 760 /* 761 * don't include tmp_wNAF directly into wNAF array - use wNAF 762 * splitting and include the blocks 763 */ 764 765 signed char *pp; 766 EC_POINT **tmp_points; 767 768 if (tmp_len < numblocks * blocksize) { 769 /* 770 * possibly we can do with fewer blocks than estimated 771 */ 772 numblocks = (tmp_len + blocksize - 1) / blocksize; 773 if (numblocks > pre_comp->numblocks) { 774 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 775 goto err; 776 } 777 totalnum = num + numblocks; 778 } 779 780 /* split wNAF in 'numblocks' parts */ 781 pp = tmp_wNAF; 782 tmp_points = pre_comp->points; 783 784 for (i = num; i < totalnum; i++) { 785 if (i < totalnum - 1) { 786 wNAF_len[i] = blocksize; 787 if (tmp_len < blocksize) { 788 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 789 goto err; 790 } 791 tmp_len -= blocksize; 792 } else 793 /* 794 * last block gets whatever is left (this could be 795 * more or less than 'blocksize'!) 796 */ 797 wNAF_len[i] = tmp_len; 798 799 wNAF[i + 1] = NULL; 800 wNAF[i] = OPENSSL_malloc(wNAF_len[i]); 801 if (wNAF[i] == NULL) { 802 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 803 OPENSSL_free(tmp_wNAF); 804 goto err; 805 } 806 memcpy(wNAF[i], pp, wNAF_len[i]); 807 if (wNAF_len[i] > max_len) 808 max_len = wNAF_len[i]; 809 810 if (*tmp_points == NULL) { 811 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 812 OPENSSL_free(tmp_wNAF); 813 goto err; 814 } 815 val_sub[i] = tmp_points; 816 tmp_points += pre_points_per_block; 817 pp += blocksize; 818 } 819 OPENSSL_free(tmp_wNAF); 820 } 821 } 822 } 823 824 /* 825 * All points we precompute now go into a single array 'val'. 826 * 'val_sub[i]' is a pointer to the subarray for the i-th point, or to a 827 * subarray of 'pre_comp->points' if we already have precomputation. 828 */ 829 val = OPENSSL_malloc((num_val + 1) * sizeof(val[0])); 830 if (val == NULL) { 831 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 832 goto err; 833 } 834 val[num_val] = NULL; /* pivot element */ 835 836 /* allocate points for precomputation */ 837 v = val; 838 for (i = 0; i < num + num_scalar; i++) { 839 val_sub[i] = v; 840 for (j = 0; j < ((size_t)1 << (wsize[i] - 1)); j++) { 841 *v = EC_POINT_new(group); 842 if (*v == NULL) 843 goto err; 844 v++; 845 } 846 } 847 if (!(v == val + num_val)) { 848 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 849 goto err; 850 } 851 852 if (!(tmp = EC_POINT_new(group))) 853 goto err; 854 855 /*- 856 * prepare precomputed values: 857 * val_sub[i][0] := points[i] 858 * val_sub[i][1] := 3 * points[i] 859 * val_sub[i][2] := 5 * points[i] 860 * ... 861 */ 862 for (i = 0; i < num + num_scalar; i++) { 863 if (i < num) { 864 if (!EC_POINT_copy(val_sub[i][0], points[i])) 865 goto err; 866 } else { 867 if (!EC_POINT_copy(val_sub[i][0], generator)) 868 goto err; 869 } 870 871 if (wsize[i] > 1) { 872 if (!EC_POINT_dbl(group, tmp, val_sub[i][0], ctx)) 873 goto err; 874 for (j = 1; j < ((size_t)1 << (wsize[i] - 1)); j++) { 875 if (!EC_POINT_add 876 (group, val_sub[i][j], val_sub[i][j - 1], tmp, ctx)) 877 goto err; 878 } 879 } 880 } 881 882#if 1 /* optional; EC_window_bits_for_scalar_size 883 * assumes we do this step */ 884 if (!EC_POINTs_make_affine(group, num_val, val, ctx)) 885 goto err; 886#endif 887 888 r_is_at_infinity = 1; 889 890 for (k = max_len - 1; k >= 0; k--) { 891 if (!r_is_at_infinity) { 892 if (!EC_POINT_dbl(group, r, r, ctx)) 893 goto err; 894 } 895 896 for (i = 0; i < totalnum; i++) { 897 if (wNAF_len[i] > (size_t)k) { 898 int digit = wNAF[i][k]; 899 int is_neg; 900 901 if (digit) { 902 is_neg = digit < 0; 903 904 if (is_neg) 905 digit = -digit; 906 907 if (is_neg != r_is_inverted) { 908 if (!r_is_at_infinity) { 909 if (!EC_POINT_invert(group, r, ctx)) 910 goto err; 911 } 912 r_is_inverted = !r_is_inverted; 913 } 914 915 /* digit > 0 */ 916 917 if (r_is_at_infinity) { 918 if (!EC_POINT_copy(r, val_sub[i][digit >> 1])) 919 goto err; 920 r_is_at_infinity = 0; 921 } else { 922 if (!EC_POINT_add 923 (group, r, r, val_sub[i][digit >> 1], ctx)) 924 goto err; 925 } 926 } 927 } 928 } 929 } 930 931 if (r_is_at_infinity) { 932 if (!EC_POINT_set_to_infinity(group, r)) 933 goto err; 934 } else { 935 if (r_is_inverted) 936 if (!EC_POINT_invert(group, r, ctx)) 937 goto err; 938 } 939 940 ret = 1; 941 942 err: 943 if (new_ctx != NULL) 944 BN_CTX_free(new_ctx); 945 if (tmp != NULL) 946 EC_POINT_free(tmp); 947 if (wsize != NULL) 948 OPENSSL_free(wsize); 949 if (wNAF_len != NULL) 950 OPENSSL_free(wNAF_len); 951 if (wNAF != NULL) { 952 signed char **w; 953 954 for (w = wNAF; *w != NULL; w++) 955 OPENSSL_free(*w); 956 957 OPENSSL_free(wNAF); 958 } 959 if (val != NULL) { 960 for (v = val; *v != NULL; v++) 961 EC_POINT_clear_free(*v); 962 963 OPENSSL_free(val); 964 } 965 if (val_sub != NULL) { 966 OPENSSL_free(val_sub); 967 } 968 return ret; 969} 970 971/*- 972 * ec_wNAF_precompute_mult() 973 * creates an EC_PRE_COMP object with preprecomputed multiples of the generator 974 * for use with wNAF splitting as implemented in ec_wNAF_mul(). 975 * 976 * 'pre_comp->points' is an array of multiples of the generator 977 * of the following form: 978 * points[0] = generator; 979 * points[1] = 3 * generator; 980 * ... 981 * points[2^(w-1)-1] = (2^(w-1)-1) * generator; 982 * points[2^(w-1)] = 2^blocksize * generator; 983 * points[2^(w-1)+1] = 3 * 2^blocksize * generator; 984 * ... 985 * points[2^(w-1)*(numblocks-1)-1] = (2^(w-1)) * 2^(blocksize*(numblocks-2)) * generator 986 * points[2^(w-1)*(numblocks-1)] = 2^(blocksize*(numblocks-1)) * generator 987 * ... 988 * points[2^(w-1)*numblocks-1] = (2^(w-1)) * 2^(blocksize*(numblocks-1)) * generator 989 * points[2^(w-1)*numblocks] = NULL 990 */ 991int ec_wNAF_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 992{ 993 const EC_POINT *generator; 994 EC_POINT *tmp_point = NULL, *base = NULL, **var; 995 BN_CTX *new_ctx = NULL; 996 BIGNUM *order; 997 size_t i, bits, w, pre_points_per_block, blocksize, numblocks, num; 998 EC_POINT **points = NULL; 999 EC_PRE_COMP *pre_comp; 1000 int ret = 0; 1001 1002 /* if there is an old EC_PRE_COMP object, throw it away */ 1003 EC_EX_DATA_free_data(&group->extra_data, ec_pre_comp_dup, 1004 ec_pre_comp_free, ec_pre_comp_clear_free); 1005 1006 if ((pre_comp = ec_pre_comp_new(group)) == NULL) 1007 return 0; 1008 1009 generator = EC_GROUP_get0_generator(group); 1010 if (generator == NULL) { 1011 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNDEFINED_GENERATOR); 1012 goto err; 1013 } 1014 1015 if (ctx == NULL) { 1016 ctx = new_ctx = BN_CTX_new(); 1017 if (ctx == NULL) 1018 goto err; 1019 } 1020 1021 BN_CTX_start(ctx); 1022 order = BN_CTX_get(ctx); 1023 if (order == NULL) 1024 goto err; 1025 1026 if (!EC_GROUP_get_order(group, order, ctx)) 1027 goto err; 1028 if (BN_is_zero(order)) { 1029 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNKNOWN_ORDER); 1030 goto err; 1031 } 1032 1033 bits = BN_num_bits(order); 1034 /* 1035 * The following parameters mean we precompute (approximately) one point 1036 * per bit. TBD: The combination 8, 4 is perfect for 160 bits; for other 1037 * bit lengths, other parameter combinations might provide better 1038 * efficiency. 1039 */ 1040 blocksize = 8; 1041 w = 4; 1042 if (EC_window_bits_for_scalar_size(bits) > w) { 1043 /* let's not make the window too small ... */ 1044 w = EC_window_bits_for_scalar_size(bits); 1045 } 1046 1047 numblocks = (bits + blocksize - 1) / blocksize; /* max. number of blocks 1048 * to use for wNAF 1049 * splitting */ 1050 1051 pre_points_per_block = (size_t)1 << (w - 1); 1052 num = pre_points_per_block * numblocks; /* number of points to compute 1053 * and store */ 1054 1055 points = OPENSSL_malloc(sizeof(EC_POINT *) * (num + 1)); 1056 if (!points) { 1057 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 1058 goto err; 1059 } 1060 1061 var = points; 1062 var[num] = NULL; /* pivot */ 1063 for (i = 0; i < num; i++) { 1064 if ((var[i] = EC_POINT_new(group)) == NULL) { 1065 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 1066 goto err; 1067 } 1068 } 1069 1070 if (!(tmp_point = EC_POINT_new(group)) || !(base = EC_POINT_new(group))) { 1071 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 1072 goto err; 1073 } 1074 1075 if (!EC_POINT_copy(base, generator)) 1076 goto err; 1077 1078 /* do the precomputation */ 1079 for (i = 0; i < numblocks; i++) { 1080 size_t j; 1081 1082 if (!EC_POINT_dbl(group, tmp_point, base, ctx)) 1083 goto err; 1084 1085 if (!EC_POINT_copy(*var++, base)) 1086 goto err; 1087 1088 for (j = 1; j < pre_points_per_block; j++, var++) { 1089 /* 1090 * calculate odd multiples of the current base point 1091 */ 1092 if (!EC_POINT_add(group, *var, tmp_point, *(var - 1), ctx)) 1093 goto err; 1094 } 1095 1096 if (i < numblocks - 1) { 1097 /* 1098 * get the next base (multiply current one by 2^blocksize) 1099 */ 1100 size_t k; 1101 1102 if (blocksize <= 2) { 1103 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_INTERNAL_ERROR); 1104 goto err; 1105 } 1106 1107 if (!EC_POINT_dbl(group, base, tmp_point, ctx)) 1108 goto err; 1109 for (k = 2; k < blocksize; k++) { 1110 if (!EC_POINT_dbl(group, base, base, ctx)) 1111 goto err; 1112 } 1113 } 1114 } 1115 1116 if (!EC_POINTs_make_affine(group, num, points, ctx)) 1117 goto err; 1118 1119 pre_comp->group = group; 1120 pre_comp->blocksize = blocksize; 1121 pre_comp->numblocks = numblocks; 1122 pre_comp->w = w; 1123 pre_comp->points = points; 1124 points = NULL; 1125 pre_comp->num = num; 1126 1127 if (!EC_EX_DATA_set_data(&group->extra_data, pre_comp, 1128 ec_pre_comp_dup, ec_pre_comp_free, 1129 ec_pre_comp_clear_free)) 1130 goto err; 1131 pre_comp = NULL; 1132 1133 ret = 1; 1134 err: 1135 if (ctx != NULL) 1136 BN_CTX_end(ctx); 1137 if (new_ctx != NULL) 1138 BN_CTX_free(new_ctx); 1139 if (pre_comp) 1140 ec_pre_comp_free(pre_comp); 1141 if (points) { 1142 EC_POINT **p; 1143 1144 for (p = points; *p != NULL; p++) 1145 EC_POINT_free(*p); 1146 OPENSSL_free(points); 1147 } 1148 if (tmp_point) 1149 EC_POINT_free(tmp_point); 1150 if (base) 1151 EC_POINT_free(base); 1152 return ret; 1153} 1154 1155int ec_wNAF_have_precompute_mult(const EC_GROUP *group) 1156{ 1157 if (EC_EX_DATA_get_data 1158 (group->extra_data, ec_pre_comp_dup, ec_pre_comp_free, 1159 ec_pre_comp_clear_free) != NULL) 1160 return 1; 1161 else 1162 return 0; 1163} 1164