1/* crypto/ec/ecp_nistp256.c */ 2/* 3 * Written by Adam Langley (Google) for the OpenSSL project 4 */ 5/* Copyright 2011 Google Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21/* 22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication 23 * 24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26 * work which got its smarts from Daniel J. Bernstein's work on the same. 27 */ 28 29#include <openssl/opensslconf.h> 30#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31 32# ifndef OPENSSL_SYS_VMS 33# include <stdint.h> 34# else 35# include <inttypes.h> 36# endif 37 38# include <string.h> 39# include <openssl/err.h> 40# include "ec_lcl.h" 41# include "bn_int.h" /* bn_bn2lebinpad, bn_lebin2bn */ 42 43# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 44 /* even with gcc, the typedef won't work for 32-bit platforms */ 45typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 46 * platforms */ 47typedef __int128_t int128_t; 48# else 49# error "Need GCC 3.1 or later to define type uint128_t" 50# endif 51 52typedef uint8_t u8; 53typedef uint32_t u32; 54typedef uint64_t u64; 55 56/* 57 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We 58 * can serialise an element of this field into 32 bytes. We call this an 59 * felem_bytearray. 
 */

typedef u8 felem_bytearray[32];

/*
 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
 * values are big-endian.
 */
static const felem_bytearray nistp256_curve_params[5] = {
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
    {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
     0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
     0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
     0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
    {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
     0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
     0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
     0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
    {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
     0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
     0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
     0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
};

/*-
 * The representation of field elements.
 * ------------------------------------
 *
 * We represent field elements with either four 128-bit values, eight 128-bit
 * values, or four 64-bit values. The field element represented is:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)
 * or:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512  (mod p)
 *
 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
 * apart, but are 128-bits wide, the most significant bits of each limb overlap
 * with the least significant bits of the next.
 *
 * A field element with four limbs is an 'felem'.
One with eight limbs is a 106 * 'longfelem' 107 * 108 * A field element with four, 64-bit values is called a 'smallfelem'. Small 109 * values are used as intermediate values before multiplication. 110 */ 111 112# define NLIMBS 4 113 114typedef uint128_t limb; 115typedef limb felem[NLIMBS]; 116typedef limb longfelem[NLIMBS * 2]; 117typedef u64 smallfelem[NLIMBS]; 118 119/* This is the value of the prime as four 64-bit words, little-endian. */ 120static const u64 kPrime[4] = 121 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul }; 122static const u64 bottom63bits = 0x7ffffffffffffffful; 123 124/* 125 * bin32_to_felem takes a little-endian byte array and converts it into felem 126 * form. This assumes that the CPU is little-endian. 127 */ 128static void bin32_to_felem(felem out, const u8 in[32]) 129{ 130 out[0] = *((u64 *)&in[0]); 131 out[1] = *((u64 *)&in[8]); 132 out[2] = *((u64 *)&in[16]); 133 out[3] = *((u64 *)&in[24]); 134} 135 136/* 137 * smallfelem_to_bin32 takes a smallfelem and serialises into a little 138 * endian, 32 byte array. This assumes that the CPU is little-endian. 
139 */ 140static void smallfelem_to_bin32(u8 out[32], const smallfelem in) 141{ 142 *((u64 *)&out[0]) = in[0]; 143 *((u64 *)&out[8]) = in[1]; 144 *((u64 *)&out[16]) = in[2]; 145 *((u64 *)&out[24]) = in[3]; 146} 147 148/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 149static int BN_to_felem(felem out, const BIGNUM *bn) 150{ 151 felem_bytearray b_out; 152 int num_bytes; 153 154 if (BN_is_negative(bn)) { 155 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 156 return 0; 157 } 158 num_bytes = bn_bn2lebinpad(bn, b_out, sizeof(b_out)); 159 if (num_bytes < 0) { 160 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 161 return 0; 162 } 163 bin32_to_felem(out, b_out); 164 return 1; 165} 166 167/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 168static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in) 169{ 170 felem_bytearray b_out; 171 smallfelem_to_bin32(b_out, in); 172 return bn_lebin2bn(b_out, sizeof(b_out), out); 173} 174 175/*- 176 * Field operations 177 * ---------------- 178 */ 179 180static void smallfelem_one(smallfelem out) 181{ 182 out[0] = 1; 183 out[1] = 0; 184 out[2] = 0; 185 out[3] = 0; 186} 187 188static void smallfelem_assign(smallfelem out, const smallfelem in) 189{ 190 out[0] = in[0]; 191 out[1] = in[1]; 192 out[2] = in[2]; 193 out[3] = in[3]; 194} 195 196static void felem_assign(felem out, const felem in) 197{ 198 out[0] = in[0]; 199 out[1] = in[1]; 200 out[2] = in[2]; 201 out[3] = in[3]; 202} 203 204/* felem_sum sets out = out + in. */ 205static void felem_sum(felem out, const felem in) 206{ 207 out[0] += in[0]; 208 out[1] += in[1]; 209 out[2] += in[2]; 210 out[3] += in[3]; 211} 212 213/* felem_small_sum sets out = out + in. 
 */
static void felem_small_sum(felem out, const smallfelem in)
{
    out[0] += in[0];
    out[1] += in[1];
    out[2] += in[2];
    out[3] += in[3];
}

/*
 * felem_scalar sets out = out * scalar. No reduction is performed: the
 * caller is responsible for ensuring that the limbs cannot overflow.
 */
static void felem_scalar(felem out, const u64 scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
}

/* longfelem_scalar sets out = out * scalar (again, without reduction). */
static void longfelem_scalar(longfelem out, const u64 scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
    out[4] *= scalar;
    out[5] *= scalar;
    out[6] *= scalar;
    out[7] *= scalar;
}

/*
 * NB: these macros expand to unparenthesised expressions; they are only used
 * in the constant initialisers below.
 */
# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
# define two105 (((limb)1) << 105)
# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)

/* zero105 is 0 mod p */
static const felem zero105 =
    { two105m41m9, two105, two105m41p9, two105m41p9 };

/*-
 * smallfelem_neg sets |out| to |-small|
 * On exit:
 *   out[i] < 2^105
 * (each limb is zero105[i] - small[i], with zero105[i] < 2^105+1 and
 * small[i] < 2^64, so no underflow occurs)
 */
static void smallfelem_neg(felem out, const smallfelem small)
{
    /* In order to prevent underflow, we subtract from 0 mod p. */
    out[0] = zero105[0] - small[0];
    out[1] = zero105[1] - small[1];
    out[2] = zero105[2] - small[2];
    out[3] = zero105[3] - small[3];
}

/*-
 * felem_diff subtracts |in| from |out|
 * On entry:
 *   in[i] < 2^104
 * On exit:
 *   out[i] < out[i] + 2^105
 */
static void felem_diff(felem out, const felem in)
{
    /*
     * In order to prevent underflow, we add 0 mod p before subtracting.
     */
    out[0] += zero105[0];
    out[1] += zero105[1];
    out[2] += zero105[2];
    out[3] += zero105[3];

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
# define two107 (((limb)1) << 107)
# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)

/* zero107 is 0 mod p */
static const felem zero107 =
    { two107m43m11, two107, two107m43p11, two107m43p11 };

/*-
 * An alternative felem_diff for larger inputs |in|
 * felem_diff_zero107 subtracts |in| from |out|
 * On entry:
 *   in[i] < 2^106
 * On exit:
 *   out[i] < out[i] + 2^107
 */
static void felem_diff_zero107(felem out, const felem in)
{
    /*
     * In order to prevent underflow, we add 0 mod p before subtracting.
     */
    out[0] += zero107[0];
    out[1] += zero107[1];
    out[2] += zero107[2];
    out[3] += zero107[3];

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

/*-
 * longfelem_diff subtracts |in| from |out|
 * On entry:
 *   in[i] < 7*2^67
 * On exit:
 *   out[i] < out[i] + 2^70 + 2^40
 */
static void longfelem_diff(longfelem out, const longfelem in)
{
    static const limb two70m8p6 =
        (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
    static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
    static const limb two70 = (((limb) 1) << 70);
    static const limb two70m40m38p6 =
        (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
        (((limb) 1) << 6);
    static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);

    /* add 0 mod p to avoid underflow */
    out[0] += two70m8p6;
    out[1] += two70p40;
    out[2] += two70;
    out[3] += two70m40m38p6;
    out[4] += two70m6;
    out[5] += two70m6;
    out[6] += two70m6;
    out[7] += two70m6;

    /* in[i] < 7*2^67 <
2^70 - 2^40 - 2^38 + 2^6 */
    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
    out[4] -= in[4];
    out[5] -= in[5];
    out[6] -= in[6];
    out[7] -= in[7];
}

# define two64m0 (((limb)1) << 64) - 1
# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)

/* zero110 is 0 mod p */
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };

/*-
 * felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 *
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 2^64
 */
static void felem_shrink(smallfelem out, const felem in)
{
    felem tmp;
    u64 a, b, mask;
    u64 high, low;
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */

    /* Carry 2->3 */
    tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
    /* tmp[3] < 2^110 */

    tmp[2] = zero110[2] + (u64)in[2];
    tmp[0] = zero110[0] + in[0];
    tmp[1] = zero110[1] + in[1];
    /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */

    /*
     * We perform two partial reductions where we eliminate the high-word of
     * tmp[3]. We don't update the other words till the end.
     */
    a = tmp[3] >> 64;           /* a < 2^46 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb) a) << 32;
    /* tmp[3] < 2^79 */

    b = a;
    a = tmp[3] >> 64;           /* a < 2^15 */
    b += a;                     /* b < 2^46 + 2^15 < 2^47 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb) a) << 32;
    /* tmp[3] < 2^64 + 2^47 */

    /*
     * This adjusts the other two words to complete the two partial
     * reductions.
     */
    tmp[0] += b;
    tmp[1] -= (((limb) b) << 32);

    /*
     * In order to make space in tmp[3] for the carry from 2 -> 3, we
     * conditionally subtract kPrime if tmp[3] is large enough.
     */
    high = (u64)(tmp[3] >> 64);
    /* As tmp[3] < 2^65, high is either 1 or 0 */
    high = 0 - high;
    /*-
     * high is:
     *   all ones   if the high word of tmp[3] is 1
     *   all zeros  if the high word of tmp[3] is 0
     */
    low = (u64)tmp[3];
    mask = 0 - (low >> 63);
    /*-
     * mask is:
     *   all ones   if the MSB of low is 1
     *   all zeros  if the MSB of low is 0
     */
    low &= bottom63bits;
    low -= kPrime3Test;
    /* if low was greater than kPrime3Test then the MSB is zero */
    low = ~low;
    low = 0 - (low >> 63);
    /*-
     * low is:
     *   all ones   if low was > kPrime3Test
     *   all zeros  if low was <= kPrime3Test
     */
    mask = (mask & low) | high;
    tmp[0] -= mask & kPrime[0];
    tmp[1] -= mask & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    tmp[3] -= mask & kPrime[3];
    /* tmp[3] < 2**64 - 2**32 + 1 */

    /* Propagate carries so that every word fits in 64 bits. */
    tmp[1] += ((u64)(tmp[0] >> 64));
    tmp[0] = (u64)tmp[0];
    tmp[2] += ((u64)(tmp[1] >> 64));
    tmp[1] = (u64)tmp[1];
    tmp[3] += ((u64)(tmp[2] >> 64));
    tmp[2] = (u64)tmp[2];
    /* tmp[i] < 2^64 */

    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}

/* smallfelem_expand converts a smallfelem to an felem */
static void smallfelem_expand(felem out, const smallfelem in)
{
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
    out[3] = in[3];
}

/*-
 * smallfelem_square sets |out| = |small|^2
 * On entry:
 *   small[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 *
 * Schoolbook squaring: cross terms small[i]*small[j] (i != j) are counted
 * twice, which is why several partial sums below are doubled.
 */
static void smallfelem_square(longfelem out, const smallfelem small)
{
    limb a;
    u64 high, low;

    a = ((uint128_t) small[0]) * small[0];
    low = a;
    high = a >> 64;
    out[0] = low;
    out[1] = high;

    a = ((uint128_t) small[0]) * small[1];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[1] += low;
    out[2] = high;

    a = ((uint128_t) small[0]) * small[2];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[2] *= 2;
    out[3] = high;

    a = ((uint128_t) small[0]) * small[3];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] = high;

    a = ((uint128_t) small[1]) * small[2];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[3] *= 2;
    out[4] += high;

    a = ((uint128_t) small[1]) * small[1];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    a = ((uint128_t) small[1]) * small[3];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[4] *= 2;
    out[5] = high;

    a = ((uint128_t) small[2]) * small[3];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[5] *= 2;
    out[6] = high;
    out[6] += high;

    a = ((uint128_t) small[2]) * small[2];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    a = ((uint128_t) small[3]) * small[3];
    low = a;
    high = a >> 64;
    out[6] += low;
    out[7] = high;
}

/*-
 * felem_square sets |out| = |in|^2
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_square(longfelem out, const felem in)
{
    u64 small[4];
    felem_shrink(small, in);
    smallfelem_square(out, small);
}

/*-
 * smallfelem_mul sets |out| = |small1| * |small2|
 * On entry:
 *   small1[i] < 2^64
 *   small2[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void smallfelem_mul(longfelem out, const smallfelem small1,
                           const smallfelem small2)
{
    limb a;
    u64 high, low;

    a = ((uint128_t) small1[0]) * small2[0];
    low = a;
    high = a >> 64;
    out[0] = low;
    out[1] = high;

    a = ((uint128_t) small1[0]) * small2[1];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[2] = high;
    a = ((uint128_t) small1[1]) * small2[0];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[2] += high;

    a = ((uint128_t) small1[0]) * small2[2];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] = high;

    a = ((uint128_t) small1[1]) * small2[1];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    a = ((uint128_t) small1[2]) * small2[0];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    a = ((uint128_t) small1[0]) * small2[3];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] = high;

    a = ((uint128_t) small1[1]) * small2[2];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t) small1[2]) * small2[1];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t) small1[3]) * small2[0];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t) small1[1]) * small2[3];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] = high;

    a = ((uint128_t) small1[2]) * small2[2];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    a = ((uint128_t) small1[3]) * small2[1];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    a = ((uint128_t) small1[2]) * small2[3];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[6] = high;

    a = ((uint128_t) small1[3]) * small2[2];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[6] += high;

    a = ((uint128_t) small1[3]) * small2[3];
    low = a;
    high = a >> 64;
    out[6] += low;
    out[7] = high;
}

/*-
 * felem_mul sets |out| = |in1| * |in2|
 * On entry:
 *   in1[i] < 2^109
 *   in2[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_mul(longfelem out, const felem in1, const felem in2)
{
    smallfelem small1, small2;
    felem_shrink(small1, in1);
    felem_shrink(small2, in2);
    smallfelem_mul(out, small1, small2);
}

/*-
 * felem_small_mul sets |out| = |small1| * |in2|
 * On entry:
 *   small1[i] < 2^64
 *   in2[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_small_mul(longfelem out, const smallfelem small1,
                            const felem in2)
{
    smallfelem small2;
    felem_shrink(small2, in2);
    smallfelem_mul(out, small1, small2);
}

# define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
# define two100 (((limb)1) << 100)
# define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
/* zero100 is 0 mod p */
static const felem zero100 =
    { two100m36m4, two100, two100m36p4, two100m36p4 };

/*-
 * Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
 * On entry:
 *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 *   out[1] >= in[7] + 2^32*in[4]
 *   out[2] >= in[5] + 2^32*in[5]
 *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * On exit:
 *   out[0] <= out[0] + in[4] + 2^32*in[5]
 *   out[1] <= out[1] + in[5] + 2^33*in[6]
 *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 */
static void felem_reduce_(felem out, const longfelem in)
{
    int128_t c;
    /* combine common terms from below */
    c = in[4] + (in[5] << 32);
    out[0] += c;
    out[3] -= c;

    c = in[5] - in[7];
    out[1] += c;
    out[2] -= c;

    /* the remaining terms */
    /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
    out[1] -= (in[4] << 32);
    out[3] += (in[4] << 32);

    /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
    out[2] -= (in[5] << 32);

    /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
    out[0] -= in[6];
    out[0] -= (in[6] << 32);
    out[1] += (in[6] << 33);
    out[2] += (in[6] * 2);
    out[3] -= (in[6] << 32);

    /*
448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
    out[0] -= in[7];
    out[0] -= (in[7] << 32);
    out[2] += (in[7] << 33);
    out[3] += (in[7] * 3);
}

/*-
 * felem_reduce converts a longfelem into an felem.
 * To be called directly after felem_square or felem_mul.
 * On entry:
 *   in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
 * On exit:
 *   out[i] < 2^101
 */
static void felem_reduce(felem out, const longfelem in)
{
    out[0] = zero100[0] + in[0];
    out[1] = zero100[1] + in[1];
    out[2] = zero100[2] + in[2];
    out[3] = zero100[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
     * out[1] > 2^100 - 2^64 - 7*2^96 > 0
     * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
     * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
     *
     * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
     * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
     * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
     * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
     */
}

/*-
 * felem_reduce_zero105 converts a larger longfelem into an felem.
 * On entry:
 *   in[0] < 2^71
 * On exit:
 *   out[i] < 2^106
 */
static void felem_reduce_zero105(felem out, const longfelem in)
{
    out[0] = zero105[0] + in[0];
    out[1] = zero105[1] + in[1];
    out[2] = zero105[2] + in[2];
    out[3] = zero105[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
     * out[1] > 2^105 - 2^71 - 2^103 > 0
     * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
     * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
     *
     * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
     * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
     */
}

/*
 * subtract_u64 sets *result = *result - v and *carry to one if the
 * subtraction underflowed.
 */
static void subtract_u64(u64 *result, u64 *carry, u64 v)
{
    uint128_t r = *result;
    r -= v;
    *carry = (r >> 64) & 1;
    *result = (u64)r;
}

/*
 * felem_contract converts |in| to its unique, minimal representation. On
 * entry: in[i] < 2^109
 */
static void felem_contract(smallfelem out, const felem in)
{
    unsigned i;
    u64 all_equal_so_far = 0, result = 0, carry;

    felem_shrink(out, in);
    /* small is minimal except that the value might be > p */

    all_equal_so_far--;
    /*
     * We are doing a constant time test if out >= kPrime. We need to compare
     * each u64, from most-significant to least significant. For each one, if
     * all words so far have been equal (m is all ones) then a non-equal
     * result is the answer. Otherwise we continue.
     */
    /*
     * i runs 3, 2, 1, 0; the next decrement wraps the unsigned i around to
     * UINT_MAX, which fails i < 4 and terminates the loop.
     */
    for (i = 3; i < 4; i--) {
        u64 equal;
        uint128_t a = ((uint128_t) kPrime[i]) - out[i];
        /*
         * if out[i] > kPrime[i] then a will underflow and the high 64-bits
         * will all be set.
         */
        result |= all_equal_so_far & ((u64)(a >> 64));

        /*
         * if kPrime[i] == out[i] then |equal| will be all zeros and the
         * decrement will make it all ones.
         */
        equal = kPrime[i] ^ out[i];
        equal--;
        equal &= equal << 32;
        equal &= equal << 16;
        equal &= equal << 8;
        equal &= equal << 4;
        equal &= equal << 2;
        equal &= equal << 1;
        equal = 0 - (equal >> 63);

        all_equal_so_far &= equal;
    }

    /*
     * if all_equal_so_far is still all ones then the two values are equal
     * and so out >= kPrime is true.
     */
    result |= all_equal_so_far;

    /* if out >= kPrime then we subtract kPrime. */
    subtract_u64(&out[0], &carry, result & kPrime[0]);
    subtract_u64(&out[1], &carry, carry);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[1], &carry, result & kPrime[1]);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[2], &carry, result & kPrime[2]);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[3], &carry, result & kPrime[3]);
}

/* smallfelem_square_contract sets |out| = |in|^2, fully reduced mod p. */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
{
    longfelem longtmp;
    felem tmp;

    smallfelem_square(longtmp, in);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);
}

/* smallfelem_mul_contract sets |out| = |in1|*|in2|, fully reduced mod p. */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
{
    longfelem longtmp;
    felem tmp;

    smallfelem_mul(longtmp, in1, in2);
    felem_reduce(tmp, longtmp);
    felem_contract(out, tmp);
}

/*-
 * smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p)
 * and 0 otherwise.
 * On entry:
 *   small[i] < 2^64
 */
static limb smallfelem_is_zero(const smallfelem small)
{
    limb result;
    u64 is_p;

    /* branchless test for small == 0 */
    u64 is_zero = small[0] | small[1] | small[2] | small[3];
    is_zero--;
    is_zero &= is_zero << 32;
    is_zero &= is_zero << 16;
    is_zero &= is_zero << 8;
    is_zero &= is_zero << 4;
    is_zero &= is_zero << 2;
    is_zero &= is_zero << 1;
    is_zero = 0 - (is_zero >> 63);

    /* branchless test for small == p (also zero mod p) */
    is_p = (small[0] ^ kPrime[0]) |
        (small[1] ^ kPrime[1]) |
        (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
    is_p--;
    is_p &= is_p << 32;
    is_p &= is_p << 16;
    is_p &= is_p << 8;
    is_p &= is_p << 4;
    is_p &= is_p << 2;
    is_p &= is_p << 1;
    is_p = 0 - (is_p >> 63);

    is_zero |= is_p;

    /* replicate the 64-bit mask into both halves of the 128-bit limb */
    result = is_zero;
    result |= ((limb) is_zero) << 64;
    return result;
}

static int smallfelem_is_zero_int(const void *small)
{
    return (int)(smallfelem_is_zero(small) & ((limb) 1));
}

/*-
 * felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2;
    /* each e_I will hold |in|^{2^I - 1} */
    felem e2, e4, e8, e16, e32, e64;
    longfelem tmp;
    unsigned i;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2^1 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
    felem_assign(e2, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */
    felem_mul(tmp, ftmp, e2);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */
    felem_assign(e4, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /*
2^7 - 2^3 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */
    felem_mul(tmp, ftmp, e4);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */
    felem_assign(e8, ftmp);
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^16 - 2^8 */
    felem_mul(tmp, ftmp, e8);
    felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */
    felem_assign(e16, ftmp);
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^32 - 2^16 */
    felem_mul(tmp, ftmp, e16);
    felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */
    felem_assign(e32, ftmp);
    for (i = 0; i < 32; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^64 - 2^32 */
    felem_assign(e64, ftmp);
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */
    for (i = 0; i < 192; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^256 - 2^224 + 2^192 */

    felem_mul(tmp, e64, e32);
    felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^80 - 2^16 */
    felem_mul(tmp, ftmp2, e16);
    felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^88 - 2^8 */
    felem_mul(tmp, ftmp2, e8);
    felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */
    for (i = 0; i < 4; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^92 - 2^4 */
    felem_mul(tmp, ftmp2, e4);
    felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */
    felem_mul(tmp, ftmp2, e2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */
    felem_mul(tmp, ftmp2, in);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */

    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
}

/* smallfelem_inv_contract sets |out| = |in|^{-1}, fully reduced mod p. */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
{
    felem tmp;

    smallfelem_expand(tmp, in);
    felem_inv(tmp, tmp);
    felem_contract(out, tmp);
}

/*-
 * Group operations
 * ----------------
 *
 * Building on top of the field operations we have the operations on the
 * elliptic curve group itself. Points on the curve are represented in Jacobian
 * coordinates
 */

/*-
 * point_double calculates 2*(x_in, y_in, z_in)
 *
 * The method is taken from:
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
 *
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
 * while x_out == y_in is not (maybe this works, but it's not tested).
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    longfelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
    smallfelem small1, small2;

    felem_assign(ftmp, x_in);
    /* ftmp[i] < 2^106 */
    felem_assign(ftmp2, x_in);
    /* ftmp2[i] < 2^106 */

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);
    /* delta[i] < 2^101 */

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);
    /* gamma[i] < 2^101 */
    felem_shrink(small1, gamma);

    /* beta = x*gamma */
    felem_small_mul(tmp, small1, x_in);
    felem_reduce(beta, tmp);
    /* beta[i] < 2^101 */

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^105 + 2^106 < 2^107 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^107 < 2^109 */
    felem_mul(tmp, ftmp, ftmp2);
    felem_reduce(alpha, tmp);
    /* alpha[i] < 2^101 */
    felem_shrink(small2, alpha);

    /* x' = alpha^2 - 8*beta */
    smallfelem_square(tmp, small2);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^101 = 2^104 */
    felem_diff(x_out, ftmp);
    /* x_out[i] < 2^105 + 2^101 < 2^106 */

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^101 + 2^101 = 2^102 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^106 + 2^106 = 2^107 */
    felem_square(tmp, ftmp);
    felem_reduce(z_out, tmp);
    felem_diff(z_out, delta);
    /* z_out[i] < 2^105 + 2^101 < 2^106 */

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^101 = 2^103 */
    felem_diff_zero107(beta, x_out);
    /* beta[i] < 2^107 + 2^103 < 2^108 */
    felem_small_mul(tmp, small2, beta);
    /* tmp[i] < 7 * 2^64 < 2^67 */
    smallfelem_square(tmp2, small1);
    /* tmp2[i] < 7 * 2^64 */
    longfelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */
}

/*
 * point_double_small is the same as point_double, except that it operates on
 * smallfelems
 */
static void
point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
                   const smallfelem x_in, const smallfelem y_in,
                   const smallfelem z_in)
{
    felem felem_x_out, felem_y_out, felem_z_out;
    felem felem_x_in, felem_y_in, felem_z_in;

    smallfelem_expand(felem_x_in, x_in);
    smallfelem_expand(felem_y_in, y_in);
    smallfelem_expand(felem_z_in, z_in);
    point_double(felem_x_out, felem_y_out, felem_z_out,
                 felem_x_in, felem_y_in, felem_z_in);
    felem_shrink(x_out, felem_x_out);
    felem_shrink(y_out, felem_y_out);
    felem_shrink(z_out, felem_z_out);
}

/*
 * copy_conditional copies in to out iff mask is all ones.
 * The XOR-and-mask form is branchless: the access pattern does not depend
 * on the value of |mask|.
 */
static void copy_conditional(felem out, const felem in, limb mask)
{
    unsigned i;
    for (i = 0; i < NLIMBS; ++i) {
        const limb tmp = mask & (in[i] ^ out[i]);
        out[i] ^= tmp;
    }
}

/* copy_small_conditional copies in to out iff mask is all ones.
 */
static void copy_small_conditional(felem out, const smallfelem in, limb mask)
{
    unsigned i;
    /* mask is 0 or all ones; mask64 is its truncation to the 64-bit limbs */
    const u64 mask64 = mask;
    for (i = 0; i < NLIMBS; ++i) {
        /* constant-time select: no branch on |mask| */
        out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
    }
}

/*-
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
 *
 * The method is taken from:
 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
 *
 * This function includes a branch for checking whether the two input points
 * are equal, (while not equal to the point at infinity). This case never
 * happens during single point multiplication, so there is no timing leak for
 * ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const smallfelem x2,
                      const smallfelem y2, const smallfelem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
    longfelem tmp, tmp2;
    smallfelem small1, small2, small3, small4, small5;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;

    felem_shrink(small3, z1);

    /* all-ones masks iff the corresponding input is the point at infinity
     * (z == 0); used at the end to patch up the result */
    z1_is_zero = smallfelem_is_zero(small3);
    z2_is_zero = smallfelem_is_zero(z2);

    /* ftmp = z1z1 = z1**2 */
    smallfelem_square(tmp, small3);
    felem_reduce(ftmp, tmp);
    /* ftmp[i] < 2^101 */
    felem_shrink(small1, ftmp);

    if (!mixed) {
        /* ftmp2 = z2z2 = z2**2 */
        smallfelem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);
        /* ftmp2[i] < 2^101 */
        felem_shrink(small2, ftmp2);

        felem_shrink(small5, x1);

        /* u1 = ftmp3 = x1*z2z2 */
        smallfelem_mul(tmp, small5, small2);
        felem_reduce(ftmp3, tmp);
        /* ftmp3[i] < 2^101 */

        /* ftmp5 = z1 + z2 */
        felem_assign(ftmp5, z1);
        felem_small_sum(ftmp5, z2);
        /* ftmp5[i] < 2^107 */

        /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
        felem_square(tmp, ftmp5);
        felem_reduce(ftmp5, tmp);
        /* ftmp2 = z2z2 + z1z1 */
        felem_sum(ftmp2, ftmp);
        /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
        felem_diff(ftmp5, ftmp2);
        /* ftmp5[i] < 2^105 + 2^101 < 2^106 */

        /* ftmp2 = z2 * z2z2 */
        smallfelem_mul(tmp, small2, z2);
        felem_reduce(ftmp2, tmp);

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_mul(tmp, y1, ftmp2);
        felem_reduce(ftmp6, tmp);
        /* ftmp6[i] < 2^101 */
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* u1 = ftmp3 = x1*z2z2 */
        felem_assign(ftmp3, x1);
        /* ftmp3[i] < 2^106 */

        /* ftmp5 = 2z1z2 */
        felem_assign(ftmp5, z1);
        felem_scalar(ftmp5, 2);
        /* ftmp5[i] < 2*2^106 = 2^107 */

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_assign(ftmp6, y1);
        /* ftmp6[i] < 2^106 */
    }

    /* u2 = x2*z1z1 */
    smallfelem_mul(tmp, x2, small1);
    felem_reduce(ftmp4, tmp);

    /* h = ftmp4 = u2 - u1 */
    felem_diff_zero107(ftmp4, ftmp3);
    /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
    felem_shrink(small4, ftmp4);

    x_equal = smallfelem_is_zero(small4);

    /* z_out = ftmp5 * h */
    felem_small_mul(tmp, small4, ftmp5);
    felem_reduce(z_out, tmp);
    /* z_out[i] < 2^101 */

    /* ftmp = z1 * z1z1 */
    smallfelem_mul(tmp, small1, small3);
    felem_reduce(ftmp, tmp);

    /* s2 = tmp = y2 * z1**3 */
    felem_small_mul(tmp, y2, ftmp);
    felem_reduce(ftmp5, tmp);

    /* r = ftmp5 = (s2 - s1)*2 */
    felem_diff_zero107(ftmp5, ftmp6);
    /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2^109 */
    felem_shrink(small1, ftmp5);
    y_equal = smallfelem_is_zero(small1);

    /*
     * Equal finite inputs need the doubling formula (h == 0 would otherwise
     * produce z_out == 0). As documented in the header comment, this branch
     * is not constant time but is unreachable during single-point
     * multiplication.
     */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* I = ftmp = (2h)**2 */
    felem_assign(ftmp, ftmp4);
    felem_scalar(ftmp, 2);
    /* ftmp[i] < 2*2^108 = 2^109 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* J = ftmp2 = h * I */
    felem_mul(tmp, ftmp4, ftmp);
    felem_reduce(ftmp2, tmp);

    /* V = ftmp4 = U1 * I */
    felem_mul(tmp, ftmp3, ftmp);
    felem_reduce(ftmp4, tmp);

    /* x_out = r**2 - J - 2V */
    smallfelem_square(tmp, small1);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp3, ftmp4);
    felem_scalar(ftmp4, 2);
    felem_sum(ftmp4, ftmp2);
    /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
    felem_diff(x_out, ftmp4);
    /* x_out[i] < 2^105 + 2^101 */

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff_zero107(ftmp3, x_out);
    /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
    felem_small_mul(tmp, small1, ftmp3);
    felem_mul(tmp2, ftmp6, ftmp2);
    longfelem_scalar(tmp2, 2);
    /* tmp2[i] < 2*2^67 = 2^68 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */

    /* constant-time fixups for the points at infinity: if z1 == 0 the result
     * is (x2, y2, z2); if z2 == 0 it is (x1, y1, z1) */
    copy_small_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_small_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_small_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}

/*
 * point_add_small is the same as point_add, except that it operates on
 * smallfelems
 */
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
                            smallfelem x1, smallfelem y1, smallfelem z1,
                            smallfelem x2, smallfelem y2, smallfelem z2)
{
    felem felem_x3, felem_y3, felem_z3;
    felem felem_x1, felem_y1, felem_z1;
    /* widen the first point, do a non-mixed add, then narrow the result */
    smallfelem_expand(felem_x1, x1);
    smallfelem_expand(felem_y1, y1);
    smallfelem_expand(felem_z1, z1);
    point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
              x2, y2, z2);
    felem_shrink(x3, felem_x3);
    felem_shrink(y3, felem_y3);
    felem_shrink(z3, felem_z3);
}

/*-
 * Base point pre computation
 * --------------------------
 *
 * Two different sorts of precomputed tables are used in the following code.
 * Each contains various points on the curve, where each point is three field
 * elements (x, y, z).
 *
 * For the base point table, z is usually 1 (0 for the point at infinity).
 * This table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^64G
 *     3 | 0 0 1 1 | (2^64 + 1)G
 *     4 | 0 1 0 0 | 2^128G
 *     5 | 0 1 0 1 | (2^128 + 1)G
 *     6 | 0 1 1 0 | (2^128 + 2^64)G
 *     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
 *     8 | 1 0 0 0 | 2^192G
 *     9 | 1 0 0 1 | (2^192 + 1)G
 *    10 | 1 0 1 0 | (2^192 + 2^64)G
 *    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
 *    12 | 1 1 0 0 | (2^192 + 2^128)G
 *    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
 *    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
 *    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
 * followed by a copy of this with each element multiplied by 2^32.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 *
 * Tables for other points have table[i] = iG for i in 0 .. 16.
 */

/*
 * gmul is the table of precomputed base points described above.
 * Each coordinate is four 64-bit limbs, least significant limb first
 * (so 1G's x limb [3] is 0x6b17d1f2e12c4247, matching the big-endian
 * nistp256_curve_params[3]).
 */
static const smallfelem gmul[2][16][3] = {
    /* gmul[0]: 0G, 1G, 2^64*G, ..., (2^192 + 2^128 + 2^64 + 1)G */
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
       0x6b17d1f2e12c4247},
      {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
       0x4fe342e2fe1a7f9b},
      {1, 0, 0, 0}},
     {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
       0x0fa822bc2811aaa5},
      {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
       0xbff44ae8f5dba80d},
      {1, 0, 0, 0}},
     {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
       0x300a4bbc89d6726f},
      {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
       0x72aac7e0d09b4644},
      {1, 0, 0, 0}},
     {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
       0x447d739beedb5e67},
      {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
       0x2d4825ab834131ee},
      {1, 0, 0, 0}},
     {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
       0xef9519328a9c72ff},
      {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
       0x611e9fc37dbb2c9b},
      {1, 0, 0, 0}},
     {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
       0x550663797b51f5d8},
      {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
       0x157164848aecb851},
      {1, 0, 0, 0}},
     {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
       0xeb5d7745b21141ea},
      {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
       0xeafd72ebdbecc17b},
      {1, 0, 0, 0}},
     {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
       0xa6d39677a7849276},
      {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
       0x674f84749b0b8816},
      {1, 0, 0, 0}},
     {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
       0x4e769e7672c9ddad},
      {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
       0x42b99082de830663},
      {1, 0, 0, 0}},
     {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
       0x78878ef61c6ce04d},
      {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
       0xb6cb3f5d7b72c321},
      {1, 0, 0, 0}},
     {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
       0x0c88bc4d716b1287},
      {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
       0xdd5ddea3f3901dc6},
      {1, 0, 0, 0}},
     {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
       0x68f344af6b317466},
      {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
       0x31b9c405f8540a20},
      {1, 0, 0, 0}},
     {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
       0x4052bf4b6f461db9},
      {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
       0xfecf4d5190b0fc61},
      {1, 0, 0, 0}},
     {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
       0x1eddbae2c802e41a},
      {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
       0x43104d86560ebcfc},
      {1, 0, 0, 0}},
     {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
       0xb48e26b484f7a21c},
      {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
       0xfac015404d4d3dab},
      {1, 0, 0, 0}}},
    /* gmul[1]: the same multiples scaled by 2^32 */
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
       0x7fe36b40af22af89},
      {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
       0xe697d45825b63624},
      {1, 0, 0, 0}},
     {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
       0x4a5b506612a677a6},
      {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
       0xeb13461ceac089f1},
      {1, 0, 0, 0}},
     {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
       0x0781b8291c6a220a},
      {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
       0x690cde8df0151593},
      {1, 0, 0, 0}},
     {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
       0x8a535f566ec73617},
      {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
       0x0455c08468b08bd7},
      {1, 0, 0, 0}},
     {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
       0x06bada7ab77f8276},
      {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
       0x5b476dfd0e6cb18a},
      {1, 0, 0, 0}},
     {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
       0x3e29864e8a2ec908},
      {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
       0x239b90ea3dc31e7e},
      {1, 0, 0, 0}},
     {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
       0x820f4dd949f72ff7},
      {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
       0x140406ec783a05ec},
      {1, 0, 0, 0}},
     {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
       0x68f6b8542783dfee},
      {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
       0xcbe1feba92e40ce6},
      {1, 0, 0, 0}},
     {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
       0xd0b2f94d2f420109},
      {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
       0x971459828b0719e5},
      {1, 0, 0, 0}},
     {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
       0x961610004a866aba},
      {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
       0x7acb9fadcee75e44},
      {1, 0, 0, 0}},
     {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
       0x24eb9acca333bf5b},
      {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
       0x69f891c5acd079cc},
      {1, 0, 0, 0}},
     {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
       0xe51f547c5972a107},
      {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
       0x1c309a2b25bb1387},
      {1, 0, 0, 0}},
     {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
       0x20b87b8aa2c4e503},
      {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
       0xf5c6fa49919776be},
      {1, 0, 0, 0}},
     {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
       0x1ed7d1b9332010b9},
      {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
       0x3a2b03f03217257a},
      {1, 0, 0, 0}},
     {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
       0x15fee545c78dd9f6},
      {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
       0x4ab5b6b2b8753f81},
      {1, 0, 0, 0}}}
};

/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
 *
 * Constant time with respect to |idx|: every table entry is read, and each
 * is ORed in under a mask that is all ones only for entry |idx|; there is no
 * idx-dependent branch or memory access. The bit-fold below ORs the low 8
 * bits of i ^ idx into bit 0, so it requires idx < 256 and size <= 256
 * (callers pass 16 or 17). If idx >= size, out is left as all zeros.
 */
static void select_point(const u64 idx, unsigned int size,
                         const smallfelem pre_comp[16][3], smallfelem out[3])
{
    unsigned i, j;
    u64 *outlimbs = &out[0][0];
    memset(outlimbs, 0, 3 * sizeof(smallfelem));

    for (i = 0; i < size; i++) {
        const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
        u64 mask = i ^ idx;
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;
        /* mask is now all ones iff i == idx, else 0 */
        for (j = 0; j < NLIMBS * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}

/* get_bit returns the |i|th bit in |in| (0 for out-of-range indices) */
static char get_bit(const felem_bytearray in, int i)
{
    if ((i < 0) || (i >= 256))
        return 0;
    return (in[i >> 3] >> (i & 7)) & 1;
}

/*
 * Interleaved point multiplication using precomputed point multiples: The
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
 * generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const smallfelem pre_comp[][17][3],
                      const smallfelem g_pre_comp[2][16][3])
{
    int i, skip;
    /* gen_mul is nonzero iff we also accumulate a multiple of the generator */
    unsigned num, gen_mul = (g_scalar != NULL);
    felem nq[3], ftmp;
    smallfelem tmp[3];
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 32 rounds) and additions of
     * other points multiples (every 5th round).
     */
    skip = 1;                   /* save two point operations in the first
                                 * round */
    for (i = (num_points ? 255 : 31); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 31)) {
            /* first, look 32 bits upwards */
            bits = get_bit(g_scalar, i + 224) << 3;
            bits |= get_bit(g_scalar, i + 160) << 2;
            bits |= get_bit(g_scalar, i + 96) << 1;
            bits |= get_bit(g_scalar, i + 32);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* Arg 1 below is for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first addition: just load the selected point into nq */
                smallfelem_expand(nq[0], tmp[0]);
                smallfelem_expand(nq[1], tmp[1]);
                smallfelem_expand(nq[2], tmp[2]);
                skip = 0;
            }

            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 192) << 3;
            bits |= get_bit(g_scalar, i + 128) << 2;
            bits |= get_bit(g_scalar, i + 64) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            /* Arg 1 below is for "mixed" */
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /*
                 * gather a 6-bit window (5 new bits plus one bit below,
                 * consumed by the signed recoding)
                 */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                /* digit indexes the 17-entry table; sign selects -P below */
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /*
                 * select the point to add or subtract, in constant time
                 */
                select_point(digit, 17, pre_comp[num], tmp);
                smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
                                               * point */
                /*
                 * sign == 0: mask is all ones, so the positive Y is copied
                 * back; sign == 1: mask is 0 and the negated Y is kept
                 */
                copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
                felem_contract(tmp[1], ftmp);

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    /* first addition: just load the selected point into nq */
                    smallfelem_expand(nq[0], tmp[0]);
                    smallfelem_expand(nq[1], tmp[1]);
                    smallfelem_expand(nq[2], tmp[2]);
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}

/* Precomputation for the group generator.
*/ 1751typedef struct { 1752 smallfelem g_pre_comp[2][16][3]; 1753 int references; 1754} NISTP256_PRE_COMP; 1755 1756const EC_METHOD *EC_GFp_nistp256_method(void) 1757{ 1758 static const EC_METHOD ret = { 1759 EC_FLAGS_DEFAULT_OCT, 1760 NID_X9_62_prime_field, 1761 ec_GFp_nistp256_group_init, 1762 ec_GFp_simple_group_finish, 1763 ec_GFp_simple_group_clear_finish, 1764 ec_GFp_nist_group_copy, 1765 ec_GFp_nistp256_group_set_curve, 1766 ec_GFp_simple_group_get_curve, 1767 ec_GFp_simple_group_get_degree, 1768 ec_GFp_simple_group_check_discriminant, 1769 ec_GFp_simple_point_init, 1770 ec_GFp_simple_point_finish, 1771 ec_GFp_simple_point_clear_finish, 1772 ec_GFp_simple_point_copy, 1773 ec_GFp_simple_point_set_to_infinity, 1774 ec_GFp_simple_set_Jprojective_coordinates_GFp, 1775 ec_GFp_simple_get_Jprojective_coordinates_GFp, 1776 ec_GFp_simple_point_set_affine_coordinates, 1777 ec_GFp_nistp256_point_get_affine_coordinates, 1778 0 /* point_set_compressed_coordinates */ , 1779 0 /* point2oct */ , 1780 0 /* oct2point */ , 1781 ec_GFp_simple_add, 1782 ec_GFp_simple_dbl, 1783 ec_GFp_simple_invert, 1784 ec_GFp_simple_is_at_infinity, 1785 ec_GFp_simple_is_on_curve, 1786 ec_GFp_simple_cmp, 1787 ec_GFp_simple_make_affine, 1788 ec_GFp_simple_points_make_affine, 1789 ec_GFp_nistp256_points_mul, 1790 ec_GFp_nistp256_precompute_mult, 1791 ec_GFp_nistp256_have_precompute_mult, 1792 ec_GFp_nist_field_mul, 1793 ec_GFp_nist_field_sqr, 1794 0 /* field_div */ , 1795 0 /* field_encode */ , 1796 0 /* field_decode */ , 1797 0 /* field_set_to_one */ 1798 }; 1799 1800 return &ret; 1801} 1802 1803/******************************************************************************/ 1804/* 1805 * FUNCTIONS TO MANAGE PRECOMPUTATION 1806 */ 1807 1808static NISTP256_PRE_COMP *nistp256_pre_comp_new() 1809{ 1810 NISTP256_PRE_COMP *ret = NULL; 1811 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof(*ret)); 1812 if (!ret) { 1813 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1814 return ret; 1815 } 
1816 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1817 ret->references = 1; 1818 return ret; 1819} 1820 1821static void *nistp256_pre_comp_dup(void *src_) 1822{ 1823 NISTP256_PRE_COMP *src = src_; 1824 1825 /* no need to actually copy, these objects never change! */ 1826 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1827 1828 return src_; 1829} 1830 1831static void nistp256_pre_comp_free(void *pre_) 1832{ 1833 int i; 1834 NISTP256_PRE_COMP *pre = pre_; 1835 1836 if (!pre) 1837 return; 1838 1839 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1840 if (i > 0) 1841 return; 1842 1843 OPENSSL_free(pre); 1844} 1845 1846static void nistp256_pre_comp_clear_free(void *pre_) 1847{ 1848 int i; 1849 NISTP256_PRE_COMP *pre = pre_; 1850 1851 if (!pre) 1852 return; 1853 1854 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1855 if (i > 0) 1856 return; 1857 1858 OPENSSL_cleanse(pre, sizeof(*pre)); 1859 OPENSSL_free(pre); 1860} 1861 1862/******************************************************************************/ 1863/* 1864 * OPENSSL EC_METHOD FUNCTIONS 1865 */ 1866 1867int ec_GFp_nistp256_group_init(EC_GROUP *group) 1868{ 1869 int ret; 1870 ret = ec_GFp_simple_group_init(group); 1871 group->a_is_minus3 = 1; 1872 return ret; 1873} 1874 1875int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1876 const BIGNUM *a, const BIGNUM *b, 1877 BN_CTX *ctx) 1878{ 1879 int ret = 0; 1880 BN_CTX *new_ctx = NULL; 1881 BIGNUM *curve_p, *curve_a, *curve_b; 1882 1883 if (ctx == NULL) 1884 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1885 return 0; 1886 BN_CTX_start(ctx); 1887 if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1888 ((curve_a = BN_CTX_get(ctx)) == NULL) || 1889 ((curve_b = BN_CTX_get(ctx)) == NULL)) 1890 goto err; 1891 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p); 1892 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a); 1893 BN_bin2bn(nistp256_curve_params[2], 
sizeof(felem_bytearray), curve_b); 1894 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1895 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE, 1896 EC_R_WRONG_CURVE_PARAMETERS); 1897 goto err; 1898 } 1899 group->field_mod_func = BN_nist_mod_256; 1900 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1901 err: 1902 BN_CTX_end(ctx); 1903 if (new_ctx != NULL) 1904 BN_CTX_free(new_ctx); 1905 return ret; 1906} 1907 1908/* 1909 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1910 * (X/Z^2, Y/Z^3) 1911 */ 1912int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group, 1913 const EC_POINT *point, 1914 BIGNUM *x, BIGNUM *y, 1915 BN_CTX *ctx) 1916{ 1917 felem z1, z2, x_in, y_in; 1918 smallfelem x_out, y_out; 1919 longfelem tmp; 1920 1921 if (EC_POINT_is_at_infinity(group, point)) { 1922 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1923 EC_R_POINT_AT_INFINITY); 1924 return 0; 1925 } 1926 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1927 (!BN_to_felem(z1, &point->Z))) 1928 return 0; 1929 felem_inv(z2, z1); 1930 felem_square(tmp, z2); 1931 felem_reduce(z1, tmp); 1932 felem_mul(tmp, x_in, z1); 1933 felem_reduce(x_in, tmp); 1934 felem_contract(x_out, x_in); 1935 if (x != NULL) { 1936 if (!smallfelem_to_BN(x, x_out)) { 1937 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1938 ERR_R_BN_LIB); 1939 return 0; 1940 } 1941 } 1942 felem_mul(tmp, z1, z2); 1943 felem_reduce(z1, tmp); 1944 felem_mul(tmp, y_in, z1); 1945 felem_reduce(y_in, tmp); 1946 felem_contract(y_out, y_in); 1947 if (y != NULL) { 1948 if (!smallfelem_to_BN(y, y_out)) { 1949 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1950 ERR_R_BN_LIB); 1951 return 0; 1952 } 1953 } 1954 return 1; 1955} 1956 1957/* points below is of size |num|, and tmp_smallfelems is of size |num+1| */ 1958static void make_points_affine(size_t num, smallfelem points[][3], 1959 smallfelem tmp_smallfelems[]) 1960{ 1961 /* 
1962 * Runs in constant time, unless an input is the point at infinity (which 1963 * normally shouldn't happen). 1964 */ 1965 ec_GFp_nistp_points_make_affine_internal(num, 1966 points, 1967 sizeof(smallfelem), 1968 tmp_smallfelems, 1969 (void (*)(void *))smallfelem_one, 1970 smallfelem_is_zero_int, 1971 (void (*)(void *, const void *)) 1972 smallfelem_assign, 1973 (void (*)(void *, const void *)) 1974 smallfelem_square_contract, 1975 (void (*) 1976 (void *, const void *, 1977 const void *)) 1978 smallfelem_mul_contract, 1979 (void (*)(void *, const void *)) 1980 smallfelem_inv_contract, 1981 /* nothing to contract */ 1982 (void (*)(void *, const void *)) 1983 smallfelem_assign); 1984} 1985 1986/* 1987 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1988 * values Result is stored in r (r can equal one of the inputs). 1989 */ 1990int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r, 1991 const BIGNUM *scalar, size_t num, 1992 const EC_POINT *points[], 1993 const BIGNUM *scalars[], BN_CTX *ctx) 1994{ 1995 int ret = 0; 1996 int j; 1997 int mixed = 0; 1998 BN_CTX *new_ctx = NULL; 1999 BIGNUM *x, *y, *z, *tmp_scalar; 2000 felem_bytearray g_secret; 2001 felem_bytearray *secrets = NULL; 2002 smallfelem(*pre_comp)[17][3] = NULL; 2003 smallfelem *tmp_smallfelems = NULL; 2004 unsigned i; 2005 int num_bytes; 2006 int have_pre_comp = 0; 2007 size_t num_points = num; 2008 smallfelem x_in, y_in, z_in; 2009 felem x_out, y_out, z_out; 2010 NISTP256_PRE_COMP *pre = NULL; 2011 const smallfelem(*g_pre_comp)[16][3] = NULL; 2012 EC_POINT *generator = NULL; 2013 const EC_POINT *p = NULL; 2014 const BIGNUM *p_scalar = NULL; 2015 2016 if (ctx == NULL) 2017 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 2018 return 0; 2019 BN_CTX_start(ctx); 2020 if (((x = BN_CTX_get(ctx)) == NULL) || 2021 ((y = BN_CTX_get(ctx)) == NULL) || 2022 ((z = BN_CTX_get(ctx)) == NULL) || 2023 ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 2024 goto err; 2025 2026 if (scalar != NULL) { 
2027 pre = EC_EX_DATA_get_data(group->extra_data, 2028 nistp256_pre_comp_dup, 2029 nistp256_pre_comp_free, 2030 nistp256_pre_comp_clear_free); 2031 if (pre) 2032 /* we have precomputation, try to use it */ 2033 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp; 2034 else 2035 /* try to use the standard precomputation */ 2036 g_pre_comp = &gmul[0]; 2037 generator = EC_POINT_new(group); 2038 if (generator == NULL) 2039 goto err; 2040 /* get the generator from precomputation */ 2041 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) || 2042 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) || 2043 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) { 2044 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2045 goto err; 2046 } 2047 if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 2048 generator, x, y, z, 2049 ctx)) 2050 goto err; 2051 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 2052 /* precomputation matches generator */ 2053 have_pre_comp = 1; 2054 else 2055 /* 2056 * we don't have valid precomputation: treat the generator as a 2057 * random point 2058 */ 2059 num_points++; 2060 } 2061 if (num_points > 0) { 2062 if (num_points >= 3) { 2063 /* 2064 * unless we precompute multiples for just one or two points, 2065 * converting those into affine form is time well spent 2066 */ 2067 mixed = 1; 2068 } 2069 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 2070 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem)); 2071 if (mixed) 2072 tmp_smallfelems = 2073 OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem)); 2074 if ((secrets == NULL) || (pre_comp == NULL) 2075 || (mixed && (tmp_smallfelems == NULL))) { 2076 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE); 2077 goto err; 2078 } 2079 2080 /* 2081 * we treat NULL scalars as 0, and NULL points as points at infinity, 2082 * i.e., they contribute nothing to the linear combination 2083 */ 2084 memset(secrets, 0, num_points * sizeof(felem_bytearray)); 2085 
memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem)); 2086 for (i = 0; i < num_points; ++i) { 2087 if (i == num) { 2088 /* 2089 * we didn't have a valid precomputation, so we pick the 2090 * generator 2091 */ 2092 p = EC_GROUP_get0_generator(group); 2093 p_scalar = scalar; 2094 } else { 2095 /* the i^th point */ 2096 p = points[i]; 2097 p_scalar = scalars[i]; 2098 } 2099 if ((p_scalar != NULL) && (p != NULL)) { 2100 /* reduce scalar to 0 <= scalar < 2^256 */ 2101 if ((BN_num_bits(p_scalar) > 256) 2102 || (BN_is_negative(p_scalar))) { 2103 /* 2104 * this is an unusual input, and we don't guarantee 2105 * constant-timeness 2106 */ 2107 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 2108 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2109 goto err; 2110 } 2111 num_bytes = bn_bn2lebinpad(tmp_scalar, 2112 secrets[i], sizeof(secrets[i])); 2113 } else { 2114 num_bytes = bn_bn2lebinpad(p_scalar, 2115 secrets[i], sizeof(secrets[i])); 2116 } 2117 if (num_bytes < 0) { 2118 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2119 goto err; 2120 } 2121 /* precompute multiples */ 2122 if ((!BN_to_felem(x_out, &p->X)) || 2123 (!BN_to_felem(y_out, &p->Y)) || 2124 (!BN_to_felem(z_out, &p->Z))) 2125 goto err; 2126 felem_shrink(pre_comp[i][1][0], x_out); 2127 felem_shrink(pre_comp[i][1][1], y_out); 2128 felem_shrink(pre_comp[i][1][2], z_out); 2129 for (j = 2; j <= 16; ++j) { 2130 if (j & 1) { 2131 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1], 2132 pre_comp[i][j][2], pre_comp[i][1][0], 2133 pre_comp[i][1][1], pre_comp[i][1][2], 2134 pre_comp[i][j - 1][0], 2135 pre_comp[i][j - 1][1], 2136 pre_comp[i][j - 1][2]); 2137 } else { 2138 point_double_small(pre_comp[i][j][0], 2139 pre_comp[i][j][1], 2140 pre_comp[i][j][2], 2141 pre_comp[i][j / 2][0], 2142 pre_comp[i][j / 2][1], 2143 pre_comp[i][j / 2][2]); 2144 } 2145 } 2146 } 2147 } 2148 if (mixed) 2149 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems); 2150 } 2151 2152 /* the scalar for 
the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^256 */
        if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = bn_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
        } else {
            num_bytes = bn_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
        }
        /*
         * NOTE(review): unlike the per-point path above, num_bytes is not
         * checked for a negative (error) return here before batch_mul uses
         * g_secret.
         */
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
    } else {
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
    }
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
        (!smallfelem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    /* common cleanup path; OPENSSL_free/EC_POINT_free tolerate untouched
     * pointers only via the explicit NULL guards below */
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_smallfelems != NULL)
        OPENSSL_free(tmp_smallfelems);
    return ret;
}

/*
 * Precompute two tables of small multiples of the group generator and attach
 * them to the group as extra data, for use by ec_GFp_nistp256_points_mul.
 * If the group uses the standard P-256 generator, the compiled-in table
 * (gmul) is copied instead of being recomputed.
 * Returns 1 on success, 0 on error.
 */
int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP256_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    smallfelem tmp_smallfelems[32];
    felem x_tmp, y_tmp, z_tmp;

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
                         nistp256_pre_comp_free,
                         nistp256_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /*
     * Build the standard P-256 generator from the compiled-in affine
     * coordinates (curve_params[3] = x, [4] = y).
     * NOTE(review): the BN_bin2bn() return values are not checked; they can
     * be NULL on allocation failure.
     */
    BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp256_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    /* non-standard generator: seed table entry [0][1] with 1*G */
    if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
        (!BN_to_felem(y_tmp, &group->generator->Y)) ||
        (!BN_to_felem(z_tmp, &group->generator->Z)))
        goto err;
    felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
    felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
    felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
    /*
     * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
     * 2^160*G, 2^224*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* 32 doublings in total: one here plus 31 in the loop below */
        point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                           pre->g_pre_comp[0][i][1],
                           pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2],
                               pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        point_double_small(pre->g_pre_comp[0][2 * i][0],
                           pre->g_pre_comp[0][2 * i][1],
                           pre->g_pre_comp[0][2 * i][2],
                           pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2],
                               pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
        point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                        pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                        pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                        pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                        pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                        pre->g_pre_comp[i][4][2]);
        /*
         * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
         */
        point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                        pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                        pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^32*G */
            point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
                            pre->g_pre_comp[i][2 * j + 1][1],
                            pre->g_pre_comp[i][2 * j + 1][2],
                            pre->g_pre_comp[i][2 * j][0],
                            pre->g_pre_comp[i][2 * j][1],
                            pre->g_pre_comp[i][2 * j][2],
                            pre->g_pre_comp[i][1][0],
                            pre->g_pre_comp[i][1][1],
                            pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert all 31 non-infinity entries to affine form in one batch */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);

 done:
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
                             nistp256_pre_comp_free,
                             nistp256_pre_comp_clear_free))
        goto err;
    ret = 1;
    /* ownership transferred to group->extra_data; don't free below */
    pre = NULL;
 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp256_pre_comp_free(pre);
    return ret;
}

/*
 * Report whether generator precomputation is attached to this group.
 * Returns 1 if precomputation is available, 0 otherwise.
 */
int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
{
    if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
                            nistp256_pre_comp_free,
                            nistp256_pre_comp_clear_free)
        != NULL)
        return 1;
    else
        return 0;
}
#else
/* suppress "empty translation unit" warnings when the NISTP code is disabled */
static void *dummy = &dummy;
#endif