1238384Sjkim/* crypto/ec/ecp_nistp521.c */ 2238384Sjkim/* 3238384Sjkim * Written by Adam Langley (Google) for the OpenSSL project 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25238384Sjkim * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26238384Sjkim * work which got its smarts from Daniel J. Bernstein's work on the same. 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#include <openssl/opensslconf.h> 30238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31238384Sjkim 32280304Sjkim# ifndef OPENSSL_SYS_VMS 33280304Sjkim# include <stdint.h> 34280304Sjkim# else 35280304Sjkim# include <inttypes.h> 36280304Sjkim# endif 37238384Sjkim 38280304Sjkim# include <string.h> 39280304Sjkim# include <openssl/err.h> 40280304Sjkim# include "ec_lcl.h" 41238384Sjkim 42280304Sjkim# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 43238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 44280304Sjkimtypedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 45280304Sjkim * platforms */ 46280304Sjkim# else 47280304Sjkim# error "Need GCC 3.1 or later to define type uint128_t" 48280304Sjkim# endif 49238384Sjkim 50238384Sjkimtypedef uint8_t u8; 51238384Sjkimtypedef uint64_t u64; 52238384Sjkimtypedef int64_t s64; 53238384Sjkim 54280304Sjkim/* 55280304Sjkim * The underlying field. P521 operates over GF(2^521-1). We can serialise an 56280304Sjkim * element of this field into 66 bytes where the most significant byte 57280304Sjkim * contains only a single bit. We call this an felem_bytearray. 58280304Sjkim */ 59238384Sjkim 60238384Sjkimtypedef u8 felem_bytearray[66]; 61238384Sjkim 62280304Sjkim/* 63280304Sjkim * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5. 64280304Sjkim * These values are big-endian. 65280304Sjkim */ 66280304Sjkimstatic const felem_bytearray nistp521_curve_params[5] = { 67280304Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ 68280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 69280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 70280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 71280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 72280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 73280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 74280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 75280304Sjkim 0xff, 0xff}, 76280304Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ 77280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 78280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 79280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 80280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 81280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 82280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 83280304Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 84280304Sjkim 0xff, 0xfc}, 85280304Sjkim {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ 86280304Sjkim 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 87280304Sjkim 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 88280304Sjkim 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 89280304Sjkim 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 90280304Sjkim 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 91280304Sjkim 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 92280304Sjkim 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 93280304Sjkim 0x3f, 0x00}, 94280304Sjkim {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ 95280304Sjkim 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 96280304Sjkim 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, 97280304Sjkim 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 98280304Sjkim 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 99280304Sjkim 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 100280304Sjkim 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 101280304Sjkim 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 102280304Sjkim 0xbd, 0x66}, 103280304Sjkim {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ 104280304Sjkim 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, 105280304Sjkim 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, 106280304Sjkim 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, 107280304Sjkim 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, 108280304Sjkim 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, 109280304Sjkim 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, 110280304Sjkim 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, 111280304Sjkim 0x66, 0x50} 112280304Sjkim}; 113238384Sjkim 114280304Sjkim/*- 115280304Sjkim * The representation of field elements. 116238384Sjkim * ------------------------------------ 117238384Sjkim * 118238384Sjkim * We represent field elements with nine values. These values are either 64 or 119238384Sjkim * 128 bits and the field element represented is: 120238384Sjkim * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p) 121238384Sjkim * Each of the nine values is called a 'limb'. Since the limbs are spaced only 122238384Sjkim * 58 bits apart, but are greater than 58 bits in length, the most significant 123238384Sjkim * bits of each limb overlap with the least significant bits of the next. 124238384Sjkim * 125238384Sjkim * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a 126238384Sjkim * 'largefelem' */ 127238384Sjkim 128280304Sjkim# define NLIMBS 9 129238384Sjkim 130238384Sjkimtypedef uint64_t limb; 131238384Sjkimtypedef limb felem[NLIMBS]; 132238384Sjkimtypedef uint128_t largefelem[NLIMBS]; 133238384Sjkim 134238384Sjkimstatic const limb bottom57bits = 0x1ffffffffffffff; 135238384Sjkimstatic const limb bottom58bits = 0x3ffffffffffffff; 136238384Sjkim 137280304Sjkim/* 138280304Sjkim * bin66_to_felem takes a little-endian byte array and converts it into felem 139280304Sjkim * form. This assumes that the CPU is little-endian. 140280304Sjkim */ 141238384Sjkimstatic void bin66_to_felem(felem out, const u8 in[66]) 142280304Sjkim{ 143280304Sjkim out[0] = (*((limb *) & in[0])) & bottom58bits; 144280304Sjkim out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits; 145280304Sjkim out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits; 146280304Sjkim out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits; 147280304Sjkim out[4] = (*((limb *) & in[29])) & bottom58bits; 148280304Sjkim out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits; 149280304Sjkim out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits; 150280304Sjkim out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits; 151280304Sjkim out[8] = (*((limb *) & in[58])) & bottom57bits; 152280304Sjkim} 153238384Sjkim 154280304Sjkim/* 155280304Sjkim * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte 156280304Sjkim * array. This assumes that the CPU is little-endian. 157280304Sjkim */ 158238384Sjkimstatic void felem_to_bin66(u8 out[66], const felem in) 159280304Sjkim{ 160280304Sjkim memset(out, 0, 66); 161280304Sjkim (*((limb *) & out[0])) = in[0]; 162280304Sjkim (*((limb *) & out[7])) |= in[1] << 2; 163280304Sjkim (*((limb *) & out[14])) |= in[2] << 4; 164280304Sjkim (*((limb *) & out[21])) |= in[3] << 6; 165280304Sjkim (*((limb *) & out[29])) = in[4]; 166280304Sjkim (*((limb *) & out[36])) |= in[5] << 2; 167280304Sjkim (*((limb *) & out[43])) |= in[6] << 4; 168280304Sjkim (*((limb *) & out[50])) |= in[7] << 6; 169280304Sjkim (*((limb *) & out[58])) = in[8]; 170280304Sjkim} 171238384Sjkim 172238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 173238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 174280304Sjkim{ 175280304Sjkim unsigned i; 176280304Sjkim for (i = 0; i < len; ++i) 177280304Sjkim out[i] = in[len - 1 - i]; 178280304Sjkim} 179238384Sjkim 180238384Sjkim/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 181238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 182280304Sjkim{ 183280304Sjkim felem_bytearray b_in; 184280304Sjkim felem_bytearray b_out; 185280304Sjkim unsigned num_bytes; 186238384Sjkim 187280304Sjkim /* BN_bn2bin eats leading zeroes */ 188280304Sjkim memset(b_out, 0, sizeof b_out); 189280304Sjkim num_bytes = BN_num_bytes(bn); 190280304Sjkim if (num_bytes > sizeof b_out) { 191280304Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 192280304Sjkim return 0; 193280304Sjkim } 194280304Sjkim if (BN_is_negative(bn)) { 195280304Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 196280304Sjkim return 0; 197280304Sjkim } 198280304Sjkim num_bytes = BN_bn2bin(bn, b_in); 199280304Sjkim flip_endian(b_out, b_in, num_bytes); 200280304Sjkim bin66_to_felem(out, b_out); 201280304Sjkim return 1; 202280304Sjkim} 203238384Sjkim 204238384Sjkim/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 205238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 206280304Sjkim{ 207280304Sjkim felem_bytearray b_in, b_out; 208280304Sjkim felem_to_bin66(b_in, in); 209280304Sjkim flip_endian(b_out, b_in, sizeof b_out); 210280304Sjkim return BN_bin2bn(b_out, sizeof b_out, out); 211280304Sjkim} 212238384Sjkim 213280304Sjkim/*- 214280304Sjkim * Field operations 215280304Sjkim * ---------------- 216280304Sjkim */ 217238384Sjkim 218238384Sjkimstatic void felem_one(felem out) 219280304Sjkim{ 220280304Sjkim out[0] = 1; 221280304Sjkim out[1] = 0; 222280304Sjkim out[2] = 0; 223280304Sjkim out[3] = 0; 224280304Sjkim out[4] = 0; 225280304Sjkim out[5] = 0; 226280304Sjkim out[6] = 0; 227280304Sjkim out[7] = 0; 228280304Sjkim out[8] = 0; 229280304Sjkim} 230238384Sjkim 231238384Sjkimstatic void felem_assign(felem out, const felem in) 232280304Sjkim{ 233280304Sjkim out[0] = in[0]; 234280304Sjkim out[1] = in[1]; 235280304Sjkim out[2] = in[2]; 236280304Sjkim out[3] = in[3]; 237280304Sjkim out[4] = in[4]; 238280304Sjkim out[5] = in[5]; 239280304Sjkim out[6] = in[6]; 240280304Sjkim out[7] = in[7]; 241280304Sjkim out[8] = in[8]; 242280304Sjkim} 243238384Sjkim 244238384Sjkim/* felem_sum64 sets out = out + in. */ 245238384Sjkimstatic void felem_sum64(felem out, const felem in) 246280304Sjkim{ 247280304Sjkim out[0] += in[0]; 248280304Sjkim out[1] += in[1]; 249280304Sjkim out[2] += in[2]; 250280304Sjkim out[3] += in[3]; 251280304Sjkim out[4] += in[4]; 252280304Sjkim out[5] += in[5]; 253280304Sjkim out[6] += in[6]; 254280304Sjkim out[7] += in[7]; 255280304Sjkim out[8] += in[8]; 256280304Sjkim} 257238384Sjkim 258238384Sjkim/* felem_scalar sets out = in * scalar */ 259238384Sjkimstatic void felem_scalar(felem out, const felem in, limb scalar) 260280304Sjkim{ 261280304Sjkim out[0] = in[0] * scalar; 262280304Sjkim out[1] = in[1] * scalar; 263280304Sjkim out[2] = in[2] * scalar; 264280304Sjkim out[3] = in[3] * scalar; 265280304Sjkim out[4] = in[4] * scalar; 266280304Sjkim out[5] = in[5] * scalar; 267280304Sjkim out[6] = in[6] * scalar; 268280304Sjkim out[7] = in[7] * scalar; 269280304Sjkim out[8] = in[8] * scalar; 270280304Sjkim} 271238384Sjkim 272238384Sjkim/* felem_scalar64 sets out = out * scalar */ 273238384Sjkimstatic void felem_scalar64(felem out, limb scalar) 274280304Sjkim{ 275280304Sjkim out[0] *= scalar; 276280304Sjkim out[1] *= scalar; 277280304Sjkim out[2] *= scalar; 278280304Sjkim out[3] *= scalar; 279280304Sjkim out[4] *= scalar; 280280304Sjkim out[5] *= scalar; 281280304Sjkim out[6] *= scalar; 282280304Sjkim out[7] *= scalar; 283280304Sjkim out[8] *= scalar; 284280304Sjkim} 285238384Sjkim 286238384Sjkim/* felem_scalar128 sets out = out * scalar */ 287238384Sjkimstatic void felem_scalar128(largefelem out, limb scalar) 288280304Sjkim{ 289280304Sjkim out[0] *= scalar; 290280304Sjkim out[1] *= scalar; 291280304Sjkim out[2] *= scalar; 292280304Sjkim out[3] *= scalar; 293280304Sjkim out[4] *= scalar; 294280304Sjkim out[5] *= scalar; 295280304Sjkim out[6] *= scalar; 296280304Sjkim out[7] *= scalar; 297280304Sjkim out[8] *= scalar; 298280304Sjkim} 299238384Sjkim 300280304Sjkim/*- 301280304Sjkim * felem_neg sets |out| to |-in| 302238384Sjkim * On entry: 303238384Sjkim * in[i] < 2^59 + 2^14 304238384Sjkim * On exit: 305238384Sjkim * out[i] < 2^62 306238384Sjkim */ 307238384Sjkimstatic void felem_neg(felem out, const felem in) 308280304Sjkim{ 309280304Sjkim /* In order to prevent underflow, we subtract from 0 mod p. */ 310280304Sjkim static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 311280304Sjkim static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 312238384Sjkim 313280304Sjkim out[0] = two62m3 - in[0]; 314280304Sjkim out[1] = two62m2 - in[1]; 315280304Sjkim out[2] = two62m2 - in[2]; 316280304Sjkim out[3] = two62m2 - in[3]; 317280304Sjkim out[4] = two62m2 - in[4]; 318280304Sjkim out[5] = two62m2 - in[5]; 319280304Sjkim out[6] = two62m2 - in[6]; 320280304Sjkim out[7] = two62m2 - in[7]; 321280304Sjkim out[8] = two62m2 - in[8]; 322280304Sjkim} 323238384Sjkim 324280304Sjkim/*- 325280304Sjkim * felem_diff64 subtracts |in| from |out| 326238384Sjkim * On entry: 327238384Sjkim * in[i] < 2^59 + 2^14 328238384Sjkim * On exit: 329238384Sjkim * out[i] < out[i] + 2^62 330238384Sjkim */ 331238384Sjkimstatic void felem_diff64(felem out, const felem in) 332280304Sjkim{ 333280304Sjkim /* 334280304Sjkim * In order to prevent underflow, we add 0 mod p before subtracting. 335280304Sjkim */ 336280304Sjkim static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 337280304Sjkim static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 338238384Sjkim 339280304Sjkim out[0] += two62m3 - in[0]; 340280304Sjkim out[1] += two62m2 - in[1]; 341280304Sjkim out[2] += two62m2 - in[2]; 342280304Sjkim out[3] += two62m2 - in[3]; 343280304Sjkim out[4] += two62m2 - in[4]; 344280304Sjkim out[5] += two62m2 - in[5]; 345280304Sjkim out[6] += two62m2 - in[6]; 346280304Sjkim out[7] += two62m2 - in[7]; 347280304Sjkim out[8] += two62m2 - in[8]; 348280304Sjkim} 349238384Sjkim 350280304Sjkim/*- 351280304Sjkim * felem_diff_128_64 subtracts |in| from |out| 352238384Sjkim * On entry: 353238384Sjkim * in[i] < 2^62 + 2^17 354238384Sjkim * On exit: 355238384Sjkim * out[i] < out[i] + 2^63 356238384Sjkim */ 357238384Sjkimstatic void felem_diff_128_64(largefelem out, const felem in) 358280304Sjkim{ 359280304Sjkim /* 360280304Sjkim * In order to prevent underflow, we add 0 mod p before subtracting. 361280304Sjkim */ 362280304Sjkim static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5); 363280304Sjkim static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4); 364238384Sjkim 365280304Sjkim out[0] += two63m6 - in[0]; 366280304Sjkim out[1] += two63m5 - in[1]; 367280304Sjkim out[2] += two63m5 - in[2]; 368280304Sjkim out[3] += two63m5 - in[3]; 369280304Sjkim out[4] += two63m5 - in[4]; 370280304Sjkim out[5] += two63m5 - in[5]; 371280304Sjkim out[6] += two63m5 - in[6]; 372280304Sjkim out[7] += two63m5 - in[7]; 373280304Sjkim out[8] += two63m5 - in[8]; 374280304Sjkim} 375238384Sjkim 376280304Sjkim/*- 377280304Sjkim * felem_diff_128_64 subtracts |in| from |out| 378238384Sjkim * On entry: 379238384Sjkim * in[i] < 2^126 380238384Sjkim * On exit: 381238384Sjkim * out[i] < out[i] + 2^127 - 2^69 382238384Sjkim */ 383238384Sjkimstatic void felem_diff128(largefelem out, const largefelem in) 384280304Sjkim{ 385280304Sjkim /* 386280304Sjkim * In order to prevent underflow, we add 0 mod p before subtracting. 387280304Sjkim */ 388280304Sjkim static const uint128_t two127m70 = 389280304Sjkim (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70); 390280304Sjkim static const uint128_t two127m69 = 391280304Sjkim (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69); 392238384Sjkim 393280304Sjkim out[0] += (two127m70 - in[0]); 394280304Sjkim out[1] += (two127m69 - in[1]); 395280304Sjkim out[2] += (two127m69 - in[2]); 396280304Sjkim out[3] += (two127m69 - in[3]); 397280304Sjkim out[4] += (two127m69 - in[4]); 398280304Sjkim out[5] += (two127m69 - in[5]); 399280304Sjkim out[6] += (two127m69 - in[6]); 400280304Sjkim out[7] += (two127m69 - in[7]); 401280304Sjkim out[8] += (two127m69 - in[8]); 402280304Sjkim} 403238384Sjkim 404280304Sjkim/*- 405280304Sjkim * felem_square sets |out| = |in|^2 406238384Sjkim * On entry: 407238384Sjkim * in[i] < 2^62 408238384Sjkim * On exit: 409238384Sjkim * out[i] < 17 * max(in[i]) * max(in[i]) 410238384Sjkim */ 411238384Sjkimstatic void felem_square(largefelem out, const felem in) 412280304Sjkim{ 413280304Sjkim felem inx2, inx4; 414280304Sjkim felem_scalar(inx2, in, 2); 415280304Sjkim felem_scalar(inx4, in, 4); 416238384Sjkim 417280304Sjkim /*- 418280304Sjkim * We have many cases were we want to do 419280304Sjkim * in[x] * in[y] + 420280304Sjkim * in[y] * in[x] 421280304Sjkim * This is obviously just 422280304Sjkim * 2 * in[x] * in[y] 423280304Sjkim * However, rather than do the doubling on the 128 bit result, we 424280304Sjkim * double one of the inputs to the multiplication by reading from 425280304Sjkim * |inx2| 426280304Sjkim */ 427238384Sjkim 428280304Sjkim out[0] = ((uint128_t) in[0]) * in[0]; 429280304Sjkim out[1] = ((uint128_t) in[0]) * inx2[1]; 430280304Sjkim out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1]; 431280304Sjkim out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2]; 432280304Sjkim out[4] = ((uint128_t) in[0]) * inx2[4] + 433280304Sjkim ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2]; 434280304Sjkim out[5] = ((uint128_t) in[0]) * inx2[5] + 435280304Sjkim ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3]; 436280304Sjkim out[6] = ((uint128_t) in[0]) * inx2[6] + 437280304Sjkim ((uint128_t) in[1]) * inx2[5] + 438280304Sjkim ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3]; 439280304Sjkim out[7] = ((uint128_t) in[0]) * inx2[7] + 440280304Sjkim ((uint128_t) in[1]) * inx2[6] + 441280304Sjkim ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4]; 442280304Sjkim out[8] = ((uint128_t) in[0]) * inx2[8] + 443280304Sjkim ((uint128_t) in[1]) * inx2[7] + 444280304Sjkim ((uint128_t) in[2]) * inx2[6] + 445280304Sjkim ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4]; 446238384Sjkim 447280304Sjkim /* 448280304Sjkim * The remaining limbs fall above 2^521, with the first falling at 2^522. 449280304Sjkim * They correspond to locations one bit up from the limbs produced above 450280304Sjkim * so we would have to multiply by two to align them. Again, rather than 451280304Sjkim * operate on the 128-bit result, we double one of the inputs to the 452280304Sjkim * multiplication. If we want to double for both this reason, and the 453280304Sjkim * reason above, then we end up multiplying by four. 454280304Sjkim */ 455238384Sjkim 456280304Sjkim /* 9 */ 457280304Sjkim out[0] += ((uint128_t) in[1]) * inx4[8] + 458280304Sjkim ((uint128_t) in[2]) * inx4[7] + 459280304Sjkim ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5]; 460238384Sjkim 461280304Sjkim /* 10 */ 462280304Sjkim out[1] += ((uint128_t) in[2]) * inx4[8] + 463280304Sjkim ((uint128_t) in[3]) * inx4[7] + 464280304Sjkim ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5]; 465238384Sjkim 466280304Sjkim /* 11 */ 467280304Sjkim out[2] += ((uint128_t) in[3]) * inx4[8] + 468280304Sjkim ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6]; 469238384Sjkim 470280304Sjkim /* 12 */ 471280304Sjkim out[3] += ((uint128_t) in[4]) * inx4[8] + 472280304Sjkim ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6]; 473238384Sjkim 474280304Sjkim /* 13 */ 475280304Sjkim out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7]; 476238384Sjkim 477280304Sjkim /* 14 */ 478280304Sjkim out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7]; 479238384Sjkim 480280304Sjkim /* 15 */ 481280304Sjkim out[6] += ((uint128_t) in[7]) * inx4[8]; 482238384Sjkim 483280304Sjkim /* 16 */ 484280304Sjkim out[7] += ((uint128_t) in[8]) * inx2[8]; 485280304Sjkim} 486238384Sjkim 487280304Sjkim/*- 488280304Sjkim * felem_mul sets |out| = |in1| * |in2| 489238384Sjkim * On entry: 490238384Sjkim * in1[i] < 2^64 491238384Sjkim * in2[i] < 2^63 492238384Sjkim * On exit: 493238384Sjkim * out[i] < 17 * max(in1[i]) * max(in2[i]) 494238384Sjkim */ 495238384Sjkimstatic void felem_mul(largefelem out, const felem in1, const felem in2) 496280304Sjkim{ 497280304Sjkim felem in2x2; 498280304Sjkim felem_scalar(in2x2, in2, 2); 499238384Sjkim 500280304Sjkim out[0] = ((uint128_t) in1[0]) * in2[0]; 501238384Sjkim 502280304Sjkim out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0]; 503238384Sjkim 504280304Sjkim out[2] = ((uint128_t) in1[0]) * in2[2] + 505280304Sjkim ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0]; 506238384Sjkim 507280304Sjkim out[3] = ((uint128_t) in1[0]) * in2[3] + 508280304Sjkim ((uint128_t) in1[1]) * in2[2] + 509280304Sjkim ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0]; 510238384Sjkim 511280304Sjkim out[4] = ((uint128_t) in1[0]) * in2[4] + 512280304Sjkim ((uint128_t) in1[1]) * in2[3] + 513280304Sjkim ((uint128_t) in1[2]) * in2[2] + 514280304Sjkim ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0]; 515238384Sjkim 516280304Sjkim out[5] = ((uint128_t) in1[0]) * in2[5] + 517280304Sjkim ((uint128_t) in1[1]) * in2[4] + 518280304Sjkim ((uint128_t) in1[2]) * in2[3] + 519280304Sjkim ((uint128_t) in1[3]) * in2[2] + 520280304Sjkim ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0]; 521238384Sjkim 522280304Sjkim out[6] = ((uint128_t) in1[0]) * in2[6] + 523280304Sjkim ((uint128_t) in1[1]) * in2[5] + 524280304Sjkim ((uint128_t) in1[2]) * in2[4] + 525280304Sjkim ((uint128_t) in1[3]) * in2[3] + 526280304Sjkim ((uint128_t) in1[4]) * in2[2] + 527280304Sjkim ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0]; 528238384Sjkim 529280304Sjkim out[7] = ((uint128_t) in1[0]) * in2[7] + 530280304Sjkim ((uint128_t) in1[1]) * in2[6] + 531280304Sjkim ((uint128_t) in1[2]) * in2[5] + 532280304Sjkim ((uint128_t) in1[3]) * in2[4] + 533280304Sjkim ((uint128_t) in1[4]) * in2[3] + 534280304Sjkim ((uint128_t) in1[5]) * in2[2] + 535280304Sjkim ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0]; 536238384Sjkim 537280304Sjkim out[8] = ((uint128_t) in1[0]) * in2[8] + 538280304Sjkim ((uint128_t) in1[1]) * in2[7] + 539280304Sjkim ((uint128_t) in1[2]) * in2[6] + 540280304Sjkim ((uint128_t) in1[3]) * in2[5] + 541280304Sjkim ((uint128_t) in1[4]) * in2[4] + 542280304Sjkim ((uint128_t) in1[5]) * in2[3] + 543280304Sjkim ((uint128_t) in1[6]) * in2[2] + 544280304Sjkim ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0]; 545238384Sjkim 546280304Sjkim /* See comment in felem_square about the use of in2x2 here */ 547238384Sjkim 548280304Sjkim out[0] += ((uint128_t) in1[1]) * in2x2[8] + 549280304Sjkim ((uint128_t) in1[2]) * in2x2[7] + 550280304Sjkim ((uint128_t) in1[3]) * in2x2[6] + 551280304Sjkim ((uint128_t) in1[4]) * in2x2[5] + 552280304Sjkim ((uint128_t) in1[5]) * in2x2[4] + 553280304Sjkim ((uint128_t) in1[6]) * in2x2[3] + 554280304Sjkim ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1]; 555238384Sjkim 556280304Sjkim out[1] += ((uint128_t) in1[2]) * in2x2[8] + 557280304Sjkim ((uint128_t) in1[3]) * in2x2[7] + 558280304Sjkim ((uint128_t) in1[4]) * in2x2[6] + 559280304Sjkim ((uint128_t) in1[5]) * in2x2[5] + 560280304Sjkim ((uint128_t) in1[6]) * in2x2[4] + 561280304Sjkim ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2]; 562238384Sjkim 563280304Sjkim out[2] += ((uint128_t) in1[3]) * in2x2[8] + 564280304Sjkim ((uint128_t) in1[4]) * in2x2[7] + 565280304Sjkim ((uint128_t) in1[5]) * in2x2[6] + 566280304Sjkim ((uint128_t) in1[6]) * in2x2[5] + 567280304Sjkim ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3]; 568238384Sjkim 569280304Sjkim out[3] += ((uint128_t) in1[4]) * in2x2[8] + 570280304Sjkim ((uint128_t) in1[5]) * in2x2[7] + 571280304Sjkim ((uint128_t) in1[6]) * in2x2[6] + 572280304Sjkim ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4]; 573238384Sjkim 574280304Sjkim out[4] += ((uint128_t) in1[5]) * in2x2[8] + 575280304Sjkim ((uint128_t) in1[6]) * in2x2[7] + 576280304Sjkim ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5]; 577238384Sjkim 578280304Sjkim out[5] += ((uint128_t) in1[6]) * in2x2[8] + 579280304Sjkim ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6]; 580238384Sjkim 581280304Sjkim out[6] += ((uint128_t) in1[7]) * in2x2[8] + 582280304Sjkim ((uint128_t) in1[8]) * in2x2[7]; 583238384Sjkim 584280304Sjkim out[7] += ((uint128_t) in1[8]) * in2x2[8]; 585280304Sjkim} 586238384Sjkim 587238384Sjkimstatic const limb bottom52bits = 0xfffffffffffff; 588238384Sjkim 589280304Sjkim/*- 590280304Sjkim * felem_reduce converts a largefelem to an felem. 591238384Sjkim * On entry: 592238384Sjkim * in[i] < 2^128 593238384Sjkim * On exit: 594238384Sjkim * out[i] < 2^59 + 2^14 595238384Sjkim */ 596238384Sjkimstatic void felem_reduce(felem out, const largefelem in) 597280304Sjkim{ 598280304Sjkim u64 overflow1, overflow2; 599238384Sjkim 600280304Sjkim out[0] = ((limb) in[0]) & bottom58bits; 601280304Sjkim out[1] = ((limb) in[1]) & bottom58bits; 602280304Sjkim out[2] = ((limb) in[2]) & bottom58bits; 603280304Sjkim out[3] = ((limb) in[3]) & bottom58bits; 604280304Sjkim out[4] = ((limb) in[4]) & bottom58bits; 605280304Sjkim out[5] = ((limb) in[5]) & bottom58bits; 606280304Sjkim out[6] = ((limb) in[6]) & bottom58bits; 607280304Sjkim out[7] = ((limb) in[7]) & bottom58bits; 608280304Sjkim out[8] = ((limb) in[8]) & bottom58bits; 609238384Sjkim 610280304Sjkim /* out[i] < 2^58 */ 611238384Sjkim 612280304Sjkim out[1] += ((limb) in[0]) >> 58; 613280304Sjkim out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6; 614280304Sjkim /*- 615280304Sjkim * out[1] < 2^58 + 2^6 + 2^58 616280304Sjkim * = 2^59 + 2^6 617280304Sjkim */ 618280304Sjkim out[2] += ((limb) (in[0] >> 64)) >> 52; 619238384Sjkim 620280304Sjkim out[2] += ((limb) in[1]) >> 58; 621280304Sjkim out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6; 622280304Sjkim out[3] += ((limb) (in[1] >> 64)) >> 52; 623238384Sjkim 624280304Sjkim out[3] += ((limb) in[2]) >> 58; 625280304Sjkim out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6; 626280304Sjkim out[4] += ((limb) (in[2] >> 64)) >> 52; 627238384Sjkim 628280304Sjkim out[4] += ((limb) in[3]) >> 58; 629280304Sjkim out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6; 630280304Sjkim out[5] += ((limb) (in[3] >> 64)) >> 52; 631238384Sjkim 632280304Sjkim out[5] += ((limb) in[4]) >> 58; 633280304Sjkim out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6; 634280304Sjkim out[6] += ((limb) (in[4] >> 64)) >> 52; 635238384Sjkim 636280304Sjkim out[6] += ((limb) in[5]) >> 58; 637280304Sjkim out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6; 638280304Sjkim out[7] += ((limb) (in[5] >> 64)) >> 52; 639238384Sjkim 640280304Sjkim out[7] += ((limb) in[6]) >> 58; 641280304Sjkim out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6; 642280304Sjkim out[8] += ((limb) (in[6] >> 64)) >> 52; 643238384Sjkim 644280304Sjkim out[8] += ((limb) in[7]) >> 58; 645280304Sjkim out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6; 646280304Sjkim /*- 647280304Sjkim * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 648280304Sjkim * < 2^59 + 2^13 649280304Sjkim */ 650280304Sjkim overflow1 = ((limb) (in[7] >> 64)) >> 52; 651238384Sjkim 652280304Sjkim overflow1 += ((limb) in[8]) >> 58; 653280304Sjkim overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6; 654280304Sjkim overflow2 = ((limb) (in[8] >> 64)) >> 52; 655238384Sjkim 656280304Sjkim overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */ 657280304Sjkim overflow2 <<= 1; /* overflow2 < 2^13 */ 658238384Sjkim 659280304Sjkim out[0] += overflow1; /* out[0] < 2^60 */ 660280304Sjkim out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */ 661238384Sjkim 662280304Sjkim out[1] += out[0] >> 58; 663280304Sjkim out[0] &= bottom58bits; 664280304Sjkim /*- 665280304Sjkim * out[0] < 2^58 666280304Sjkim * out[1] < 2^59 + 2^6 + 2^13 + 2^2 667280304Sjkim * < 2^59 + 2^14 668280304Sjkim */ 669280304Sjkim} 670238384Sjkim 671238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 672280304Sjkim{ 673280304Sjkim largefelem tmp; 674280304Sjkim felem_square(tmp, in); 675280304Sjkim felem_reduce(out, tmp); 676280304Sjkim} 677238384Sjkim 678238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 679280304Sjkim{ 680280304Sjkim largefelem tmp; 681280304Sjkim felem_mul(tmp, in1, in2); 682280304Sjkim felem_reduce(out, tmp); 683280304Sjkim} 684238384Sjkim 685280304Sjkim/*- 686280304Sjkim * felem_inv calculates |out| = |in|^{-1} 687238384Sjkim * 688238384Sjkim * Based on Fermat's Little Theorem: 689238384Sjkim * a^p = a (mod p) 690238384Sjkim * a^{p-1} = 1 (mod p) 691238384Sjkim * a^{p-2} = a^{-1} (mod p) 692238384Sjkim */ 693238384Sjkimstatic void felem_inv(felem out, const felem in) 694280304Sjkim{ 695280304Sjkim felem ftmp, ftmp2, ftmp3, ftmp4; 696280304Sjkim largefelem tmp; 697280304Sjkim unsigned i; 698238384Sjkim 699280304Sjkim felem_square(tmp, in); 700280304Sjkim felem_reduce(ftmp, tmp); /* 2^1 */ 701280304Sjkim felem_mul(tmp, in, ftmp); 702280304Sjkim felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 703280304Sjkim felem_assign(ftmp2, ftmp); 704280304Sjkim felem_square(tmp, ftmp); 705280304Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 706280304Sjkim felem_mul(tmp, in, ftmp); 707280304Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */ 708280304Sjkim felem_square(tmp, ftmp); 709280304Sjkim felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */ 710238384Sjkim 711280304Sjkim felem_square(tmp, ftmp2); 712280304Sjkim felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */ 713280304Sjkim felem_square(tmp, ftmp3); 714280304Sjkim felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */ 715280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 716280304Sjkim felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */ 717238384Sjkim 718280304Sjkim felem_assign(ftmp2, ftmp3); 719280304Sjkim felem_square(tmp, ftmp3); 720280304Sjkim felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */ 721280304Sjkim felem_square(tmp, ftmp3); 722280304Sjkim felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */ 723280304Sjkim felem_square(tmp, ftmp3); 724280304Sjkim felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */ 725280304Sjkim felem_square(tmp, ftmp3); 726280304Sjkim felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */ 727280304Sjkim felem_assign(ftmp4, ftmp3); 728280304Sjkim felem_mul(tmp, ftmp3, ftmp); 729280304Sjkim felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */ 730280304Sjkim felem_square(tmp, ftmp4); 731280304Sjkim felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */ 732280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 733280304Sjkim felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */ 734280304Sjkim felem_assign(ftmp2, ftmp3); 735238384Sjkim 736280304Sjkim for (i = 0; i < 8; i++) { 737280304Sjkim felem_square(tmp, ftmp3); 738280304Sjkim felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */ 739280304Sjkim } 740280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 741280304Sjkim felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */ 742280304Sjkim felem_assign(ftmp2, ftmp3); 743238384Sjkim 744280304Sjkim for (i = 0; i < 16; i++) { 745280304Sjkim felem_square(tmp, ftmp3); 746280304Sjkim felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */ 747280304Sjkim } 748280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 749280304Sjkim felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */ 750280304Sjkim felem_assign(ftmp2, ftmp3); 751238384Sjkim 752280304Sjkim for (i = 0; i < 32; i++) { 753280304Sjkim felem_square(tmp, ftmp3); 754280304Sjkim felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */ 755280304Sjkim } 756280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 757280304Sjkim felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */ 758280304Sjkim felem_assign(ftmp2, ftmp3); 759238384Sjkim 760280304Sjkim for (i = 0; i < 64; i++) { 761280304Sjkim felem_square(tmp, ftmp3); 762280304Sjkim felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */ 763280304Sjkim } 764280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 765280304Sjkim felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */ 766280304Sjkim felem_assign(ftmp2, ftmp3); 767238384Sjkim 768280304Sjkim for (i = 0; i < 128; i++) { 769280304Sjkim felem_square(tmp, ftmp3); 770280304Sjkim felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */ 771280304Sjkim } 772280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 773280304Sjkim felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */ 774280304Sjkim felem_assign(ftmp2, ftmp3); 775238384Sjkim 776280304Sjkim for (i = 0; i < 256; i++) { 777280304Sjkim felem_square(tmp, ftmp3); 778280304Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */ 779280304Sjkim } 780280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 781280304Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */ 782238384Sjkim 783280304Sjkim for (i = 0; i < 9; i++) { 784280304Sjkim felem_square(tmp, ftmp3); 785280304Sjkim felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */ 786280304Sjkim } 787280304Sjkim felem_mul(tmp, ftmp3, ftmp4); 788280304Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */ 789280304Sjkim felem_mul(tmp, ftmp3, in); 790280304Sjkim felem_reduce(out, tmp); /* 2^512 - 3 */ 791238384Sjkim} 792238384Sjkim 793238384Sjkim/* This is 2^521-1, expressed as an felem */ 794280304Sjkimstatic const felem kPrime = { 795280304Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 796280304Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 797280304Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff 798280304Sjkim}; 799238384Sjkim 800280304Sjkim/*- 801280304Sjkim * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0 802238384Sjkim * otherwise. 803238384Sjkim * On entry: 804238384Sjkim * in[i] < 2^59 + 2^14 805238384Sjkim */ 806238384Sjkimstatic limb felem_is_zero(const felem in) 807280304Sjkim{ 808280304Sjkim felem ftmp; 809280304Sjkim limb is_zero, is_p; 810280304Sjkim felem_assign(ftmp, in); 811238384Sjkim 812280304Sjkim ftmp[0] += ftmp[8] >> 57; 813280304Sjkim ftmp[8] &= bottom57bits; 814280304Sjkim /* ftmp[8] < 2^57 */ 815280304Sjkim ftmp[1] += ftmp[0] >> 58; 816280304Sjkim ftmp[0] &= bottom58bits; 817280304Sjkim ftmp[2] += ftmp[1] >> 58; 818280304Sjkim ftmp[1] &= bottom58bits; 819280304Sjkim ftmp[3] += ftmp[2] >> 58; 820280304Sjkim ftmp[2] &= bottom58bits; 821280304Sjkim ftmp[4] += ftmp[3] >> 58; 822280304Sjkim ftmp[3] &= bottom58bits; 823280304Sjkim ftmp[5] += ftmp[4] >> 58; 824280304Sjkim ftmp[4] &= bottom58bits; 825280304Sjkim ftmp[6] += ftmp[5] >> 58; 826280304Sjkim ftmp[5] &= bottom58bits; 827280304Sjkim ftmp[7] += ftmp[6] >> 58; 828280304Sjkim ftmp[6] &= bottom58bits; 829280304Sjkim ftmp[8] += ftmp[7] >> 58; 830280304Sjkim ftmp[7] &= bottom58bits; 831280304Sjkim /* ftmp[8] < 2^57 + 4 */ 832238384Sjkim 833280304Sjkim /* 834280304Sjkim * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater 835280304Sjkim * than our bound for ftmp[8]. Therefore we only have to check if the 836280304Sjkim * zero is zero or 2^521-1. 837280304Sjkim */ 838238384Sjkim 839280304Sjkim is_zero = 0; 840280304Sjkim is_zero |= ftmp[0]; 841280304Sjkim is_zero |= ftmp[1]; 842280304Sjkim is_zero |= ftmp[2]; 843280304Sjkim is_zero |= ftmp[3]; 844280304Sjkim is_zero |= ftmp[4]; 845280304Sjkim is_zero |= ftmp[5]; 846280304Sjkim is_zero |= ftmp[6]; 847280304Sjkim is_zero |= ftmp[7]; 848280304Sjkim is_zero |= ftmp[8]; 849238384Sjkim 850280304Sjkim is_zero--; 851280304Sjkim /* 852280304Sjkim * We know that ftmp[i] < 2^63, therefore the only way that the top bit 853280304Sjkim * can be set is if is_zero was 0 before the decrement. 854280304Sjkim */ 855280304Sjkim is_zero = ((s64) is_zero) >> 63; 856238384Sjkim 857280304Sjkim is_p = ftmp[0] ^ kPrime[0]; 858280304Sjkim is_p |= ftmp[1] ^ kPrime[1]; 859280304Sjkim is_p |= ftmp[2] ^ kPrime[2]; 860280304Sjkim is_p |= ftmp[3] ^ kPrime[3]; 861280304Sjkim is_p |= ftmp[4] ^ kPrime[4]; 862280304Sjkim is_p |= ftmp[5] ^ kPrime[5]; 863280304Sjkim is_p |= ftmp[6] ^ kPrime[6]; 864280304Sjkim is_p |= ftmp[7] ^ kPrime[7]; 865280304Sjkim is_p |= ftmp[8] ^ kPrime[8]; 866238384Sjkim 867280304Sjkim is_p--; 868280304Sjkim is_p = ((s64) is_p) >> 63; 869238384Sjkim 870280304Sjkim is_zero |= is_p; 871280304Sjkim return is_zero; 872280304Sjkim} 873238384Sjkim 874238384Sjkimstatic int felem_is_zero_int(const felem in) 875280304Sjkim{ 876280304Sjkim return (int)(felem_is_zero(in) & ((limb) 1)); 877280304Sjkim} 878238384Sjkim 879280304Sjkim/*- 880280304Sjkim * felem_contract converts |in| to its unique, minimal representation. 881238384Sjkim * On entry: 882238384Sjkim * in[i] < 2^59 + 2^14 883238384Sjkim */ 884238384Sjkimstatic void felem_contract(felem out, const felem in) 885280304Sjkim{ 886280304Sjkim limb is_p, is_greater, sign; 887280304Sjkim static const limb two58 = ((limb) 1) << 58; 888238384Sjkim 889280304Sjkim felem_assign(out, in); 890238384Sjkim 891280304Sjkim out[0] += out[8] >> 57; 892280304Sjkim out[8] &= bottom57bits; 893280304Sjkim /* out[8] < 2^57 */ 894280304Sjkim out[1] += out[0] >> 58; 895280304Sjkim out[0] &= bottom58bits; 896280304Sjkim out[2] += out[1] >> 58; 897280304Sjkim out[1] &= bottom58bits; 898280304Sjkim out[3] += out[2] >> 58; 899280304Sjkim out[2] &= bottom58bits; 900280304Sjkim out[4] += out[3] >> 58; 901280304Sjkim out[3] &= bottom58bits; 902280304Sjkim out[5] += out[4] >> 58; 903280304Sjkim out[4] &= bottom58bits; 904280304Sjkim out[6] += out[5] >> 58; 905280304Sjkim out[5] &= bottom58bits; 906280304Sjkim out[7] += out[6] >> 58; 907280304Sjkim out[6] &= bottom58bits; 908280304Sjkim out[8] += out[7] >> 58; 909280304Sjkim out[7] &= bottom58bits; 910280304Sjkim /* out[8] < 2^57 + 4 */ 911238384Sjkim 912280304Sjkim /* 913280304Sjkim * If the value is greater than 2^521-1 then we have to subtract 2^521-1 914280304Sjkim * out. See the comments in felem_is_zero regarding why we don't test for 915280304Sjkim * other multiples of the prime. 916280304Sjkim */ 917238384Sjkim 918280304Sjkim /* 919280304Sjkim * First, if |out| is equal to 2^521-1, we subtract it out to get zero. 920280304Sjkim */ 921238384Sjkim 922280304Sjkim is_p = out[0] ^ kPrime[0]; 923280304Sjkim is_p |= out[1] ^ kPrime[1]; 924280304Sjkim is_p |= out[2] ^ kPrime[2]; 925280304Sjkim is_p |= out[3] ^ kPrime[3]; 926280304Sjkim is_p |= out[4] ^ kPrime[4]; 927280304Sjkim is_p |= out[5] ^ kPrime[5]; 928280304Sjkim is_p |= out[6] ^ kPrime[6]; 929280304Sjkim is_p |= out[7] ^ kPrime[7]; 930280304Sjkim is_p |= out[8] ^ kPrime[8]; 931238384Sjkim 932280304Sjkim is_p--; 933280304Sjkim is_p &= is_p << 32; 934280304Sjkim is_p &= is_p << 16; 935280304Sjkim is_p &= is_p << 8; 936280304Sjkim is_p &= is_p << 4; 937280304Sjkim is_p &= is_p << 2; 938280304Sjkim is_p &= is_p << 1; 939280304Sjkim is_p = ((s64) is_p) >> 63; 940280304Sjkim is_p = ~is_p; 941238384Sjkim 942280304Sjkim /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */ 943238384Sjkim 944280304Sjkim out[0] &= is_p; 945280304Sjkim out[1] &= is_p; 946280304Sjkim out[2] &= is_p; 947280304Sjkim out[3] &= is_p; 948280304Sjkim out[4] &= is_p; 949280304Sjkim out[5] &= is_p; 950280304Sjkim out[6] &= is_p; 951280304Sjkim out[7] &= is_p; 952280304Sjkim out[8] &= is_p; 953238384Sjkim 954280304Sjkim /* 955280304Sjkim * In order to test that |out| >= 2^521-1 we need only test if out[8] >> 956280304Sjkim * 57 is greater than zero as (2^521-1) + x >= 2^522 957280304Sjkim */ 958280304Sjkim is_greater = out[8] >> 57; 959280304Sjkim is_greater |= is_greater << 32; 960280304Sjkim is_greater |= is_greater << 16; 961280304Sjkim is_greater |= is_greater << 8; 962280304Sjkim is_greater |= is_greater << 4; 963280304Sjkim is_greater |= is_greater << 2; 964280304Sjkim is_greater |= is_greater << 1; 965280304Sjkim is_greater = ((s64) is_greater) >> 63; 966238384Sjkim 967280304Sjkim out[0] -= kPrime[0] & is_greater; 968280304Sjkim out[1] -= kPrime[1] & is_greater; 969280304Sjkim out[2] -= kPrime[2] & is_greater; 970280304Sjkim out[3] -= kPrime[3] & is_greater; 971280304Sjkim out[4] -= kPrime[4] & is_greater; 972280304Sjkim out[5] -= kPrime[5] & is_greater; 973280304Sjkim out[6] -= kPrime[6] & is_greater; 974280304Sjkim out[7] -= kPrime[7] & is_greater; 975280304Sjkim out[8] -= kPrime[8] & is_greater; 976238384Sjkim 977280304Sjkim /* Eliminate negative coefficients */ 978280304Sjkim sign = -(out[0] >> 63); 979280304Sjkim out[0] += (two58 & sign); 980280304Sjkim out[1] -= (1 & sign); 981280304Sjkim sign = -(out[1] >> 63); 982280304Sjkim out[1] += (two58 & sign); 983280304Sjkim out[2] -= (1 & sign); 984280304Sjkim sign = -(out[2] >> 63); 985280304Sjkim out[2] += (two58 & sign); 986280304Sjkim out[3] -= (1 & sign); 987280304Sjkim sign = -(out[3] >> 63); 988280304Sjkim out[3] += (two58 & sign); 989280304Sjkim out[4] -= (1 & sign); 990280304Sjkim sign = -(out[4] >> 63); 991280304Sjkim out[4] += (two58 & sign); 992280304Sjkim out[5] -= (1 & sign); 993280304Sjkim sign = -(out[0] >> 63); 994280304Sjkim out[5] += (two58 & sign); 995280304Sjkim out[6] -= (1 & sign); 996280304Sjkim sign = -(out[6] >> 63); 997280304Sjkim out[6] += (two58 & sign); 998280304Sjkim out[7] -= (1 & sign); 999280304Sjkim sign = -(out[7] >> 63); 1000280304Sjkim out[7] += (two58 & sign); 1001280304Sjkim out[8] -= (1 & sign); 1002280304Sjkim sign = -(out[5] >> 63); 1003280304Sjkim out[5] += (two58 & sign); 1004280304Sjkim out[6] -= (1 & sign); 1005280304Sjkim sign = -(out[6] >> 63); 1006280304Sjkim out[6] += (two58 & sign); 1007280304Sjkim out[7] -= (1 & sign); 1008280304Sjkim sign = -(out[7] >> 63); 1009280304Sjkim out[7] += (two58 & sign); 1010280304Sjkim out[8] -= (1 & sign); 1011280304Sjkim} 1012238384Sjkim 1013280304Sjkim/*- 1014280304Sjkim * Group operations 1015238384Sjkim * ---------------- 1016238384Sjkim * 1017238384Sjkim * Building on top of the field operations we have the operations on the 1018238384Sjkim * elliptic curve group itself. Points on the curve are represented in Jacobian 1019238384Sjkim * coordinates */ 1020238384Sjkim 1021280304Sjkim/*- 1022280304Sjkim * point_double calcuates 2*(x_in, y_in, z_in) 1023238384Sjkim * 1024238384Sjkim * The method is taken from: 1025238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b 1026238384Sjkim * 1027238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. 1028238384Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). */ 1029238384Sjkimstatic void 1030238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 1031280304Sjkim const felem x_in, const felem y_in, const felem z_in) 1032280304Sjkim{ 1033280304Sjkim largefelem tmp, tmp2; 1034280304Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 1035238384Sjkim 1036280304Sjkim felem_assign(ftmp, x_in); 1037280304Sjkim felem_assign(ftmp2, x_in); 1038238384Sjkim 1039280304Sjkim /* delta = z^2 */ 1040280304Sjkim felem_square(tmp, z_in); 1041280304Sjkim felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */ 1042238384Sjkim 1043280304Sjkim /* gamma = y^2 */ 1044280304Sjkim felem_square(tmp, y_in); 1045280304Sjkim felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */ 1046238384Sjkim 1047280304Sjkim /* beta = x*gamma */ 1048280304Sjkim felem_mul(tmp, x_in, gamma); 1049280304Sjkim felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */ 1050238384Sjkim 1051280304Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 1052280304Sjkim felem_diff64(ftmp, delta); 1053280304Sjkim /* ftmp[i] < 2^61 */ 1054280304Sjkim felem_sum64(ftmp2, delta); 1055280304Sjkim /* ftmp2[i] < 2^60 + 2^15 */ 1056280304Sjkim felem_scalar64(ftmp2, 3); 1057280304Sjkim /* ftmp2[i] < 3*2^60 + 3*2^15 */ 1058280304Sjkim felem_mul(tmp, ftmp, ftmp2); 1059280304Sjkim /*- 1060280304Sjkim * tmp[i] < 17(3*2^121 + 3*2^76) 1061280304Sjkim * = 61*2^121 + 61*2^76 1062280304Sjkim * < 64*2^121 + 64*2^76 1063280304Sjkim * = 2^127 + 2^82 1064280304Sjkim * < 2^128 1065280304Sjkim */ 1066280304Sjkim felem_reduce(alpha, tmp); 1067238384Sjkim 1068280304Sjkim /* x' = alpha^2 - 8*beta */ 1069280304Sjkim felem_square(tmp, alpha); 1070280304Sjkim /* 1071280304Sjkim * tmp[i] < 17*2^120 < 2^125 1072280304Sjkim */ 1073280304Sjkim felem_assign(ftmp, beta); 1074280304Sjkim felem_scalar64(ftmp, 8); 1075280304Sjkim /* ftmp[i] < 2^62 + 2^17 */ 1076280304Sjkim felem_diff_128_64(tmp, ftmp); 1077280304Sjkim /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */ 1078280304Sjkim felem_reduce(x_out, tmp); 1079238384Sjkim 1080280304Sjkim /* z' = (y + z)^2 - gamma - delta */ 1081280304Sjkim felem_sum64(delta, gamma); 1082280304Sjkim /* delta[i] < 2^60 + 2^15 */ 1083280304Sjkim felem_assign(ftmp, y_in); 1084280304Sjkim felem_sum64(ftmp, z_in); 1085280304Sjkim /* ftmp[i] < 2^60 + 2^15 */ 1086280304Sjkim felem_square(tmp, ftmp); 1087280304Sjkim /* 1088280304Sjkim * tmp[i] < 17(2^122) < 2^127 1089280304Sjkim */ 1090280304Sjkim felem_diff_128_64(tmp, delta); 1091280304Sjkim /* tmp[i] < 2^127 + 2^63 */ 1092280304Sjkim felem_reduce(z_out, tmp); 1093238384Sjkim 1094280304Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 1095280304Sjkim felem_scalar64(beta, 4); 1096280304Sjkim /* beta[i] < 2^61 + 2^16 */ 1097280304Sjkim felem_diff64(beta, x_out); 1098280304Sjkim /* beta[i] < 2^61 + 2^60 + 2^16 */ 1099280304Sjkim felem_mul(tmp, alpha, beta); 1100280304Sjkim /*- 1101280304Sjkim * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) 1102280304Sjkim * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30) 1103280304Sjkim * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1104280304Sjkim * < 2^128 1105280304Sjkim */ 1106280304Sjkim felem_square(tmp2, gamma); 1107280304Sjkim /*- 1108280304Sjkim * tmp2[i] < 17*(2^59 + 2^14)^2 1109280304Sjkim * = 17*(2^118 + 2^74 + 2^28) 1110280304Sjkim */ 1111280304Sjkim felem_scalar128(tmp2, 8); 1112280304Sjkim /*- 1113280304Sjkim * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) 1114280304Sjkim * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31 1115280304Sjkim * < 2^126 1116280304Sjkim */ 1117280304Sjkim felem_diff128(tmp, tmp2); 1118280304Sjkim /*- 1119280304Sjkim * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1120280304Sjkim * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 1121280304Sjkim * 2^74 + 2^69 + 2^34 + 2^30 1122280304Sjkim * < 2^128 1123280304Sjkim */ 1124280304Sjkim felem_reduce(y_out, tmp); 1125280304Sjkim} 1126238384Sjkim 1127238384Sjkim/* copy_conditional copies in to out iff mask is all ones. */ 1128280304Sjkimstatic void copy_conditional(felem out, const felem in, limb mask) 1129280304Sjkim{ 1130280304Sjkim unsigned i; 1131280304Sjkim for (i = 0; i < NLIMBS; ++i) { 1132280304Sjkim const limb tmp = mask & (in[i] ^ out[i]); 1133280304Sjkim out[i] ^= tmp; 1134280304Sjkim } 1135280304Sjkim} 1136238384Sjkim 1137280304Sjkim/*- 1138280304Sjkim * point_add calcuates (x1, y1, z1) + (x2, y2, z2) 1139238384Sjkim * 1140238384Sjkim * The method is taken from 1141238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, 1142238384Sjkim * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). 1143238384Sjkim * 1144238384Sjkim * This function includes a branch for checking whether the two input points 1145238384Sjkim * are equal (while not equal to the point at infinity). This case never 1146238384Sjkim * happens during single point multiplication, so there is no timing leak for 1147238384Sjkim * ECDH or ECDSA signing. */ 1148238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 1149280304Sjkim const felem x1, const felem y1, const felem z1, 1150280304Sjkim const int mixed, const felem x2, const felem y2, 1151280304Sjkim const felem z2) 1152280304Sjkim{ 1153280304Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out; 1154280304Sjkim largefelem tmp, tmp2; 1155280304Sjkim limb x_equal, y_equal, z1_is_zero, z2_is_zero; 1156238384Sjkim 1157280304Sjkim z1_is_zero = felem_is_zero(z1); 1158280304Sjkim z2_is_zero = felem_is_zero(z2); 1159238384Sjkim 1160280304Sjkim /* ftmp = z1z1 = z1**2 */ 1161280304Sjkim felem_square(tmp, z1); 1162280304Sjkim felem_reduce(ftmp, tmp); 1163238384Sjkim 1164280304Sjkim if (!mixed) { 1165280304Sjkim /* ftmp2 = z2z2 = z2**2 */ 1166280304Sjkim felem_square(tmp, z2); 1167280304Sjkim felem_reduce(ftmp2, tmp); 1168238384Sjkim 1169280304Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1170280304Sjkim felem_mul(tmp, x1, ftmp2); 1171280304Sjkim felem_reduce(ftmp3, tmp); 1172238384Sjkim 1173280304Sjkim /* ftmp5 = z1 + z2 */ 1174280304Sjkim felem_assign(ftmp5, z1); 1175280304Sjkim felem_sum64(ftmp5, z2); 1176280304Sjkim /* ftmp5[i] < 2^61 */ 1177238384Sjkim 1178280304Sjkim /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */ 1179280304Sjkim felem_square(tmp, ftmp5); 1180280304Sjkim /* tmp[i] < 17*2^122 */ 1181280304Sjkim felem_diff_128_64(tmp, ftmp); 1182280304Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1183280304Sjkim felem_diff_128_64(tmp, ftmp2); 1184280304Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1185280304Sjkim felem_reduce(ftmp5, tmp); 1186238384Sjkim 1187280304Sjkim /* ftmp2 = z2 * z2z2 */ 1188280304Sjkim felem_mul(tmp, ftmp2, z2); 1189280304Sjkim felem_reduce(ftmp2, tmp); 1190238384Sjkim 1191280304Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1192280304Sjkim felem_mul(tmp, y1, ftmp2); 1193280304Sjkim felem_reduce(ftmp6, tmp); 1194280304Sjkim } else { 1195280304Sjkim /* 1196280304Sjkim * We'll assume z2 = 1 (special case z2 = 0 is handled later) 1197280304Sjkim */ 1198238384Sjkim 1199280304Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1200280304Sjkim felem_assign(ftmp3, x1); 1201238384Sjkim 1202280304Sjkim /* ftmp5 = 2*z1z2 */ 1203280304Sjkim felem_scalar(ftmp5, z1, 2); 1204238384Sjkim 1205280304Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1206280304Sjkim felem_assign(ftmp6, y1); 1207280304Sjkim } 1208238384Sjkim 1209280304Sjkim /* u2 = x2*z1z1 */ 1210280304Sjkim felem_mul(tmp, x2, ftmp); 1211280304Sjkim /* tmp[i] < 17*2^120 */ 1212238384Sjkim 1213280304Sjkim /* h = ftmp4 = u2 - u1 */ 1214280304Sjkim felem_diff_128_64(tmp, ftmp3); 1215280304Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1216280304Sjkim felem_reduce(ftmp4, tmp); 1217238384Sjkim 1218280304Sjkim x_equal = felem_is_zero(ftmp4); 1219238384Sjkim 1220280304Sjkim /* z_out = ftmp5 * h */ 1221280304Sjkim felem_mul(tmp, ftmp5, ftmp4); 1222280304Sjkim felem_reduce(z_out, tmp); 1223238384Sjkim 1224280304Sjkim /* ftmp = z1 * z1z1 */ 1225280304Sjkim felem_mul(tmp, ftmp, z1); 1226280304Sjkim felem_reduce(ftmp, tmp); 1227238384Sjkim 1228280304Sjkim /* s2 = tmp = y2 * z1**3 */ 1229280304Sjkim felem_mul(tmp, y2, ftmp); 1230280304Sjkim /* tmp[i] < 17*2^120 */ 1231238384Sjkim 1232280304Sjkim /* r = ftmp5 = (s2 - s1)*2 */ 1233280304Sjkim felem_diff_128_64(tmp, ftmp6); 1234280304Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1235280304Sjkim felem_reduce(ftmp5, tmp); 1236280304Sjkim y_equal = felem_is_zero(ftmp5); 1237280304Sjkim felem_scalar64(ftmp5, 2); 1238280304Sjkim /* ftmp5[i] < 2^61 */ 1239238384Sjkim 1240280304Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { 1241280304Sjkim point_double(x3, y3, z3, x1, y1, z1); 1242280304Sjkim return; 1243280304Sjkim } 1244238384Sjkim 1245280304Sjkim /* I = ftmp = (2h)**2 */ 1246280304Sjkim felem_assign(ftmp, ftmp4); 1247280304Sjkim felem_scalar64(ftmp, 2); 1248280304Sjkim /* ftmp[i] < 2^61 */ 1249280304Sjkim felem_square(tmp, ftmp); 1250280304Sjkim /* tmp[i] < 17*2^122 */ 1251280304Sjkim felem_reduce(ftmp, tmp); 1252238384Sjkim 1253280304Sjkim /* J = ftmp2 = h * I */ 1254280304Sjkim felem_mul(tmp, ftmp4, ftmp); 1255280304Sjkim felem_reduce(ftmp2, tmp); 1256238384Sjkim 1257280304Sjkim /* V = ftmp4 = U1 * I */ 1258280304Sjkim felem_mul(tmp, ftmp3, ftmp); 1259280304Sjkim felem_reduce(ftmp4, tmp); 1260238384Sjkim 1261280304Sjkim /* x_out = r**2 - J - 2V */ 1262280304Sjkim felem_square(tmp, ftmp5); 1263280304Sjkim /* tmp[i] < 17*2^122 */ 1264280304Sjkim felem_diff_128_64(tmp, ftmp2); 1265280304Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1266280304Sjkim felem_assign(ftmp3, ftmp4); 1267280304Sjkim felem_scalar64(ftmp4, 2); 1268280304Sjkim /* ftmp4[i] < 2^61 */ 1269280304Sjkim felem_diff_128_64(tmp, ftmp4); 1270280304Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1271280304Sjkim felem_reduce(x_out, tmp); 1272238384Sjkim 1273280304Sjkim /* y_out = r(V-x_out) - 2 * s1 * J */ 1274280304Sjkim felem_diff64(ftmp3, x_out); 1275280304Sjkim /* 1276280304Sjkim * ftmp3[i] < 2^60 + 2^60 = 2^61 1277280304Sjkim */ 1278280304Sjkim felem_mul(tmp, ftmp5, ftmp3); 1279280304Sjkim /* tmp[i] < 17*2^122 */ 1280280304Sjkim felem_mul(tmp2, ftmp6, ftmp2); 1281280304Sjkim /* tmp2[i] < 17*2^120 */ 1282280304Sjkim felem_scalar128(tmp2, 2); 1283280304Sjkim /* tmp2[i] < 17*2^121 */ 1284280304Sjkim felem_diff128(tmp, tmp2); 1285280304Sjkim /*- 1286280304Sjkim * tmp[i] < 2^127 - 2^69 + 17*2^122 1287280304Sjkim * = 2^126 - 2^122 - 2^6 - 2^2 - 1 1288280304Sjkim * < 2^127 1289280304Sjkim */ 1290280304Sjkim felem_reduce(y_out, tmp); 1291238384Sjkim 1292280304Sjkim copy_conditional(x_out, x2, z1_is_zero); 1293280304Sjkim copy_conditional(x_out, x1, z2_is_zero); 1294280304Sjkim copy_conditional(y_out, y2, z1_is_zero); 1295280304Sjkim copy_conditional(y_out, y1, z2_is_zero); 1296280304Sjkim copy_conditional(z_out, z2, z1_is_zero); 1297280304Sjkim copy_conditional(z_out, z1, z2_is_zero); 1298280304Sjkim felem_assign(x3, x_out); 1299280304Sjkim felem_assign(y3, y_out); 1300280304Sjkim felem_assign(z3, z_out); 1301280304Sjkim} 1302238384Sjkim 1303280304Sjkim/*- 1304280304Sjkim * Base point pre computation 1305238384Sjkim * -------------------------- 1306238384Sjkim * 1307238384Sjkim * Two different sorts of precomputed tables are used in the following code. 1308238384Sjkim * Each contain various points on the curve, where each point is three field 1309238384Sjkim * elements (x, y, z). 1310238384Sjkim * 1311238384Sjkim * For the base point table, z is usually 1 (0 for the point at infinity). 1312238384Sjkim * This table has 16 elements: 1313238384Sjkim * index | bits | point 1314238384Sjkim * ------+---------+------------------------------ 1315238384Sjkim * 0 | 0 0 0 0 | 0G 1316238384Sjkim * 1 | 0 0 0 1 | 1G 1317238384Sjkim * 2 | 0 0 1 0 | 2^130G 1318238384Sjkim * 3 | 0 0 1 1 | (2^130 + 1)G 1319238384Sjkim * 4 | 0 1 0 0 | 2^260G 1320238384Sjkim * 5 | 0 1 0 1 | (2^260 + 1)G 1321238384Sjkim * 6 | 0 1 1 0 | (2^260 + 2^130)G 1322238384Sjkim * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G 1323238384Sjkim * 8 | 1 0 0 0 | 2^390G 1324238384Sjkim * 9 | 1 0 0 1 | (2^390 + 1)G 1325238384Sjkim * 10 | 1 0 1 0 | (2^390 + 2^130)G 1326238384Sjkim * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G 1327238384Sjkim * 12 | 1 1 0 0 | (2^390 + 2^260)G 1328238384Sjkim * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G 1329238384Sjkim * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G 1330238384Sjkim * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G 1331238384Sjkim * 1332238384Sjkim * The reason for this is so that we can clock bits into four different 1333238384Sjkim * locations when doing simple scalar multiplies against the base point. 1334238384Sjkim * 1335238384Sjkim * Tables for other points have table[i] = iG for i in 0 .. 16. */ 1336238384Sjkim 1337238384Sjkim/* gmul is the table of precomputed base points */ 1338280304Sjkimstatic const felem gmul[16][3] = { {{0, 0, 0, 0, 0, 0, 0, 0, 0}, 1339280304Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}, 1340280304Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}}, 1341280304Sjkim{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334, 1342280304Sjkim 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8, 1343280304Sjkim 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404}, 1344280304Sjkim {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353, 1345280304Sjkim 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45, 1346280304Sjkim 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b}, 1347280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1348280304Sjkim{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad, 1349280304Sjkim 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e, 1350280304Sjkim 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5}, 1351280304Sjkim {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58, 1352280304Sjkim 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c, 1353280304Sjkim 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7}, 1354280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1355280304Sjkim{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873, 1356280304Sjkim 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c, 1357280304Sjkim 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9}, 1358280304Sjkim {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52, 1359280304Sjkim 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e, 1360280304Sjkim 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe}, 1361280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1362280304Sjkim{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2, 1363280304Sjkim 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561, 1364280304Sjkim 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065}, 1365280304Sjkim {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a, 1366280304Sjkim 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e, 1367280304Sjkim 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524}, 1368280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1369280304Sjkim{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6, 1370280304Sjkim 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51, 1371280304Sjkim 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe}, 1372280304Sjkim {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d, 1373280304Sjkim 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c, 1374280304Sjkim 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7}, 1375280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1376280304Sjkim{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27, 1377280304Sjkim 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f, 1378280304Sjkim 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256}, 1379280304Sjkim {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa, 1380280304Sjkim 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2, 1381280304Sjkim 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd}, 1382280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1383280304Sjkim{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890, 1384280304Sjkim 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74, 1385280304Sjkim 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23}, 1386280304Sjkim {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516, 1387280304Sjkim 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1, 1388280304Sjkim 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e}, 1389280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1390280304Sjkim{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce, 1391280304Sjkim 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7, 1392280304Sjkim 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5}, 1393280304Sjkim {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318, 1394280304Sjkim 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83, 1395280304Sjkim 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242}, 1396280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1397280304Sjkim{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae, 1398280304Sjkim 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef, 1399280304Sjkim 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203}, 1400280304Sjkim {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447, 1401280304Sjkim 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283, 1402280304Sjkim 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f}, 1403280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1404280304Sjkim{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5, 1405280304Sjkim 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c, 1406280304Sjkim 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a}, 1407280304Sjkim {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df, 1408280304Sjkim 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645, 1409280304Sjkim 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a}, 1410280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1411280304Sjkim{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292, 1412280304Sjkim 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422, 1413280304Sjkim 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b}, 1414280304Sjkim {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30, 1415280304Sjkim 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb, 1416280304Sjkim 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f}, 1417280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1418280304Sjkim{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767, 1419280304Sjkim 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3, 1420280304Sjkim 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf}, 1421280304Sjkim {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2, 1422280304Sjkim 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692, 1423280304Sjkim 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d}, 1424280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1425280304Sjkim{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3, 1426280304Sjkim 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade, 1427280304Sjkim 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684}, 1428280304Sjkim {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8, 1429280304Sjkim 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a, 1430280304Sjkim 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81}, 1431280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1432280304Sjkim{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608, 1433280304Sjkim 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610, 1434280304Sjkim 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d}, 1435280304Sjkim {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006, 1436280304Sjkim 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86, 1437280304Sjkim 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42}, 1438280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1439280304Sjkim{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c, 1440280304Sjkim 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9, 1441280304Sjkim 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f}, 1442280304Sjkim {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7, 1443280304Sjkim 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c, 1444280304Sjkim 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055}, 1445280304Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}} 1446280304Sjkim}; 1447238384Sjkim 1448280304Sjkim/* 1449280304Sjkim * select_point selects the |idx|th point from a precomputation table and 1450280304Sjkim * copies it to out. 1451280304Sjkim */ 1452280304Sjkim /* pre_comp below is of the size provided in |size| */ 1453280304Sjkimstatic void select_point(const limb idx, unsigned int size, 1454280304Sjkim const felem pre_comp[][3], felem out[3]) 1455280304Sjkim{ 1456280304Sjkim unsigned i, j; 1457280304Sjkim limb *outlimbs = &out[0][0]; 1458280304Sjkim memset(outlimbs, 0, 3 * sizeof(felem)); 1459238384Sjkim 1460280304Sjkim for (i = 0; i < size; i++) { 1461280304Sjkim const limb *inlimbs = &pre_comp[i][0][0]; 1462280304Sjkim limb mask = i ^ idx; 1463280304Sjkim mask |= mask >> 4; 1464280304Sjkim mask |= mask >> 2; 1465280304Sjkim mask |= mask >> 1; 1466280304Sjkim mask &= 1; 1467280304Sjkim mask--; 1468280304Sjkim for (j = 0; j < NLIMBS * 3; j++) 1469280304Sjkim outlimbs[j] |= inlimbs[j] & mask; 1470280304Sjkim } 1471280304Sjkim} 1472238384Sjkim 1473238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1474238384Sjkimstatic char get_bit(const felem_bytearray in, int i) 1475280304Sjkim{ 1476280304Sjkim if (i < 0) 1477280304Sjkim return 0; 1478280304Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1479280304Sjkim} 1480238384Sjkim 1481280304Sjkim/* 1482280304Sjkim * Interleaved point multiplication using precomputed point multiples: The 1483280304Sjkim * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1484280304Sjkim * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1485280304Sjkim * generator, using certain (large) precomputed multiples in g_pre_comp. 1486280304Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1487280304Sjkim */ 1488238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1489280304Sjkim const felem_bytearray scalars[], 1490280304Sjkim const unsigned num_points, const u8 *g_scalar, 1491280304Sjkim const int mixed, const felem pre_comp[][17][3], 1492280304Sjkim const felem g_pre_comp[16][3]) 1493280304Sjkim{ 1494280304Sjkim int i, skip; 1495280304Sjkim unsigned num, gen_mul = (g_scalar != NULL); 1496280304Sjkim felem nq[3], tmp[4]; 1497280304Sjkim limb bits; 1498280304Sjkim u8 sign, digit; 1499238384Sjkim 1500280304Sjkim /* set nq to the point at infinity */ 1501280304Sjkim memset(nq, 0, 3 * sizeof(felem)); 1502238384Sjkim 1503280304Sjkim /* 1504280304Sjkim * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1505280304Sjkim * of the generator (last quarter of rounds) and additions of other 1506280304Sjkim * points multiples (every 5th round). 1507280304Sjkim */ 1508280304Sjkim skip = 1; /* save two point operations in the first 1509280304Sjkim * round */ 1510280304Sjkim for (i = (num_points ? 520 : 130); i >= 0; --i) { 1511280304Sjkim /* double */ 1512280304Sjkim if (!skip) 1513280304Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1514238384Sjkim 1515280304Sjkim /* add multiples of the generator */ 1516280304Sjkim if (gen_mul && (i <= 130)) { 1517280304Sjkim bits = get_bit(g_scalar, i + 390) << 3; 1518280304Sjkim if (i < 130) { 1519280304Sjkim bits |= get_bit(g_scalar, i + 260) << 2; 1520280304Sjkim bits |= get_bit(g_scalar, i + 130) << 1; 1521280304Sjkim bits |= get_bit(g_scalar, i); 1522280304Sjkim } 1523280304Sjkim /* select the point to add, in constant time */ 1524280304Sjkim select_point(bits, 16, g_pre_comp, tmp); 1525280304Sjkim if (!skip) { 1526280304Sjkim /* The 1 argument below is for "mixed" */ 1527280304Sjkim point_add(nq[0], nq[1], nq[2], 1528280304Sjkim nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1529280304Sjkim } else { 1530280304Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1531280304Sjkim skip = 0; 1532280304Sjkim } 1533280304Sjkim } 1534238384Sjkim 1535280304Sjkim /* do other additions every 5 doublings */ 1536280304Sjkim if (num_points && (i % 5 == 0)) { 1537280304Sjkim /* loop over all scalars */ 1538280304Sjkim for (num = 0; num < num_points; ++num) { 1539280304Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1540280304Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1541280304Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1542280304Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1543280304Sjkim bits |= get_bit(scalars[num], i) << 1; 1544280304Sjkim bits |= get_bit(scalars[num], i - 1); 1545280304Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1546238384Sjkim 1547280304Sjkim /* 1548280304Sjkim * select the point to add or subtract, in constant time 1549280304Sjkim */ 1550280304Sjkim select_point(digit, 17, pre_comp[num], tmp); 1551280304Sjkim felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1552280304Sjkim * point */ 1553280304Sjkim copy_conditional(tmp[1], tmp[3], (-(limb) sign)); 1554238384Sjkim 1555280304Sjkim if (!skip) { 1556280304Sjkim point_add(nq[0], nq[1], nq[2], 1557280304Sjkim nq[0], nq[1], nq[2], 1558280304Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1559280304Sjkim } else { 1560280304Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1561280304Sjkim skip = 0; 1562280304Sjkim } 1563280304Sjkim } 1564280304Sjkim } 1565280304Sjkim } 1566280304Sjkim felem_assign(x_out, nq[0]); 1567280304Sjkim felem_assign(y_out, nq[1]); 1568280304Sjkim felem_assign(z_out, nq[2]); 1569280304Sjkim} 1570238384Sjkim 1571238384Sjkim/* Precomputation for the group generator. */ 1572238384Sjkimtypedef struct { 1573280304Sjkim felem g_pre_comp[16][3]; 1574280304Sjkim int references; 1575238384Sjkim} NISTP521_PRE_COMP; 1576238384Sjkim 1577238384Sjkimconst EC_METHOD *EC_GFp_nistp521_method(void) 1578280304Sjkim{ 1579280304Sjkim static const EC_METHOD ret = { 1580280304Sjkim EC_FLAGS_DEFAULT_OCT, 1581280304Sjkim NID_X9_62_prime_field, 1582280304Sjkim ec_GFp_nistp521_group_init, 1583280304Sjkim ec_GFp_simple_group_finish, 1584280304Sjkim ec_GFp_simple_group_clear_finish, 1585280304Sjkim ec_GFp_nist_group_copy, 1586280304Sjkim ec_GFp_nistp521_group_set_curve, 1587280304Sjkim ec_GFp_simple_group_get_curve, 1588280304Sjkim ec_GFp_simple_group_get_degree, 1589280304Sjkim ec_GFp_simple_group_check_discriminant, 1590280304Sjkim ec_GFp_simple_point_init, 1591280304Sjkim ec_GFp_simple_point_finish, 1592280304Sjkim ec_GFp_simple_point_clear_finish, 1593280304Sjkim ec_GFp_simple_point_copy, 1594280304Sjkim ec_GFp_simple_point_set_to_infinity, 1595280304Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 1596280304Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 1597280304Sjkim ec_GFp_simple_point_set_affine_coordinates, 1598280304Sjkim ec_GFp_nistp521_point_get_affine_coordinates, 1599280304Sjkim 0 /* point_set_compressed_coordinates */ , 1600280304Sjkim 0 /* point2oct */ , 1601280304Sjkim 0 /* oct2point */ , 1602280304Sjkim ec_GFp_simple_add, 1603280304Sjkim ec_GFp_simple_dbl, 1604280304Sjkim ec_GFp_simple_invert, 1605280304Sjkim ec_GFp_simple_is_at_infinity, 1606280304Sjkim ec_GFp_simple_is_on_curve, 1607280304Sjkim ec_GFp_simple_cmp, 1608280304Sjkim ec_GFp_simple_make_affine, 1609280304Sjkim ec_GFp_simple_points_make_affine, 1610280304Sjkim ec_GFp_nistp521_points_mul, 1611280304Sjkim ec_GFp_nistp521_precompute_mult, 1612280304Sjkim ec_GFp_nistp521_have_precompute_mult, 1613280304Sjkim ec_GFp_nist_field_mul, 1614280304Sjkim ec_GFp_nist_field_sqr, 1615280304Sjkim 0 /* field_div */ , 1616280304Sjkim 0 /* field_encode */ , 1617280304Sjkim 0 /* field_decode */ , 1618280304Sjkim 0 /* field_set_to_one */ 1619280304Sjkim }; 1620238384Sjkim 1621280304Sjkim return &ret; 1622280304Sjkim} 1623238384Sjkim 1624238384Sjkim/******************************************************************************/ 1625280304Sjkim/* 1626280304Sjkim * FUNCTIONS TO MANAGE PRECOMPUTATION 1627238384Sjkim */ 1628238384Sjkim 1629238384Sjkimstatic NISTP521_PRE_COMP *nistp521_pre_comp_new() 1630280304Sjkim{ 1631280304Sjkim NISTP521_PRE_COMP *ret = NULL; 1632280304Sjkim ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP)); 1633280304Sjkim if (!ret) { 1634280304Sjkim ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1635280304Sjkim return ret; 1636280304Sjkim } 1637280304Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1638280304Sjkim ret->references = 1; 1639280304Sjkim return ret; 1640280304Sjkim} 1641238384Sjkim 1642238384Sjkimstatic void *nistp521_pre_comp_dup(void *src_) 1643280304Sjkim{ 1644280304Sjkim NISTP521_PRE_COMP *src = src_; 1645238384Sjkim 1646280304Sjkim /* no need to actually copy, these objects never change! */ 1647280304Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1648238384Sjkim 1649280304Sjkim return src_; 1650280304Sjkim} 1651238384Sjkim 1652238384Sjkimstatic void nistp521_pre_comp_free(void *pre_) 1653280304Sjkim{ 1654280304Sjkim int i; 1655280304Sjkim NISTP521_PRE_COMP *pre = pre_; 1656238384Sjkim 1657280304Sjkim if (!pre) 1658280304Sjkim return; 1659238384Sjkim 1660280304Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1661280304Sjkim if (i > 0) 1662280304Sjkim return; 1663238384Sjkim 1664280304Sjkim OPENSSL_free(pre); 1665280304Sjkim} 1666238384Sjkim 1667238384Sjkimstatic void nistp521_pre_comp_clear_free(void *pre_) 1668280304Sjkim{ 1669280304Sjkim int i; 1670280304Sjkim NISTP521_PRE_COMP *pre = pre_; 1671238384Sjkim 1672280304Sjkim if (!pre) 1673280304Sjkim return; 1674238384Sjkim 1675280304Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1676280304Sjkim if (i > 0) 1677280304Sjkim return; 1678238384Sjkim 1679280304Sjkim OPENSSL_cleanse(pre, sizeof(*pre)); 1680280304Sjkim OPENSSL_free(pre); 1681280304Sjkim} 1682238384Sjkim 1683238384Sjkim/******************************************************************************/ 1684280304Sjkim/* 1685280304Sjkim * OPENSSL EC_METHOD FUNCTIONS 1686238384Sjkim */ 1687238384Sjkim 1688238384Sjkimint ec_GFp_nistp521_group_init(EC_GROUP *group) 1689280304Sjkim{ 1690280304Sjkim int ret; 1691280304Sjkim ret = ec_GFp_simple_group_init(group); 1692280304Sjkim group->a_is_minus3 = 1; 1693280304Sjkim return ret; 1694280304Sjkim} 1695238384Sjkim 1696238384Sjkimint ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1697280304Sjkim const BIGNUM *a, const BIGNUM *b, 1698280304Sjkim BN_CTX *ctx) 1699280304Sjkim{ 1700280304Sjkim int ret = 0; 1701280304Sjkim BN_CTX *new_ctx = NULL; 1702280304Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1703238384Sjkim 1704280304Sjkim if (ctx == NULL) 1705280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1706280304Sjkim return 0; 1707280304Sjkim BN_CTX_start(ctx); 1708280304Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1709280304Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1710280304Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) 1711280304Sjkim goto err; 1712280304Sjkim BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p); 1713280304Sjkim BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a); 1714280304Sjkim BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b); 1715280304Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1716280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE, 1717280304Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1718280304Sjkim goto err; 1719280304Sjkim } 1720280304Sjkim group->field_mod_func = BN_nist_mod_521; 1721280304Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1722280304Sjkim err: 1723280304Sjkim BN_CTX_end(ctx); 1724280304Sjkim if (new_ctx != NULL) 1725280304Sjkim BN_CTX_free(new_ctx); 1726280304Sjkim return ret; 1727280304Sjkim} 1728238384Sjkim 1729280304Sjkim/* 1730280304Sjkim * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1731280304Sjkim * (X/Z^2, Y/Z^3) 1732280304Sjkim */ 1733238384Sjkimint ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group, 1734280304Sjkim const EC_POINT *point, 1735280304Sjkim BIGNUM *x, BIGNUM *y, 1736280304Sjkim BN_CTX *ctx) 1737280304Sjkim{ 1738280304Sjkim felem z1, z2, x_in, y_in, x_out, y_out; 1739280304Sjkim largefelem tmp; 1740238384Sjkim 1741280304Sjkim if (EC_POINT_is_at_infinity(group, point)) { 1742280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1743280304Sjkim EC_R_POINT_AT_INFINITY); 1744280304Sjkim return 0; 1745280304Sjkim } 1746280304Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1747280304Sjkim (!BN_to_felem(z1, &point->Z))) 1748280304Sjkim return 0; 1749280304Sjkim felem_inv(z2, z1); 1750280304Sjkim felem_square(tmp, z2); 1751280304Sjkim felem_reduce(z1, tmp); 1752280304Sjkim felem_mul(tmp, x_in, z1); 1753280304Sjkim felem_reduce(x_in, tmp); 1754280304Sjkim felem_contract(x_out, x_in); 1755280304Sjkim if (x != NULL) { 1756280304Sjkim if (!felem_to_BN(x, x_out)) { 1757280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1758280304Sjkim ERR_R_BN_LIB); 1759280304Sjkim return 0; 1760280304Sjkim } 1761280304Sjkim } 1762280304Sjkim felem_mul(tmp, z1, z2); 1763280304Sjkim felem_reduce(z1, tmp); 1764280304Sjkim felem_mul(tmp, y_in, z1); 1765280304Sjkim felem_reduce(y_in, tmp); 1766280304Sjkim felem_contract(y_out, y_in); 1767280304Sjkim if (y != NULL) { 1768280304Sjkim if (!felem_to_BN(y, y_out)) { 1769280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1770280304Sjkim ERR_R_BN_LIB); 1771280304Sjkim return 0; 1772280304Sjkim } 1773280304Sjkim } 1774280304Sjkim return 1; 1775280304Sjkim} 1776238384Sjkim 1777280304Sjkim/* points below is of size |num|, and tmp_felems is of size |num+1/ */ 1778280304Sjkimstatic void make_points_affine(size_t num, felem points[][3], 1779280304Sjkim felem tmp_felems[]) 1780280304Sjkim{ 1781280304Sjkim /* 1782280304Sjkim * Runs in constant time, unless an input is the point at infinity (which 1783280304Sjkim * normally shouldn't happen). 1784280304Sjkim */ 1785280304Sjkim ec_GFp_nistp_points_make_affine_internal(num, 1786280304Sjkim points, 1787280304Sjkim sizeof(felem), 1788280304Sjkim tmp_felems, 1789280304Sjkim (void (*)(void *))felem_one, 1790280304Sjkim (int (*)(const void *)) 1791280304Sjkim felem_is_zero_int, 1792280304Sjkim (void (*)(void *, const void *)) 1793280304Sjkim felem_assign, 1794280304Sjkim (void (*)(void *, const void *)) 1795280304Sjkim felem_square_reduce, (void (*) 1796280304Sjkim (void *, 1797280304Sjkim const void 1798280304Sjkim *, 1799280304Sjkim const void 1800280304Sjkim *)) 1801280304Sjkim felem_mul_reduce, 1802280304Sjkim (void (*)(void *, const void *)) 1803280304Sjkim felem_inv, 1804280304Sjkim (void (*)(void *, const void *)) 1805280304Sjkim felem_contract); 1806280304Sjkim} 1807238384Sjkim 1808280304Sjkim/* 1809280304Sjkim * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1810280304Sjkim * values Result is stored in r (r can equal one of the inputs). 1811280304Sjkim */ 1812238384Sjkimint ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r, 1813280304Sjkim const BIGNUM *scalar, size_t num, 1814280304Sjkim const EC_POINT *points[], 1815280304Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1816280304Sjkim{ 1817280304Sjkim int ret = 0; 1818280304Sjkim int j; 1819280304Sjkim int mixed = 0; 1820280304Sjkim BN_CTX *new_ctx = NULL; 1821280304Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1822280304Sjkim felem_bytearray g_secret; 1823280304Sjkim felem_bytearray *secrets = NULL; 1824280304Sjkim felem(*pre_comp)[17][3] = NULL; 1825280304Sjkim felem *tmp_felems = NULL; 1826280304Sjkim felem_bytearray tmp; 1827280304Sjkim unsigned i, num_bytes; 1828280304Sjkim int have_pre_comp = 0; 1829280304Sjkim size_t num_points = num; 1830280304Sjkim felem x_in, y_in, z_in, x_out, y_out, z_out; 1831280304Sjkim NISTP521_PRE_COMP *pre = NULL; 1832280304Sjkim felem(*g_pre_comp)[3] = NULL; 1833280304Sjkim EC_POINT *generator = NULL; 1834280304Sjkim const EC_POINT *p = NULL; 1835280304Sjkim const BIGNUM *p_scalar = NULL; 1836238384Sjkim 1837280304Sjkim if (ctx == NULL) 1838280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1839280304Sjkim return 0; 1840280304Sjkim BN_CTX_start(ctx); 1841280304Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1842280304Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1843280304Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1844280304Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1845280304Sjkim goto err; 1846238384Sjkim 1847280304Sjkim if (scalar != NULL) { 1848280304Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1849280304Sjkim nistp521_pre_comp_dup, 1850280304Sjkim nistp521_pre_comp_free, 1851280304Sjkim nistp521_pre_comp_clear_free); 1852280304Sjkim if (pre) 1853280304Sjkim /* we have precomputation, try to use it */ 1854280304Sjkim g_pre_comp = &pre->g_pre_comp[0]; 1855280304Sjkim else 1856280304Sjkim /* try to use the standard precomputation */ 1857280304Sjkim g_pre_comp = (felem(*)[3]) gmul; 1858280304Sjkim generator = EC_POINT_new(group); 1859280304Sjkim if (generator == NULL) 1860280304Sjkim goto err; 1861280304Sjkim /* get the generator from precomputation */ 1862280304Sjkim if (!felem_to_BN(x, g_pre_comp[1][0]) || 1863280304Sjkim !felem_to_BN(y, g_pre_comp[1][1]) || 1864280304Sjkim !felem_to_BN(z, g_pre_comp[1][2])) { 1865280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1866280304Sjkim goto err; 1867280304Sjkim } 1868280304Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1869280304Sjkim generator, x, y, z, 1870280304Sjkim ctx)) 1871280304Sjkim goto err; 1872280304Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1873280304Sjkim /* precomputation matches generator */ 1874280304Sjkim have_pre_comp = 1; 1875280304Sjkim else 1876280304Sjkim /* 1877280304Sjkim * we don't have valid precomputation: treat the generator as a 1878280304Sjkim * random point 1879280304Sjkim */ 1880280304Sjkim num_points++; 1881280304Sjkim } 1882238384Sjkim 1883280304Sjkim if (num_points > 0) { 1884280304Sjkim if (num_points >= 2) { 1885280304Sjkim /* 1886280304Sjkim * unless we precompute multiples for just one point, converting 1887280304Sjkim * those into affine form is time well spent 1888280304Sjkim */ 1889280304Sjkim mixed = 1; 1890280304Sjkim } 1891280304Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1892280304Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1893280304Sjkim if (mixed) 1894280304Sjkim tmp_felems = 1895280304Sjkim OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1896280304Sjkim if ((secrets == NULL) || (pre_comp == NULL) 1897280304Sjkim || (mixed && (tmp_felems == NULL))) { 1898280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1899280304Sjkim goto err; 1900280304Sjkim } 1901238384Sjkim 1902280304Sjkim /* 1903280304Sjkim * we treat NULL scalars as 0, and NULL points as points at infinity, 1904280304Sjkim * i.e., they contribute nothing to the linear combination 1905280304Sjkim */ 1906280304Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1907280304Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1908280304Sjkim for (i = 0; i < num_points; ++i) { 1909280304Sjkim if (i == num) 1910280304Sjkim /* 1911280304Sjkim * we didn't have a valid precomputation, so we pick the 1912280304Sjkim * generator 1913280304Sjkim */ 1914280304Sjkim { 1915280304Sjkim p = EC_GROUP_get0_generator(group); 1916280304Sjkim p_scalar = scalar; 1917280304Sjkim } else 1918280304Sjkim /* the i^th point */ 1919280304Sjkim { 1920280304Sjkim p = points[i]; 1921280304Sjkim p_scalar = scalars[i]; 1922280304Sjkim } 1923280304Sjkim if ((p_scalar != NULL) && (p != NULL)) { 1924280304Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1925280304Sjkim if ((BN_num_bits(p_scalar) > 521) 1926280304Sjkim || (BN_is_negative(p_scalar))) { 1927280304Sjkim /* 1928280304Sjkim * this is an unusual input, and we don't guarantee 1929280304Sjkim * constant-timeness 1930280304Sjkim */ 1931280304Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1932280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1933280304Sjkim goto err; 1934280304Sjkim } 1935280304Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1936280304Sjkim } else 1937280304Sjkim num_bytes = BN_bn2bin(p_scalar, tmp); 1938280304Sjkim flip_endian(secrets[i], tmp, num_bytes); 1939280304Sjkim /* precompute multiples */ 1940280304Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1941280304Sjkim (!BN_to_felem(y_out, &p->Y)) || 1942280304Sjkim (!BN_to_felem(z_out, &p->Z))) 1943280304Sjkim goto err; 1944280304Sjkim memcpy(pre_comp[i][1][0], x_out, sizeof(felem)); 1945280304Sjkim memcpy(pre_comp[i][1][1], y_out, sizeof(felem)); 1946280304Sjkim memcpy(pre_comp[i][1][2], z_out, sizeof(felem)); 1947280304Sjkim for (j = 2; j <= 16; ++j) { 1948280304Sjkim if (j & 1) { 1949280304Sjkim point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1950280304Sjkim pre_comp[i][j][2], pre_comp[i][1][0], 1951280304Sjkim pre_comp[i][1][1], pre_comp[i][1][2], 0, 1952280304Sjkim pre_comp[i][j - 1][0], 1953280304Sjkim pre_comp[i][j - 1][1], 1954280304Sjkim pre_comp[i][j - 1][2]); 1955280304Sjkim } else { 1956280304Sjkim point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1957280304Sjkim pre_comp[i][j][2], pre_comp[i][j / 2][0], 1958280304Sjkim pre_comp[i][j / 2][1], 1959280304Sjkim pre_comp[i][j / 2][2]); 1960280304Sjkim } 1961280304Sjkim } 1962280304Sjkim } 1963280304Sjkim } 1964280304Sjkim if (mixed) 1965280304Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1966280304Sjkim } 1967238384Sjkim 1968280304Sjkim /* the scalar for the generator */ 1969280304Sjkim if ((scalar != NULL) && (have_pre_comp)) { 1970280304Sjkim memset(g_secret, 0, sizeof(g_secret)); 1971280304Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1972280304Sjkim if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) { 1973280304Sjkim /* 1974280304Sjkim * this is an unusual input, and we don't guarantee 1975280304Sjkim * constant-timeness 1976280304Sjkim */ 1977280304Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1978280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1979280304Sjkim goto err; 1980280304Sjkim } 1981280304Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1982280304Sjkim } else 1983280304Sjkim num_bytes = BN_bn2bin(scalar, tmp); 1984280304Sjkim flip_endian(g_secret, tmp, num_bytes); 1985280304Sjkim /* do the multiplication with generator precomputation */ 1986280304Sjkim batch_mul(x_out, y_out, z_out, 1987280304Sjkim (const felem_bytearray(*))secrets, num_points, 1988280304Sjkim g_secret, 1989280304Sjkim mixed, (const felem(*)[17][3])pre_comp, 1990280304Sjkim (const felem(*)[3])g_pre_comp); 1991280304Sjkim } else 1992280304Sjkim /* do the multiplication without generator precomputation */ 1993280304Sjkim batch_mul(x_out, y_out, z_out, 1994280304Sjkim (const felem_bytearray(*))secrets, num_points, 1995280304Sjkim NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1996280304Sjkim /* reduce the output to its unique minimal representation */ 1997280304Sjkim felem_contract(x_in, x_out); 1998280304Sjkim felem_contract(y_in, y_out); 1999280304Sjkim felem_contract(z_in, z_out); 2000280304Sjkim if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 2001280304Sjkim (!felem_to_BN(z, z_in))) { 2002280304Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 2003280304Sjkim goto err; 2004280304Sjkim } 2005280304Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 2006238384Sjkim 2007280304Sjkim err: 2008280304Sjkim BN_CTX_end(ctx); 2009280304Sjkim if (generator != NULL) 2010280304Sjkim EC_POINT_free(generator); 2011280304Sjkim if (new_ctx != NULL) 2012280304Sjkim BN_CTX_free(new_ctx); 2013280304Sjkim if (secrets != NULL) 2014280304Sjkim OPENSSL_free(secrets); 2015280304Sjkim if (pre_comp != NULL) 2016280304Sjkim OPENSSL_free(pre_comp); 2017280304Sjkim if (tmp_felems != NULL) 2018280304Sjkim OPENSSL_free(tmp_felems); 2019280304Sjkim return ret; 2020280304Sjkim} 2021238384Sjkim 2022238384Sjkimint ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 2023280304Sjkim{ 2024280304Sjkim int ret = 0; 2025280304Sjkim NISTP521_PRE_COMP *pre = NULL; 2026280304Sjkim int i, j; 2027280304Sjkim BN_CTX *new_ctx = NULL; 2028280304Sjkim BIGNUM *x, *y; 2029280304Sjkim EC_POINT *generator = NULL; 2030280304Sjkim felem tmp_felems[16]; 2031238384Sjkim 2032280304Sjkim /* throw away old precomputation */ 2033280304Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup, 2034280304Sjkim nistp521_pre_comp_free, 2035280304Sjkim nistp521_pre_comp_clear_free); 2036280304Sjkim if (ctx == NULL) 2037280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 2038280304Sjkim return 0; 2039280304Sjkim BN_CTX_start(ctx); 2040280304Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 2041280304Sjkim goto err; 2042280304Sjkim /* get the generator */ 2043280304Sjkim if (group->generator == NULL) 2044280304Sjkim goto err; 2045280304Sjkim generator = EC_POINT_new(group); 2046280304Sjkim if (generator == NULL) 2047280304Sjkim goto err; 2048280304Sjkim BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x); 2049280304Sjkim BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y); 2050280304Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 2051280304Sjkim goto err; 2052280304Sjkim if ((pre = nistp521_pre_comp_new()) == NULL) 2053280304Sjkim goto err; 2054280304Sjkim /* 2055280304Sjkim * if the generator is the standard one, use built-in precomputation 2056280304Sjkim */ 2057280304Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 2058280304Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 2059280304Sjkim ret = 1; 2060280304Sjkim goto err; 2061280304Sjkim } 2062280304Sjkim if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) || 2063280304Sjkim (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) || 2064280304Sjkim (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z))) 2065280304Sjkim goto err; 2066280304Sjkim /* compute 2^130*G, 2^260*G, 2^390*G */ 2067280304Sjkim for (i = 1; i <= 4; i <<= 1) { 2068280304Sjkim point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], 2069280304Sjkim pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0], 2070280304Sjkim pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]); 2071280304Sjkim for (j = 0; j < 129; ++j) { 2072280304Sjkim point_double(pre->g_pre_comp[2 * i][0], 2073280304Sjkim pre->g_pre_comp[2 * i][1], 2074280304Sjkim pre->g_pre_comp[2 * i][2], 2075280304Sjkim pre->g_pre_comp[2 * i][0], 2076280304Sjkim pre->g_pre_comp[2 * i][1], 2077280304Sjkim pre->g_pre_comp[2 * i][2]); 2078280304Sjkim } 2079280304Sjkim } 2080280304Sjkim /* g_pre_comp[0] is the point at infinity */ 2081280304Sjkim memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0])); 2082280304Sjkim /* the remaining multiples */ 2083280304Sjkim /* 2^130*G + 2^260*G */ 2084280304Sjkim point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1], 2085280304Sjkim pre->g_pre_comp[6][2], pre->g_pre_comp[4][0], 2086280304Sjkim pre->g_pre_comp[4][1], pre->g_pre_comp[4][2], 2087280304Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2088280304Sjkim pre->g_pre_comp[2][2]); 2089280304Sjkim /* 2^130*G + 2^390*G */ 2090280304Sjkim point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], 2091280304Sjkim pre->g_pre_comp[10][2], pre->g_pre_comp[8][0], 2092280304Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 2093280304Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2094280304Sjkim pre->g_pre_comp[2][2]); 2095280304Sjkim /* 2^260*G + 2^390*G */ 2096280304Sjkim point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], 2097280304Sjkim pre->g_pre_comp[12][2], pre->g_pre_comp[8][0], 2098280304Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 2099280304Sjkim 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], 2100280304Sjkim pre->g_pre_comp[4][2]); 2101280304Sjkim /* 2^130*G + 2^260*G + 2^390*G */ 2102280304Sjkim point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], 2103280304Sjkim pre->g_pre_comp[14][2], pre->g_pre_comp[12][0], 2104280304Sjkim pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 2105280304Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2106280304Sjkim pre->g_pre_comp[2][2]); 2107280304Sjkim for (i = 1; i < 8; ++i) { 2108280304Sjkim /* odd multiples: add G */ 2109280304Sjkim point_add(pre->g_pre_comp[2 * i + 1][0], 2110280304Sjkim pre->g_pre_comp[2 * i + 1][1], 2111280304Sjkim pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0], 2112280304Sjkim pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0, 2113280304Sjkim pre->g_pre_comp[1][0], pre->g_pre_comp[1][1], 2114280304Sjkim pre->g_pre_comp[1][2]); 2115280304Sjkim } 2116280304Sjkim make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems); 2117238384Sjkim 2118280304Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup, 2119280304Sjkim nistp521_pre_comp_free, 2120280304Sjkim nistp521_pre_comp_clear_free)) 2121280304Sjkim goto err; 2122280304Sjkim ret = 1; 2123280304Sjkim pre = NULL; 2124238384Sjkim err: 2125280304Sjkim BN_CTX_end(ctx); 2126280304Sjkim if (generator != NULL) 2127280304Sjkim EC_POINT_free(generator); 2128280304Sjkim if (new_ctx != NULL) 2129280304Sjkim BN_CTX_free(new_ctx); 2130280304Sjkim if (pre) 2131280304Sjkim nistp521_pre_comp_free(pre); 2132280304Sjkim return ret; 2133280304Sjkim} 2134238384Sjkim 2135238384Sjkimint ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group) 2136280304Sjkim{ 2137280304Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup, 2138280304Sjkim nistp521_pre_comp_free, 2139280304Sjkim nistp521_pre_comp_clear_free) 2140280304Sjkim != NULL) 2141280304Sjkim return 1; 2142280304Sjkim else 2143280304Sjkim return 0; 2144280304Sjkim} 2145238384Sjkim 2146238384Sjkim#else 2147280304Sjkimstatic void *dummy = &dummy; 2148238384Sjkim#endif 2149