1238384Sjkim/* crypto/ec/ecp_nistp521.c */ 2238384Sjkim/* 3238384Sjkim * Written by Adam Langley (Google) for the OpenSSL project 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25238384Sjkim * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26238384Sjkim * work which got its smarts from Daniel J. Bernstein's work on the same. 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#include <openssl/opensslconf.h> 30238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31238384Sjkim 32280297Sjkim# ifndef OPENSSL_SYS_VMS 33280297Sjkim# include <stdint.h> 34280297Sjkim# else 35280297Sjkim# include <inttypes.h> 36280297Sjkim# endif 37238384Sjkim 38280297Sjkim# include <string.h> 39280297Sjkim# include <openssl/err.h> 40280297Sjkim# include "ec_lcl.h" 41352193Sjkim# include "bn_int.h" /* bn_bn2lebinpad, bn_lebin2bn */ 42238384Sjkim 43280297Sjkim# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 44238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 45280297Sjkimtypedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 46280297Sjkim * platforms */ 47280297Sjkim# else 48280297Sjkim# error "Need GCC 3.1 or later to define type uint128_t" 49280297Sjkim# endif 50238384Sjkim 51238384Sjkimtypedef uint8_t u8; 52238384Sjkimtypedef uint64_t u64; 53238384Sjkim 54280297Sjkim/* 55280297Sjkim * The underlying field. P521 operates over GF(2^521-1). We can serialise an 56280297Sjkim * element of this field into 66 bytes where the most significant byte 57280297Sjkim * contains only a single bit. We call this an felem_bytearray. 58280297Sjkim */ 59238384Sjkim 60238384Sjkimtypedef u8 felem_bytearray[66]; 61238384Sjkim 62280297Sjkim/* 63280297Sjkim * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5. 64280297Sjkim * These values are big-endian. 65280297Sjkim */ 66280297Sjkimstatic const felem_bytearray nistp521_curve_params[5] = { 67280297Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ 68280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 69280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 70280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 71280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 72280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 73280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 74280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 75280297Sjkim 0xff, 0xff}, 76280297Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ 77280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 78280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 79280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 80280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 81280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 82280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 83280297Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 84280297Sjkim 0xff, 0xfc}, 85280297Sjkim {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ 86280297Sjkim 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 87280297Sjkim 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 88280297Sjkim 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 89280297Sjkim 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 90280297Sjkim 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 91280297Sjkim 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 92280297Sjkim 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 93280297Sjkim 0x3f, 0x00}, 94280297Sjkim {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ 95280297Sjkim 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 96280297Sjkim 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, 97280297Sjkim 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 98280297Sjkim 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 99280297Sjkim 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 100280297Sjkim 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 101280297Sjkim 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 102280297Sjkim 0xbd, 0x66}, 103280297Sjkim {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ 104280297Sjkim 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, 105280297Sjkim 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, 106280297Sjkim 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, 107280297Sjkim 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, 108280297Sjkim 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, 109280297Sjkim 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, 110280297Sjkim 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, 111280297Sjkim 0x66, 0x50} 112280297Sjkim}; 113238384Sjkim 114280297Sjkim/*- 115280297Sjkim * The representation of field elements. 116238384Sjkim * ------------------------------------ 117238384Sjkim * 118238384Sjkim * We represent field elements with nine values. These values are either 64 or 119238384Sjkim * 128 bits and the field element represented is: 120238384Sjkim * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p) 121238384Sjkim * Each of the nine values is called a 'limb'. Since the limbs are spaced only 122238384Sjkim * 58 bits apart, but are greater than 58 bits in length, the most significant 123238384Sjkim * bits of each limb overlap with the least significant bits of the next. 124238384Sjkim * 125238384Sjkim * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a 126238384Sjkim * 'largefelem' */ 127238384Sjkim 128280297Sjkim# define NLIMBS 9 129238384Sjkim 130238384Sjkimtypedef uint64_t limb; 131238384Sjkimtypedef limb felem[NLIMBS]; 132238384Sjkimtypedef uint128_t largefelem[NLIMBS]; 133238384Sjkim 134238384Sjkimstatic const limb bottom57bits = 0x1ffffffffffffff; 135238384Sjkimstatic const limb bottom58bits = 0x3ffffffffffffff; 136238384Sjkim 137280297Sjkim/* 138280297Sjkim * bin66_to_felem takes a little-endian byte array and converts it into felem 139280297Sjkim * form. This assumes that the CPU is little-endian. 140280297Sjkim */ 141238384Sjkimstatic void bin66_to_felem(felem out, const u8 in[66]) 142280297Sjkim{ 143280297Sjkim out[0] = (*((limb *) & in[0])) & bottom58bits; 144280297Sjkim out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits; 145280297Sjkim out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits; 146280297Sjkim out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits; 147280297Sjkim out[4] = (*((limb *) & in[29])) & bottom58bits; 148280297Sjkim out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits; 149280297Sjkim out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits; 150280297Sjkim out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits; 151280297Sjkim out[8] = (*((limb *) & in[58])) & bottom57bits; 152280297Sjkim} 153238384Sjkim 154280297Sjkim/* 155280297Sjkim * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte 156280297Sjkim * array. This assumes that the CPU is little-endian. 157280297Sjkim */ 158238384Sjkimstatic void felem_to_bin66(u8 out[66], const felem in) 159280297Sjkim{ 160280297Sjkim memset(out, 0, 66); 161280297Sjkim (*((limb *) & out[0])) = in[0]; 162280297Sjkim (*((limb *) & out[7])) |= in[1] << 2; 163280297Sjkim (*((limb *) & out[14])) |= in[2] << 4; 164280297Sjkim (*((limb *) & out[21])) |= in[3] << 6; 165280297Sjkim (*((limb *) & out[29])) = in[4]; 166280297Sjkim (*((limb *) & out[36])) |= in[5] << 2; 167280297Sjkim (*((limb *) & out[43])) |= in[6] << 4; 168280297Sjkim (*((limb *) & out[50])) |= in[7] << 6; 169280297Sjkim (*((limb *) & out[58])) = in[8]; 170280297Sjkim} 171238384Sjkim 172238384Sjkim/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 173238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 174280297Sjkim{ 175280297Sjkim felem_bytearray b_out; 176352193Sjkim int num_bytes; 177238384Sjkim 178352193Sjkim if (BN_is_negative(bn)) { 179280297Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 180280297Sjkim return 0; 181280297Sjkim } 182352193Sjkim num_bytes = bn_bn2lebinpad(bn, b_out, sizeof(b_out)); 183352193Sjkim if (num_bytes < 0) { 184280297Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 185280297Sjkim return 0; 186280297Sjkim } 187280297Sjkim bin66_to_felem(out, b_out); 188280297Sjkim return 1; 189280297Sjkim} 190238384Sjkim 191238384Sjkim/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 192238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 193280297Sjkim{ 194352193Sjkim felem_bytearray b_out; 195352193Sjkim felem_to_bin66(b_out, in); 196352193Sjkim return bn_lebin2bn(b_out, sizeof(b_out), out); 197280297Sjkim} 198238384Sjkim 199280297Sjkim/*- 200280297Sjkim * Field operations 201280297Sjkim * ---------------- 202280297Sjkim */ 203238384Sjkim 204238384Sjkimstatic void felem_one(felem out) 205280297Sjkim{ 206280297Sjkim out[0] = 1; 207280297Sjkim out[1] = 0; 208280297Sjkim out[2] = 0; 209280297Sjkim out[3] = 0; 210280297Sjkim out[4] = 0; 211280297Sjkim out[5] = 0; 212280297Sjkim out[6] = 0; 213280297Sjkim out[7] = 0; 214280297Sjkim out[8] = 0; 215280297Sjkim} 216238384Sjkim 217238384Sjkimstatic void felem_assign(felem out, const felem in) 218280297Sjkim{ 219280297Sjkim out[0] = in[0]; 220280297Sjkim out[1] = in[1]; 221280297Sjkim out[2] = in[2]; 222280297Sjkim out[3] = in[3]; 223280297Sjkim out[4] = in[4]; 224280297Sjkim out[5] = in[5]; 225280297Sjkim out[6] = in[6]; 226280297Sjkim out[7] = in[7]; 227280297Sjkim out[8] = in[8]; 228280297Sjkim} 229238384Sjkim 230238384Sjkim/* felem_sum64 sets out = out + in. */ 231238384Sjkimstatic void felem_sum64(felem out, const felem in) 232280297Sjkim{ 233280297Sjkim out[0] += in[0]; 234280297Sjkim out[1] += in[1]; 235280297Sjkim out[2] += in[2]; 236280297Sjkim out[3] += in[3]; 237280297Sjkim out[4] += in[4]; 238280297Sjkim out[5] += in[5]; 239280297Sjkim out[6] += in[6]; 240280297Sjkim out[7] += in[7]; 241280297Sjkim out[8] += in[8]; 242280297Sjkim} 243238384Sjkim 244238384Sjkim/* felem_scalar sets out = in * scalar */ 245238384Sjkimstatic void felem_scalar(felem out, const felem in, limb scalar) 246280297Sjkim{ 247280297Sjkim out[0] = in[0] * scalar; 248280297Sjkim out[1] = in[1] * scalar; 249280297Sjkim out[2] = in[2] * scalar; 250280297Sjkim out[3] = in[3] * scalar; 251280297Sjkim out[4] = in[4] * scalar; 252280297Sjkim out[5] = in[5] * scalar; 253280297Sjkim out[6] = in[6] * scalar; 254280297Sjkim out[7] = in[7] * scalar; 255280297Sjkim out[8] = in[8] * scalar; 256280297Sjkim} 257238384Sjkim 258238384Sjkim/* felem_scalar64 sets out = out * scalar */ 259238384Sjkimstatic void felem_scalar64(felem out, limb scalar) 260280297Sjkim{ 261280297Sjkim out[0] *= scalar; 262280297Sjkim out[1] *= scalar; 263280297Sjkim out[2] *= scalar; 264280297Sjkim out[3] *= scalar; 265280297Sjkim out[4] *= scalar; 266280297Sjkim out[5] *= scalar; 267280297Sjkim out[6] *= scalar; 268280297Sjkim out[7] *= scalar; 269280297Sjkim out[8] *= scalar; 270280297Sjkim} 271238384Sjkim 272238384Sjkim/* felem_scalar128 sets out = out * scalar */ 273238384Sjkimstatic void felem_scalar128(largefelem out, limb scalar) 274280297Sjkim{ 275280297Sjkim out[0] *= scalar; 276280297Sjkim out[1] *= scalar; 277280297Sjkim out[2] *= scalar; 278280297Sjkim out[3] *= scalar; 279280297Sjkim out[4] *= scalar; 280280297Sjkim out[5] *= scalar; 281280297Sjkim out[6] *= scalar; 282280297Sjkim out[7] *= scalar; 283280297Sjkim out[8] *= scalar; 284280297Sjkim} 285238384Sjkim 286280297Sjkim/*- 287280297Sjkim * felem_neg sets |out| to |-in| 288238384Sjkim * On entry: 289238384Sjkim * in[i] < 2^59 + 2^14 290238384Sjkim * On exit: 291238384Sjkim * out[i] < 2^62 292238384Sjkim */ 293238384Sjkimstatic void felem_neg(felem out, const felem in) 294280297Sjkim{ 295280297Sjkim /* In order to prevent underflow, we subtract from 0 mod p. */ 296280297Sjkim static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 297280297Sjkim static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 298238384Sjkim 299280297Sjkim out[0] = two62m3 - in[0]; 300280297Sjkim out[1] = two62m2 - in[1]; 301280297Sjkim out[2] = two62m2 - in[2]; 302280297Sjkim out[3] = two62m2 - in[3]; 303280297Sjkim out[4] = two62m2 - in[4]; 304280297Sjkim out[5] = two62m2 - in[5]; 305280297Sjkim out[6] = two62m2 - in[6]; 306280297Sjkim out[7] = two62m2 - in[7]; 307280297Sjkim out[8] = two62m2 - in[8]; 308280297Sjkim} 309238384Sjkim 310280297Sjkim/*- 311280297Sjkim * felem_diff64 subtracts |in| from |out| 312238384Sjkim * On entry: 313238384Sjkim * in[i] < 2^59 + 2^14 314238384Sjkim * On exit: 315238384Sjkim * out[i] < out[i] + 2^62 316238384Sjkim */ 317238384Sjkimstatic void felem_diff64(felem out, const felem in) 318280297Sjkim{ 319280297Sjkim /* 320280297Sjkim * In order to prevent underflow, we add 0 mod p before subtracting. 321280297Sjkim */ 322280297Sjkim static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 323280297Sjkim static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 324238384Sjkim 325280297Sjkim out[0] += two62m3 - in[0]; 326280297Sjkim out[1] += two62m2 - in[1]; 327280297Sjkim out[2] += two62m2 - in[2]; 328280297Sjkim out[3] += two62m2 - in[3]; 329280297Sjkim out[4] += two62m2 - in[4]; 330280297Sjkim out[5] += two62m2 - in[5]; 331280297Sjkim out[6] += two62m2 - in[6]; 332280297Sjkim out[7] += two62m2 - in[7]; 333280297Sjkim out[8] += two62m2 - in[8]; 334280297Sjkim} 335238384Sjkim 336280297Sjkim/*- 337280297Sjkim * felem_diff_128_64 subtracts |in| from |out| 338238384Sjkim * On entry: 339238384Sjkim * in[i] < 2^62 + 2^17 340238384Sjkim * On exit: 341238384Sjkim * out[i] < out[i] + 2^63 342238384Sjkim */ 343238384Sjkimstatic void felem_diff_128_64(largefelem out, const felem in) 344280297Sjkim{ 345280297Sjkim /* 346348343Sjkim * In order to prevent underflow, we add 64p mod p (which is equivalent 347348343Sjkim * to 0 mod p) before subtracting. p is 2^521 - 1, i.e. in binary a 521 348348343Sjkim * digit number with all bits set to 1. See "The representation of field 349348343Sjkim * elements" comment above for a description of how limbs are used to 350348343Sjkim * represent a number. 64p is represented with 8 limbs containing a number 351348343Sjkim * with 58 bits set and one limb with a number with 57 bits set. 352280297Sjkim */ 353348343Sjkim static const limb two63m6 = (((limb) 1) << 63) - (((limb) 1) << 6); 354348343Sjkim static const limb two63m5 = (((limb) 1) << 63) - (((limb) 1) << 5); 355238384Sjkim 356280297Sjkim out[0] += two63m6 - in[0]; 357280297Sjkim out[1] += two63m5 - in[1]; 358280297Sjkim out[2] += two63m5 - in[2]; 359280297Sjkim out[3] += two63m5 - in[3]; 360280297Sjkim out[4] += two63m5 - in[4]; 361280297Sjkim out[5] += two63m5 - in[5]; 362280297Sjkim out[6] += two63m5 - in[6]; 363280297Sjkim out[7] += two63m5 - in[7]; 364280297Sjkim out[8] += two63m5 - in[8]; 365280297Sjkim} 366238384Sjkim 367280297Sjkim/*- 368280297Sjkim * felem_diff_128_64 subtracts |in| from |out| 369238384Sjkim * On entry: 370238384Sjkim * in[i] < 2^126 371238384Sjkim * On exit: 372238384Sjkim * out[i] < out[i] + 2^127 - 2^69 373238384Sjkim */ 374238384Sjkimstatic void felem_diff128(largefelem out, const largefelem in) 375280297Sjkim{ 376280297Sjkim /* 377280297Sjkim * In order to prevent underflow, we add 0 mod p before subtracting. 378280297Sjkim */ 379280297Sjkim static const uint128_t two127m70 = 380280297Sjkim (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70); 381280297Sjkim static const uint128_t two127m69 = 382280297Sjkim (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69); 383238384Sjkim 384280297Sjkim out[0] += (two127m70 - in[0]); 385280297Sjkim out[1] += (two127m69 - in[1]); 386280297Sjkim out[2] += (two127m69 - in[2]); 387280297Sjkim out[3] += (two127m69 - in[3]); 388280297Sjkim out[4] += (two127m69 - in[4]); 389280297Sjkim out[5] += (two127m69 - in[5]); 390280297Sjkim out[6] += (two127m69 - in[6]); 391280297Sjkim out[7] += (two127m69 - in[7]); 392280297Sjkim out[8] += (two127m69 - in[8]); 393280297Sjkim} 394238384Sjkim 395280297Sjkim/*- 396280297Sjkim * felem_square sets |out| = |in|^2 397238384Sjkim * On entry: 398238384Sjkim * in[i] < 2^62 399238384Sjkim * On exit: 400238384Sjkim * out[i] < 17 * max(in[i]) * max(in[i]) 401238384Sjkim */ 402238384Sjkimstatic void felem_square(largefelem out, const felem in) 403280297Sjkim{ 404280297Sjkim felem inx2, inx4; 405280297Sjkim felem_scalar(inx2, in, 2); 406280297Sjkim felem_scalar(inx4, in, 4); 407238384Sjkim 408280297Sjkim /*- 409280297Sjkim * We have many cases were we want to do 410280297Sjkim * in[x] * in[y] + 411280297Sjkim * in[y] * in[x] 412280297Sjkim * This is obviously just 413280297Sjkim * 2 * in[x] * in[y] 414280297Sjkim * However, rather than do the doubling on the 128 bit result, we 415280297Sjkim * double one of the inputs to the multiplication by reading from 416280297Sjkim * |inx2| 417280297Sjkim */ 418238384Sjkim 419280297Sjkim out[0] = ((uint128_t) in[0]) * in[0]; 420280297Sjkim out[1] = ((uint128_t) in[0]) * inx2[1]; 421280297Sjkim out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1]; 422280297Sjkim out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2]; 423280297Sjkim out[4] = ((uint128_t) in[0]) * inx2[4] + 424280297Sjkim ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2]; 425280297Sjkim out[5] = ((uint128_t) in[0]) * inx2[5] + 426280297Sjkim ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3]; 427280297Sjkim out[6] = ((uint128_t) in[0]) * inx2[6] + 428280297Sjkim ((uint128_t) in[1]) * inx2[5] + 429280297Sjkim ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3]; 430280297Sjkim out[7] = ((uint128_t) in[0]) * inx2[7] + 431280297Sjkim ((uint128_t) in[1]) * inx2[6] + 432280297Sjkim ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4]; 433280297Sjkim out[8] = ((uint128_t) in[0]) * inx2[8] + 434280297Sjkim ((uint128_t) in[1]) * inx2[7] + 435280297Sjkim ((uint128_t) in[2]) * inx2[6] + 436280297Sjkim ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4]; 437238384Sjkim 438280297Sjkim /* 439280297Sjkim * The remaining limbs fall above 2^521, with the first falling at 2^522. 440280297Sjkim * They correspond to locations one bit up from the limbs produced above 441280297Sjkim * so we would have to multiply by two to align them. Again, rather than 442280297Sjkim * operate on the 128-bit result, we double one of the inputs to the 443280297Sjkim * multiplication. If we want to double for both this reason, and the 444280297Sjkim * reason above, then we end up multiplying by four. 445280297Sjkim */ 446238384Sjkim 447280297Sjkim /* 9 */ 448280297Sjkim out[0] += ((uint128_t) in[1]) * inx4[8] + 449280297Sjkim ((uint128_t) in[2]) * inx4[7] + 450280297Sjkim ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5]; 451238384Sjkim 452280297Sjkim /* 10 */ 453280297Sjkim out[1] += ((uint128_t) in[2]) * inx4[8] + 454280297Sjkim ((uint128_t) in[3]) * inx4[7] + 455280297Sjkim ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5]; 456238384Sjkim 457280297Sjkim /* 11 */ 458280297Sjkim out[2] += ((uint128_t) in[3]) * inx4[8] + 459280297Sjkim ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6]; 460238384Sjkim 461280297Sjkim /* 12 */ 462280297Sjkim out[3] += ((uint128_t) in[4]) * inx4[8] + 463280297Sjkim ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6]; 464238384Sjkim 465280297Sjkim /* 13 */ 466280297Sjkim out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7]; 467238384Sjkim 468280297Sjkim /* 14 */ 469280297Sjkim out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7]; 470238384Sjkim 471280297Sjkim /* 15 */ 472280297Sjkim out[6] += ((uint128_t) in[7]) * inx4[8]; 473238384Sjkim 474280297Sjkim /* 16 */ 475280297Sjkim out[7] += ((uint128_t) in[8]) * inx2[8]; 476280297Sjkim} 477238384Sjkim 478280297Sjkim/*- 479280297Sjkim * felem_mul sets |out| = |in1| * |in2| 480238384Sjkim * On entry: 481238384Sjkim * in1[i] < 2^64 482238384Sjkim * in2[i] < 2^63 483238384Sjkim * On exit: 484238384Sjkim * out[i] < 17 * max(in1[i]) * max(in2[i]) 485238384Sjkim */ 486238384Sjkimstatic void felem_mul(largefelem out, const felem in1, const felem in2) 487280297Sjkim{ 488280297Sjkim felem in2x2; 489280297Sjkim felem_scalar(in2x2, in2, 2); 490238384Sjkim 491280297Sjkim out[0] = ((uint128_t) in1[0]) * in2[0]; 492238384Sjkim 493280297Sjkim out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0]; 494238384Sjkim 495280297Sjkim out[2] = ((uint128_t) in1[0]) * in2[2] + 496280297Sjkim ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0]; 497238384Sjkim 498280297Sjkim out[3] = ((uint128_t) in1[0]) * in2[3] + 499280297Sjkim ((uint128_t) in1[1]) * in2[2] + 500280297Sjkim ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0]; 501238384Sjkim 502280297Sjkim out[4] = ((uint128_t) in1[0]) * in2[4] + 503280297Sjkim ((uint128_t) in1[1]) * in2[3] + 504280297Sjkim ((uint128_t) in1[2]) * in2[2] + 505280297Sjkim ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0]; 506238384Sjkim 507280297Sjkim out[5] = ((uint128_t) in1[0]) * in2[5] + 508280297Sjkim ((uint128_t) in1[1]) * in2[4] + 509280297Sjkim ((uint128_t) in1[2]) * in2[3] + 510280297Sjkim ((uint128_t) in1[3]) * in2[2] + 511280297Sjkim ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0]; 512238384Sjkim 513280297Sjkim out[6] = ((uint128_t) in1[0]) * in2[6] + 514280297Sjkim ((uint128_t) in1[1]) * in2[5] + 515280297Sjkim ((uint128_t) in1[2]) * in2[4] + 516280297Sjkim ((uint128_t) in1[3]) * in2[3] + 517280297Sjkim ((uint128_t) in1[4]) * in2[2] + 518280297Sjkim ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0]; 519238384Sjkim 520280297Sjkim out[7] = ((uint128_t) in1[0]) * in2[7] + 521280297Sjkim ((uint128_t) in1[1]) * in2[6] + 522280297Sjkim ((uint128_t) in1[2]) * in2[5] + 523280297Sjkim ((uint128_t) in1[3]) * in2[4] + 524280297Sjkim ((uint128_t) in1[4]) * in2[3] + 525280297Sjkim ((uint128_t) in1[5]) * in2[2] + 526280297Sjkim ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0]; 527238384Sjkim 528280297Sjkim out[8] = ((uint128_t) in1[0]) * in2[8] + 529280297Sjkim ((uint128_t) in1[1]) * in2[7] + 530280297Sjkim ((uint128_t) in1[2]) * in2[6] + 531280297Sjkim ((uint128_t) in1[3]) * in2[5] + 532280297Sjkim ((uint128_t) in1[4]) * in2[4] + 533280297Sjkim ((uint128_t) in1[5]) * in2[3] + 534280297Sjkim ((uint128_t) in1[6]) * in2[2] + 535280297Sjkim ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0]; 536238384Sjkim 537280297Sjkim /* See comment in felem_square about the use of in2x2 here */ 538238384Sjkim 539280297Sjkim out[0] += ((uint128_t) in1[1]) * in2x2[8] + 540280297Sjkim ((uint128_t) in1[2]) * in2x2[7] + 541280297Sjkim ((uint128_t) in1[3]) * in2x2[6] + 542280297Sjkim ((uint128_t) in1[4]) * in2x2[5] + 543280297Sjkim ((uint128_t) in1[5]) * in2x2[4] + 544280297Sjkim ((uint128_t) in1[6]) * in2x2[3] + 545280297Sjkim ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1]; 546238384Sjkim 547280297Sjkim out[1] += ((uint128_t) in1[2]) * in2x2[8] + 548280297Sjkim ((uint128_t) in1[3]) * in2x2[7] + 549280297Sjkim ((uint128_t) in1[4]) * in2x2[6] + 550280297Sjkim ((uint128_t) in1[5]) * in2x2[5] + 551280297Sjkim ((uint128_t) in1[6]) * in2x2[4] + 552280297Sjkim ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2]; 553238384Sjkim 554280297Sjkim out[2] += ((uint128_t) in1[3]) * in2x2[8] + 555280297Sjkim ((uint128_t) in1[4]) * in2x2[7] + 556280297Sjkim ((uint128_t) in1[5]) * in2x2[6] + 557280297Sjkim ((uint128_t) in1[6]) * in2x2[5] + 558280297Sjkim ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3]; 559238384Sjkim 560280297Sjkim out[3] += ((uint128_t) in1[4]) * in2x2[8] + 561280297Sjkim ((uint128_t) in1[5]) * in2x2[7] + 562280297Sjkim ((uint128_t) in1[6]) * in2x2[6] + 563280297Sjkim ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4]; 564238384Sjkim 565280297Sjkim out[4] += ((uint128_t) in1[5]) * in2x2[8] + 566280297Sjkim ((uint128_t) in1[6]) * in2x2[7] + 567280297Sjkim ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5]; 568238384Sjkim 569280297Sjkim out[5] += ((uint128_t) in1[6]) * in2x2[8] + 570280297Sjkim ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6]; 571238384Sjkim 572280297Sjkim out[6] += ((uint128_t) in1[7]) * in2x2[8] + 573280297Sjkim ((uint128_t) in1[8]) * in2x2[7]; 574238384Sjkim 575280297Sjkim out[7] += ((uint128_t) in1[8]) * in2x2[8]; 576280297Sjkim} 577238384Sjkim 578238384Sjkimstatic const limb bottom52bits = 0xfffffffffffff; 579238384Sjkim 580280297Sjkim/*- 581280297Sjkim * felem_reduce converts a largefelem to an felem. 582238384Sjkim * On entry: 583238384Sjkim * in[i] < 2^128 584238384Sjkim * On exit: 585238384Sjkim * out[i] < 2^59 + 2^14 586238384Sjkim */ 587238384Sjkimstatic void felem_reduce(felem out, const largefelem in) 588280297Sjkim{ 589280297Sjkim u64 overflow1, overflow2; 590238384Sjkim 591280297Sjkim out[0] = ((limb) in[0]) & bottom58bits; 592280297Sjkim out[1] = ((limb) in[1]) & bottom58bits; 593280297Sjkim out[2] = ((limb) in[2]) & bottom58bits; 594280297Sjkim out[3] = ((limb) in[3]) & bottom58bits; 595280297Sjkim out[4] = ((limb) in[4]) & bottom58bits; 596280297Sjkim out[5] = ((limb) in[5]) & bottom58bits; 597280297Sjkim out[6] = ((limb) in[6]) & bottom58bits; 598280297Sjkim out[7] = ((limb) in[7]) & bottom58bits; 599280297Sjkim out[8] = ((limb) in[8]) & bottom58bits; 600238384Sjkim 601280297Sjkim /* out[i] < 2^58 */ 602238384Sjkim 603280297Sjkim out[1] += ((limb) in[0]) >> 58; 604280297Sjkim out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6; 605280297Sjkim /*- 606280297Sjkim * out[1] < 2^58 + 2^6 + 2^58 607280297Sjkim * = 2^59 + 2^6 608280297Sjkim */ 609280297Sjkim out[2] += ((limb) (in[0] >> 64)) >> 52; 610238384Sjkim 611280297Sjkim out[2] += ((limb) in[1]) >> 58; 612280297Sjkim out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6; 613280297Sjkim out[3] += ((limb) (in[1] >> 64)) >> 52; 614238384Sjkim 615280297Sjkim out[3] += ((limb) in[2]) >> 58; 616280297Sjkim out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6; 617280297Sjkim out[4] += ((limb) (in[2] >> 64)) >> 52; 618238384Sjkim 619280297Sjkim out[4] += ((limb) in[3]) >> 58; 620280297Sjkim out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6; 621280297Sjkim out[5] += ((limb) (in[3] >> 64)) >> 52; 622238384Sjkim 623280297Sjkim out[5] += ((limb) in[4]) >> 58; 624280297Sjkim out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6; 625280297Sjkim out[6] += ((limb) (in[4] >> 64)) >> 52; 626238384Sjkim 627280297Sjkim out[6] += ((limb) in[5]) >> 58; 628280297Sjkim out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6; 629280297Sjkim out[7] += ((limb) (in[5] >> 64)) >> 52; 630238384Sjkim 631280297Sjkim out[7] += ((limb) in[6]) >> 58; 632280297Sjkim out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6; 633280297Sjkim out[8] += ((limb) (in[6] >> 64)) >> 52; 634238384Sjkim 635280297Sjkim out[8] += ((limb) in[7]) >> 58; 636280297Sjkim out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6; 637280297Sjkim /*- 638280297Sjkim * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 639280297Sjkim * < 2^59 + 2^13 640280297Sjkim */ 641280297Sjkim overflow1 = ((limb) (in[7] >> 64)) >> 52; 642238384Sjkim 643280297Sjkim overflow1 += ((limb) in[8]) >> 58; 644280297Sjkim overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6; 645280297Sjkim overflow2 = ((limb) (in[8] >> 64)) >> 52; 646238384Sjkim 647280297Sjkim overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */ 648280297Sjkim overflow2 <<= 1; /* overflow2 < 2^13 */ 649238384Sjkim 650280297Sjkim out[0] += overflow1; /* out[0] < 2^60 */ 651280297Sjkim out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */ 652238384Sjkim 653280297Sjkim out[1] += out[0] >> 58; 654280297Sjkim out[0] &= bottom58bits; 655280297Sjkim /*- 656280297Sjkim * out[0] < 2^58 657280297Sjkim * out[1] < 2^59 + 2^6 + 2^13 + 2^2 658280297Sjkim * < 2^59 + 2^14 659280297Sjkim */ 660280297Sjkim} 661238384Sjkim 662238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 663280297Sjkim{ 664280297Sjkim largefelem tmp; 665280297Sjkim felem_square(tmp, in); 666280297Sjkim felem_reduce(out, tmp); 667280297Sjkim} 668238384Sjkim 669238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 670280297Sjkim{ 671280297Sjkim largefelem tmp; 672280297Sjkim felem_mul(tmp, in1, in2); 673280297Sjkim felem_reduce(out, tmp); 674280297Sjkim} 675238384Sjkim 676280297Sjkim/*- 677280297Sjkim * felem_inv calculates |out| = |in|^{-1} 678238384Sjkim * 679238384Sjkim * Based on Fermat's Little Theorem: 680238384Sjkim * a^p = a (mod p) 681238384Sjkim * a^{p-1} = 1 (mod p) 682238384Sjkim * a^{p-2} = a^{-1} (mod p) 683238384Sjkim */ 684238384Sjkimstatic void felem_inv(felem out, const felem in) 685280297Sjkim{ 686280297Sjkim felem ftmp, ftmp2, ftmp3, ftmp4; 687280297Sjkim largefelem tmp; 688280297Sjkim unsigned i; 689238384Sjkim 690280297Sjkim felem_square(tmp, in); 691280297Sjkim felem_reduce(ftmp, tmp); /* 2^1 */ 692280297Sjkim felem_mul(tmp, in, ftmp); 693280297Sjkim felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 694280297Sjkim felem_assign(ftmp2, ftmp); 695280297Sjkim felem_square(tmp, ftmp); 696280297Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 697280297Sjkim felem_mul(tmp, in, ftmp); 698280297Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */ 699280297Sjkim felem_square(tmp, ftmp); 700280297Sjkim felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */ 701238384Sjkim 702280297Sjkim felem_square(tmp, ftmp2); 703280297Sjkim felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */ 704280297Sjkim felem_square(tmp, ftmp3); 705280297Sjkim felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */ 706280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 707280297Sjkim felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */ 708238384Sjkim 709280297Sjkim felem_assign(ftmp2, ftmp3); 710280297Sjkim felem_square(tmp, ftmp3); 711280297Sjkim felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */ 712280297Sjkim felem_square(tmp, ftmp3); 713280297Sjkim felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */ 714280297Sjkim felem_square(tmp, ftmp3); 715280297Sjkim felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */ 716280297Sjkim felem_square(tmp, ftmp3); 717280297Sjkim felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */ 718280297Sjkim felem_assign(ftmp4, ftmp3); 719280297Sjkim felem_mul(tmp, ftmp3, ftmp); 720280297Sjkim felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */ 721280297Sjkim felem_square(tmp, ftmp4); 722280297Sjkim felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */ 723280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 724280297Sjkim felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */ 725280297Sjkim felem_assign(ftmp2, ftmp3); 726238384Sjkim 727280297Sjkim for (i = 0; i < 8; i++) { 728280297Sjkim felem_square(tmp, ftmp3); 729280297Sjkim felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */ 730280297Sjkim } 731280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 732280297Sjkim felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */ 733280297Sjkim felem_assign(ftmp2, ftmp3); 734238384Sjkim 735280297Sjkim for (i = 0; i < 16; i++) { 736280297Sjkim felem_square(tmp, ftmp3); 737280297Sjkim felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */ 738280297Sjkim } 739280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 740280297Sjkim felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */ 741280297Sjkim felem_assign(ftmp2, ftmp3); 742238384Sjkim 743280297Sjkim for (i = 0; i < 32; i++) { 744280297Sjkim felem_square(tmp, ftmp3); 745280297Sjkim felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */ 746280297Sjkim } 747280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 748280297Sjkim felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */ 749280297Sjkim felem_assign(ftmp2, ftmp3); 750238384Sjkim 751280297Sjkim for (i = 0; i < 64; i++) { 752280297Sjkim felem_square(tmp, ftmp3); 753280297Sjkim felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */ 754280297Sjkim } 755280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 756280297Sjkim felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */ 757280297Sjkim felem_assign(ftmp2, ftmp3); 758238384Sjkim 759280297Sjkim for (i = 0; i < 128; i++) { 760280297Sjkim felem_square(tmp, ftmp3); 761280297Sjkim felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */ 762280297Sjkim } 763280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 764280297Sjkim felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */ 765280297Sjkim felem_assign(ftmp2, ftmp3); 766238384Sjkim 767280297Sjkim for (i = 0; i < 256; i++) { 768280297Sjkim felem_square(tmp, ftmp3); 769280297Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */ 770280297Sjkim } 771280297Sjkim felem_mul(tmp, ftmp3, ftmp2); 772280297Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */ 773238384Sjkim 774280297Sjkim for (i = 0; i < 9; i++) { 775280297Sjkim felem_square(tmp, ftmp3); 776280297Sjkim felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */ 777280297Sjkim } 778280297Sjkim felem_mul(tmp, ftmp3, ftmp4); 779280297Sjkim felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */ 780280297Sjkim felem_mul(tmp, ftmp3, in); 781280297Sjkim felem_reduce(out, tmp); /* 2^512 - 3 */ 782238384Sjkim} 783238384Sjkim 784238384Sjkim/* This is 2^521-1, expressed as an felem */ 785280297Sjkimstatic const felem kPrime = { 786280297Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 787280297Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 788280297Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff 789280297Sjkim}; 790238384Sjkim 791280297Sjkim/*- 792280297Sjkim * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0 793238384Sjkim * otherwise. 794238384Sjkim * On entry: 795238384Sjkim * in[i] < 2^59 + 2^14 796238384Sjkim */ 797238384Sjkimstatic limb felem_is_zero(const felem in) 798280297Sjkim{ 799280297Sjkim felem ftmp; 800280297Sjkim limb is_zero, is_p; 801280297Sjkim felem_assign(ftmp, in); 802238384Sjkim 803280297Sjkim ftmp[0] += ftmp[8] >> 57; 804280297Sjkim ftmp[8] &= bottom57bits; 805280297Sjkim /* ftmp[8] < 2^57 */ 806280297Sjkim ftmp[1] += ftmp[0] >> 58; 807280297Sjkim ftmp[0] &= bottom58bits; 808280297Sjkim ftmp[2] += ftmp[1] >> 58; 809280297Sjkim ftmp[1] &= bottom58bits; 810280297Sjkim ftmp[3] += ftmp[2] >> 58; 811280297Sjkim ftmp[2] &= bottom58bits; 812280297Sjkim ftmp[4] += ftmp[3] >> 58; 813280297Sjkim ftmp[3] &= bottom58bits; 814280297Sjkim ftmp[5] += ftmp[4] >> 58; 815280297Sjkim ftmp[4] &= bottom58bits; 816280297Sjkim ftmp[6] += ftmp[5] >> 58; 817280297Sjkim ftmp[5] &= bottom58bits; 818280297Sjkim ftmp[7] += ftmp[6] >> 58; 819280297Sjkim ftmp[6] &= bottom58bits; 820280297Sjkim ftmp[8] += ftmp[7] >> 58; 821280297Sjkim ftmp[7] &= bottom58bits; 822280297Sjkim /* ftmp[8] < 2^57 + 4 */ 823238384Sjkim 824280297Sjkim /* 825280297Sjkim * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater 826280297Sjkim * than our bound for ftmp[8]. Therefore we only have to check if the 827280297Sjkim * zero is zero or 2^521-1. 828280297Sjkim */ 829238384Sjkim 830280297Sjkim is_zero = 0; 831280297Sjkim is_zero |= ftmp[0]; 832280297Sjkim is_zero |= ftmp[1]; 833280297Sjkim is_zero |= ftmp[2]; 834280297Sjkim is_zero |= ftmp[3]; 835280297Sjkim is_zero |= ftmp[4]; 836280297Sjkim is_zero |= ftmp[5]; 837280297Sjkim is_zero |= ftmp[6]; 838280297Sjkim is_zero |= ftmp[7]; 839280297Sjkim is_zero |= ftmp[8]; 840238384Sjkim 841280297Sjkim is_zero--; 842280297Sjkim /* 843280297Sjkim * We know that ftmp[i] < 2^63, therefore the only way that the top bit 844280297Sjkim * can be set is if is_zero was 0 before the decrement. 845280297Sjkim */ 846331638Sjkim is_zero = 0 - (is_zero >> 63); 847238384Sjkim 848280297Sjkim is_p = ftmp[0] ^ kPrime[0]; 849280297Sjkim is_p |= ftmp[1] ^ kPrime[1]; 850280297Sjkim is_p |= ftmp[2] ^ kPrime[2]; 851280297Sjkim is_p |= ftmp[3] ^ kPrime[3]; 852280297Sjkim is_p |= ftmp[4] ^ kPrime[4]; 853280297Sjkim is_p |= ftmp[5] ^ kPrime[5]; 854280297Sjkim is_p |= ftmp[6] ^ kPrime[6]; 855280297Sjkim is_p |= ftmp[7] ^ kPrime[7]; 856280297Sjkim is_p |= ftmp[8] ^ kPrime[8]; 857238384Sjkim 858280297Sjkim is_p--; 859331638Sjkim is_p = 0 - (is_p >> 63); 860238384Sjkim 861280297Sjkim is_zero |= is_p; 862280297Sjkim return is_zero; 863280297Sjkim} 864238384Sjkim 865325337Sjkimstatic int felem_is_zero_int(const void *in) 866280297Sjkim{ 867280297Sjkim return (int)(felem_is_zero(in) & ((limb) 1)); 868280297Sjkim} 869238384Sjkim 870280297Sjkim/*- 871280297Sjkim * felem_contract converts |in| to its unique, minimal representation. 872238384Sjkim * On entry: 873238384Sjkim * in[i] < 2^59 + 2^14 874238384Sjkim */ 875238384Sjkimstatic void felem_contract(felem out, const felem in) 876280297Sjkim{ 877280297Sjkim limb is_p, is_greater, sign; 878280297Sjkim static const limb two58 = ((limb) 1) << 58; 879238384Sjkim 880280297Sjkim felem_assign(out, in); 881238384Sjkim 882280297Sjkim out[0] += out[8] >> 57; 883280297Sjkim out[8] &= bottom57bits; 884280297Sjkim /* out[8] < 2^57 */ 885280297Sjkim out[1] += out[0] >> 58; 886280297Sjkim out[0] &= bottom58bits; 887280297Sjkim out[2] += out[1] >> 58; 888280297Sjkim out[1] &= bottom58bits; 889280297Sjkim out[3] += out[2] >> 58; 890280297Sjkim out[2] &= bottom58bits; 891280297Sjkim out[4] += out[3] >> 58; 892280297Sjkim out[3] &= bottom58bits; 893280297Sjkim out[5] += out[4] >> 58; 894280297Sjkim out[4] &= bottom58bits; 895280297Sjkim out[6] += out[5] >> 58; 896280297Sjkim out[5] &= bottom58bits; 897280297Sjkim out[7] += out[6] >> 58; 898280297Sjkim out[6] &= bottom58bits; 899280297Sjkim out[8] += out[7] >> 58; 900280297Sjkim out[7] &= bottom58bits; 901280297Sjkim /* out[8] < 2^57 + 4 */ 902238384Sjkim 903280297Sjkim /* 904280297Sjkim * If the value is greater than 2^521-1 then we have to subtract 2^521-1 905280297Sjkim * out. See the comments in felem_is_zero regarding why we don't test for 906280297Sjkim * other multiples of the prime. 907280297Sjkim */ 908238384Sjkim 909280297Sjkim /* 910280297Sjkim * First, if |out| is equal to 2^521-1, we subtract it out to get zero. 911280297Sjkim */ 912238384Sjkim 913280297Sjkim is_p = out[0] ^ kPrime[0]; 914280297Sjkim is_p |= out[1] ^ kPrime[1]; 915280297Sjkim is_p |= out[2] ^ kPrime[2]; 916280297Sjkim is_p |= out[3] ^ kPrime[3]; 917280297Sjkim is_p |= out[4] ^ kPrime[4]; 918280297Sjkim is_p |= out[5] ^ kPrime[5]; 919280297Sjkim is_p |= out[6] ^ kPrime[6]; 920280297Sjkim is_p |= out[7] ^ kPrime[7]; 921280297Sjkim is_p |= out[8] ^ kPrime[8]; 922238384Sjkim 923280297Sjkim is_p--; 924280297Sjkim is_p &= is_p << 32; 925280297Sjkim is_p &= is_p << 16; 926280297Sjkim is_p &= is_p << 8; 927280297Sjkim is_p &= is_p << 4; 928280297Sjkim is_p &= is_p << 2; 929280297Sjkim is_p &= is_p << 1; 930331638Sjkim is_p = 0 - (is_p >> 63); 931280297Sjkim is_p = ~is_p; 932238384Sjkim 933280297Sjkim /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */ 934238384Sjkim 935280297Sjkim out[0] &= is_p; 936280297Sjkim out[1] &= is_p; 937280297Sjkim out[2] &= is_p; 938280297Sjkim out[3] &= is_p; 939280297Sjkim out[4] &= is_p; 940280297Sjkim out[5] &= is_p; 941280297Sjkim out[6] &= is_p; 942280297Sjkim out[7] &= is_p; 943280297Sjkim out[8] &= is_p; 944238384Sjkim 945280297Sjkim /* 946280297Sjkim * In order to test that |out| >= 2^521-1 we need only test if out[8] >> 947280297Sjkim * 57 is greater than zero as (2^521-1) + x >= 2^522 948280297Sjkim */ 949280297Sjkim is_greater = out[8] >> 57; 950280297Sjkim is_greater |= is_greater << 32; 951280297Sjkim is_greater |= is_greater << 16; 952280297Sjkim is_greater |= is_greater << 8; 953280297Sjkim is_greater |= is_greater << 4; 954280297Sjkim is_greater |= is_greater << 2; 955280297Sjkim is_greater |= is_greater << 1; 956331638Sjkim is_greater = 0 - (is_greater >> 63); 957238384Sjkim 958280297Sjkim out[0] -= kPrime[0] & is_greater; 959280297Sjkim out[1] -= kPrime[1] & is_greater; 960280297Sjkim out[2] -= kPrime[2] & is_greater; 961280297Sjkim out[3] -= kPrime[3] & is_greater; 962280297Sjkim out[4] -= kPrime[4] & is_greater; 963280297Sjkim out[5] -= kPrime[5] & is_greater; 964280297Sjkim out[6] -= kPrime[6] & is_greater; 965280297Sjkim out[7] -= kPrime[7] & is_greater; 966280297Sjkim out[8] -= kPrime[8] & is_greater; 967238384Sjkim 968280297Sjkim /* Eliminate negative coefficients */ 969280297Sjkim sign = -(out[0] >> 63); 970280297Sjkim out[0] += (two58 & sign); 971280297Sjkim out[1] -= (1 & sign); 972280297Sjkim sign = -(out[1] >> 63); 973280297Sjkim out[1] += (two58 & sign); 974280297Sjkim out[2] -= (1 & sign); 975280297Sjkim sign = -(out[2] >> 63); 976280297Sjkim out[2] += (two58 & sign); 977280297Sjkim out[3] -= (1 & sign); 978280297Sjkim sign = -(out[3] >> 63); 979280297Sjkim out[3] += (two58 & sign); 980280297Sjkim out[4] -= (1 & sign); 981280297Sjkim sign = -(out[4] >> 63); 982280297Sjkim out[4] += (two58 & sign); 983280297Sjkim out[5] -= (1 & sign); 984280297Sjkim sign = -(out[0] >> 63); 985280297Sjkim out[5] += (two58 & sign); 986280297Sjkim out[6] -= (1 & sign); 987280297Sjkim sign = -(out[6] >> 63); 988280297Sjkim out[6] += (two58 & sign); 989280297Sjkim out[7] -= (1 & sign); 990280297Sjkim sign = -(out[7] >> 63); 991280297Sjkim out[7] += (two58 & sign); 992280297Sjkim out[8] -= (1 & sign); 993280297Sjkim sign = -(out[5] >> 63); 994280297Sjkim out[5] += (two58 & sign); 995280297Sjkim out[6] -= (1 & sign); 996280297Sjkim sign = -(out[6] >> 63); 997280297Sjkim out[6] += (two58 & sign); 998280297Sjkim out[7] -= (1 & sign); 999280297Sjkim sign = -(out[7] >> 63); 1000280297Sjkim out[7] += (two58 & sign); 1001280297Sjkim out[8] -= (1 & sign); 1002280297Sjkim} 1003238384Sjkim 1004280297Sjkim/*- 1005280297Sjkim * Group operations 1006238384Sjkim * ---------------- 1007238384Sjkim * 1008238384Sjkim * Building on top of the field operations we have the operations on the 1009238384Sjkim * elliptic curve group itself. Points on the curve are represented in Jacobian 1010238384Sjkim * coordinates */ 1011238384Sjkim 1012280297Sjkim/*- 1013280297Sjkim * point_double calcuates 2*(x_in, y_in, z_in) 1014238384Sjkim * 1015238384Sjkim * The method is taken from: 1016238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b 1017238384Sjkim * 1018238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. 1019238384Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). */ 1020238384Sjkimstatic void 1021238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 1022280297Sjkim const felem x_in, const felem y_in, const felem z_in) 1023280297Sjkim{ 1024280297Sjkim largefelem tmp, tmp2; 1025280297Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 1026238384Sjkim 1027280297Sjkim felem_assign(ftmp, x_in); 1028280297Sjkim felem_assign(ftmp2, x_in); 1029238384Sjkim 1030280297Sjkim /* delta = z^2 */ 1031280297Sjkim felem_square(tmp, z_in); 1032280297Sjkim felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */ 1033238384Sjkim 1034280297Sjkim /* gamma = y^2 */ 1035280297Sjkim felem_square(tmp, y_in); 1036280297Sjkim felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */ 1037238384Sjkim 1038280297Sjkim /* beta = x*gamma */ 1039280297Sjkim felem_mul(tmp, x_in, gamma); 1040280297Sjkim felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */ 1041238384Sjkim 1042280297Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 1043280297Sjkim felem_diff64(ftmp, delta); 1044280297Sjkim /* ftmp[i] < 2^61 */ 1045280297Sjkim felem_sum64(ftmp2, delta); 1046280297Sjkim /* ftmp2[i] < 2^60 + 2^15 */ 1047280297Sjkim felem_scalar64(ftmp2, 3); 1048280297Sjkim /* ftmp2[i] < 3*2^60 + 3*2^15 */ 1049280297Sjkim felem_mul(tmp, ftmp, ftmp2); 1050280297Sjkim /*- 1051280297Sjkim * tmp[i] < 17(3*2^121 + 3*2^76) 1052280297Sjkim * = 61*2^121 + 61*2^76 1053280297Sjkim * < 64*2^121 + 64*2^76 1054280297Sjkim * = 2^127 + 2^82 1055280297Sjkim * < 2^128 1056280297Sjkim */ 1057280297Sjkim felem_reduce(alpha, tmp); 1058238384Sjkim 1059280297Sjkim /* x' = alpha^2 - 8*beta */ 1060280297Sjkim felem_square(tmp, alpha); 1061280297Sjkim /* 1062280297Sjkim * tmp[i] < 17*2^120 < 2^125 1063280297Sjkim */ 1064280297Sjkim felem_assign(ftmp, beta); 1065280297Sjkim felem_scalar64(ftmp, 8); 1066280297Sjkim /* ftmp[i] < 2^62 + 2^17 */ 1067280297Sjkim felem_diff_128_64(tmp, ftmp); 1068280297Sjkim /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */ 1069280297Sjkim felem_reduce(x_out, tmp); 1070238384Sjkim 1071280297Sjkim /* z' = (y + z)^2 - gamma - delta */ 1072280297Sjkim felem_sum64(delta, gamma); 1073280297Sjkim /* delta[i] < 2^60 + 2^15 */ 1074280297Sjkim felem_assign(ftmp, y_in); 1075280297Sjkim felem_sum64(ftmp, z_in); 1076280297Sjkim /* ftmp[i] < 2^60 + 2^15 */ 1077280297Sjkim felem_square(tmp, ftmp); 1078280297Sjkim /* 1079280297Sjkim * tmp[i] < 17(2^122) < 2^127 1080280297Sjkim */ 1081280297Sjkim felem_diff_128_64(tmp, delta); 1082280297Sjkim /* tmp[i] < 2^127 + 2^63 */ 1083280297Sjkim felem_reduce(z_out, tmp); 1084238384Sjkim 1085280297Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 1086280297Sjkim felem_scalar64(beta, 4); 1087280297Sjkim /* beta[i] < 2^61 + 2^16 */ 1088280297Sjkim felem_diff64(beta, x_out); 1089280297Sjkim /* beta[i] < 2^61 + 2^60 + 2^16 */ 1090280297Sjkim felem_mul(tmp, alpha, beta); 1091280297Sjkim /*- 1092280297Sjkim * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) 1093280297Sjkim * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30) 1094280297Sjkim * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1095280297Sjkim * < 2^128 1096280297Sjkim */ 1097280297Sjkim felem_square(tmp2, gamma); 1098280297Sjkim /*- 1099280297Sjkim * tmp2[i] < 17*(2^59 + 2^14)^2 1100280297Sjkim * = 17*(2^118 + 2^74 + 2^28) 1101280297Sjkim */ 1102280297Sjkim felem_scalar128(tmp2, 8); 1103280297Sjkim /*- 1104280297Sjkim * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) 1105280297Sjkim * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31 1106280297Sjkim * < 2^126 1107280297Sjkim */ 1108280297Sjkim felem_diff128(tmp, tmp2); 1109280297Sjkim /*- 1110280297Sjkim * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1111280297Sjkim * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 1112280297Sjkim * 2^74 + 2^69 + 2^34 + 2^30 1113280297Sjkim * < 2^128 1114280297Sjkim */ 1115280297Sjkim felem_reduce(y_out, tmp); 1116280297Sjkim} 1117238384Sjkim 1118238384Sjkim/* copy_conditional copies in to out iff mask is all ones. */ 1119280297Sjkimstatic void copy_conditional(felem out, const felem in, limb mask) 1120280297Sjkim{ 1121280297Sjkim unsigned i; 1122280297Sjkim for (i = 0; i < NLIMBS; ++i) { 1123280297Sjkim const limb tmp = mask & (in[i] ^ out[i]); 1124280297Sjkim out[i] ^= tmp; 1125280297Sjkim } 1126280297Sjkim} 1127238384Sjkim 1128280297Sjkim/*- 1129280297Sjkim * point_add calcuates (x1, y1, z1) + (x2, y2, z2) 1130238384Sjkim * 1131238384Sjkim * The method is taken from 1132238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, 1133238384Sjkim * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). 1134238384Sjkim * 1135238384Sjkim * This function includes a branch for checking whether the two input points 1136238384Sjkim * are equal (while not equal to the point at infinity). This case never 1137238384Sjkim * happens during single point multiplication, so there is no timing leak for 1138238384Sjkim * ECDH or ECDSA signing. */ 1139238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 1140280297Sjkim const felem x1, const felem y1, const felem z1, 1141280297Sjkim const int mixed, const felem x2, const felem y2, 1142280297Sjkim const felem z2) 1143280297Sjkim{ 1144280297Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out; 1145280297Sjkim largefelem tmp, tmp2; 1146280297Sjkim limb x_equal, y_equal, z1_is_zero, z2_is_zero; 1147238384Sjkim 1148280297Sjkim z1_is_zero = felem_is_zero(z1); 1149280297Sjkim z2_is_zero = felem_is_zero(z2); 1150238384Sjkim 1151280297Sjkim /* ftmp = z1z1 = z1**2 */ 1152280297Sjkim felem_square(tmp, z1); 1153280297Sjkim felem_reduce(ftmp, tmp); 1154238384Sjkim 1155280297Sjkim if (!mixed) { 1156280297Sjkim /* ftmp2 = z2z2 = z2**2 */ 1157280297Sjkim felem_square(tmp, z2); 1158280297Sjkim felem_reduce(ftmp2, tmp); 1159238384Sjkim 1160280297Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1161280297Sjkim felem_mul(tmp, x1, ftmp2); 1162280297Sjkim felem_reduce(ftmp3, tmp); 1163238384Sjkim 1164280297Sjkim /* ftmp5 = z1 + z2 */ 1165280297Sjkim felem_assign(ftmp5, z1); 1166280297Sjkim felem_sum64(ftmp5, z2); 1167280297Sjkim /* ftmp5[i] < 2^61 */ 1168238384Sjkim 1169280297Sjkim /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */ 1170280297Sjkim felem_square(tmp, ftmp5); 1171280297Sjkim /* tmp[i] < 17*2^122 */ 1172280297Sjkim felem_diff_128_64(tmp, ftmp); 1173280297Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1174280297Sjkim felem_diff_128_64(tmp, ftmp2); 1175280297Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1176280297Sjkim felem_reduce(ftmp5, tmp); 1177238384Sjkim 1178280297Sjkim /* ftmp2 = z2 * z2z2 */ 1179280297Sjkim felem_mul(tmp, ftmp2, z2); 1180280297Sjkim felem_reduce(ftmp2, tmp); 1181238384Sjkim 1182280297Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1183280297Sjkim felem_mul(tmp, y1, ftmp2); 1184280297Sjkim felem_reduce(ftmp6, tmp); 1185280297Sjkim } else { 1186280297Sjkim /* 1187280297Sjkim * We'll assume z2 = 1 (special case z2 = 0 is handled later) 1188280297Sjkim */ 1189238384Sjkim 1190280297Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1191280297Sjkim felem_assign(ftmp3, x1); 1192238384Sjkim 1193280297Sjkim /* ftmp5 = 2*z1z2 */ 1194280297Sjkim felem_scalar(ftmp5, z1, 2); 1195238384Sjkim 1196280297Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1197280297Sjkim felem_assign(ftmp6, y1); 1198280297Sjkim } 1199238384Sjkim 1200280297Sjkim /* u2 = x2*z1z1 */ 1201280297Sjkim felem_mul(tmp, x2, ftmp); 1202280297Sjkim /* tmp[i] < 17*2^120 */ 1203238384Sjkim 1204280297Sjkim /* h = ftmp4 = u2 - u1 */ 1205280297Sjkim felem_diff_128_64(tmp, ftmp3); 1206280297Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1207280297Sjkim felem_reduce(ftmp4, tmp); 1208238384Sjkim 1209280297Sjkim x_equal = felem_is_zero(ftmp4); 1210238384Sjkim 1211280297Sjkim /* z_out = ftmp5 * h */ 1212280297Sjkim felem_mul(tmp, ftmp5, ftmp4); 1213280297Sjkim felem_reduce(z_out, tmp); 1214238384Sjkim 1215280297Sjkim /* ftmp = z1 * z1z1 */ 1216280297Sjkim felem_mul(tmp, ftmp, z1); 1217280297Sjkim felem_reduce(ftmp, tmp); 1218238384Sjkim 1219280297Sjkim /* s2 = tmp = y2 * z1**3 */ 1220280297Sjkim felem_mul(tmp, y2, ftmp); 1221280297Sjkim /* tmp[i] < 17*2^120 */ 1222238384Sjkim 1223280297Sjkim /* r = ftmp5 = (s2 - s1)*2 */ 1224280297Sjkim felem_diff_128_64(tmp, ftmp6); 1225280297Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1226280297Sjkim felem_reduce(ftmp5, tmp); 1227280297Sjkim y_equal = felem_is_zero(ftmp5); 1228280297Sjkim felem_scalar64(ftmp5, 2); 1229280297Sjkim /* ftmp5[i] < 2^61 */ 1230238384Sjkim 1231280297Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { 1232280297Sjkim point_double(x3, y3, z3, x1, y1, z1); 1233280297Sjkim return; 1234280297Sjkim } 1235238384Sjkim 1236280297Sjkim /* I = ftmp = (2h)**2 */ 1237280297Sjkim felem_assign(ftmp, ftmp4); 1238280297Sjkim felem_scalar64(ftmp, 2); 1239280297Sjkim /* ftmp[i] < 2^61 */ 1240280297Sjkim felem_square(tmp, ftmp); 1241280297Sjkim /* tmp[i] < 17*2^122 */ 1242280297Sjkim felem_reduce(ftmp, tmp); 1243238384Sjkim 1244280297Sjkim /* J = ftmp2 = h * I */ 1245280297Sjkim felem_mul(tmp, ftmp4, ftmp); 1246280297Sjkim felem_reduce(ftmp2, tmp); 1247238384Sjkim 1248280297Sjkim /* V = ftmp4 = U1 * I */ 1249280297Sjkim felem_mul(tmp, ftmp3, ftmp); 1250280297Sjkim felem_reduce(ftmp4, tmp); 1251238384Sjkim 1252280297Sjkim /* x_out = r**2 - J - 2V */ 1253280297Sjkim felem_square(tmp, ftmp5); 1254280297Sjkim /* tmp[i] < 17*2^122 */ 1255280297Sjkim felem_diff_128_64(tmp, ftmp2); 1256280297Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1257280297Sjkim felem_assign(ftmp3, ftmp4); 1258280297Sjkim felem_scalar64(ftmp4, 2); 1259280297Sjkim /* ftmp4[i] < 2^61 */ 1260280297Sjkim felem_diff_128_64(tmp, ftmp4); 1261280297Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1262280297Sjkim felem_reduce(x_out, tmp); 1263238384Sjkim 1264280297Sjkim /* y_out = r(V-x_out) - 2 * s1 * J */ 1265280297Sjkim felem_diff64(ftmp3, x_out); 1266280297Sjkim /* 1267280297Sjkim * ftmp3[i] < 2^60 + 2^60 = 2^61 1268280297Sjkim */ 1269280297Sjkim felem_mul(tmp, ftmp5, ftmp3); 1270280297Sjkim /* tmp[i] < 17*2^122 */ 1271280297Sjkim felem_mul(tmp2, ftmp6, ftmp2); 1272280297Sjkim /* tmp2[i] < 17*2^120 */ 1273280297Sjkim felem_scalar128(tmp2, 2); 1274280297Sjkim /* tmp2[i] < 17*2^121 */ 1275280297Sjkim felem_diff128(tmp, tmp2); 1276290207Sjkim /*- 1277290207Sjkim * tmp[i] < 2^127 - 2^69 + 17*2^122 1278290207Sjkim * = 2^126 - 2^122 - 2^6 - 2^2 - 1 1279290207Sjkim * < 2^127 1280290207Sjkim */ 1281280297Sjkim felem_reduce(y_out, tmp); 1282238384Sjkim 1283280297Sjkim copy_conditional(x_out, x2, z1_is_zero); 1284280297Sjkim copy_conditional(x_out, x1, z2_is_zero); 1285280297Sjkim copy_conditional(y_out, y2, z1_is_zero); 1286280297Sjkim copy_conditional(y_out, y1, z2_is_zero); 1287280297Sjkim copy_conditional(z_out, z2, z1_is_zero); 1288280297Sjkim copy_conditional(z_out, z1, z2_is_zero); 1289280297Sjkim felem_assign(x3, x_out); 1290280297Sjkim felem_assign(y3, y_out); 1291280297Sjkim felem_assign(z3, z_out); 1292280297Sjkim} 1293238384Sjkim 1294280297Sjkim/*- 1295280297Sjkim * Base point pre computation 1296238384Sjkim * -------------------------- 1297238384Sjkim * 1298238384Sjkim * Two different sorts of precomputed tables are used in the following code. 1299238384Sjkim * Each contain various points on the curve, where each point is three field 1300238384Sjkim * elements (x, y, z). 1301238384Sjkim * 1302238384Sjkim * For the base point table, z is usually 1 (0 for the point at infinity). 1303238384Sjkim * This table has 16 elements: 1304238384Sjkim * index | bits | point 1305238384Sjkim * ------+---------+------------------------------ 1306238384Sjkim * 0 | 0 0 0 0 | 0G 1307238384Sjkim * 1 | 0 0 0 1 | 1G 1308238384Sjkim * 2 | 0 0 1 0 | 2^130G 1309238384Sjkim * 3 | 0 0 1 1 | (2^130 + 1)G 1310238384Sjkim * 4 | 0 1 0 0 | 2^260G 1311238384Sjkim * 5 | 0 1 0 1 | (2^260 + 1)G 1312238384Sjkim * 6 | 0 1 1 0 | (2^260 + 2^130)G 1313238384Sjkim * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G 1314238384Sjkim * 8 | 1 0 0 0 | 2^390G 1315238384Sjkim * 9 | 1 0 0 1 | (2^390 + 1)G 1316238384Sjkim * 10 | 1 0 1 0 | (2^390 + 2^130)G 1317238384Sjkim * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G 1318238384Sjkim * 12 | 1 1 0 0 | (2^390 + 2^260)G 1319238384Sjkim * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G 1320238384Sjkim * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G 1321238384Sjkim * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G 1322238384Sjkim * 1323238384Sjkim * The reason for this is so that we can clock bits into four different 1324238384Sjkim * locations when doing simple scalar multiplies against the base point. 1325238384Sjkim * 1326238384Sjkim * Tables for other points have table[i] = iG for i in 0 .. 16. */ 1327238384Sjkim 1328238384Sjkim/* gmul is the table of precomputed base points */ 1329280297Sjkimstatic const felem gmul[16][3] = { {{0, 0, 0, 0, 0, 0, 0, 0, 0}, 1330280297Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}, 1331280297Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}}, 1332280297Sjkim{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334, 1333280297Sjkim 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8, 1334280297Sjkim 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404}, 1335280297Sjkim {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353, 1336280297Sjkim 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45, 1337280297Sjkim 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b}, 1338280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1339280297Sjkim{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad, 1340280297Sjkim 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e, 1341280297Sjkim 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5}, 1342280297Sjkim {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58, 1343280297Sjkim 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c, 1344280297Sjkim 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7}, 1345280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1346280297Sjkim{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873, 1347280297Sjkim 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c, 1348280297Sjkim 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9}, 1349280297Sjkim {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52, 1350280297Sjkim 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e, 1351280297Sjkim 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe}, 1352280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1353280297Sjkim{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2, 1354280297Sjkim 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561, 1355280297Sjkim 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065}, 1356280297Sjkim {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a, 1357280297Sjkim 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e, 1358280297Sjkim 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524}, 1359280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1360280297Sjkim{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6, 1361280297Sjkim 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51, 1362280297Sjkim 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe}, 1363280297Sjkim {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d, 1364280297Sjkim 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c, 1365280297Sjkim 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7}, 1366280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1367280297Sjkim{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27, 1368280297Sjkim 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f, 1369280297Sjkim 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256}, 1370280297Sjkim {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa, 1371280297Sjkim 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2, 1372280297Sjkim 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd}, 1373280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1374280297Sjkim{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890, 1375280297Sjkim 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74, 1376280297Sjkim 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23}, 1377280297Sjkim {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516, 1378280297Sjkim 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1, 1379280297Sjkim 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e}, 1380280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1381280297Sjkim{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce, 1382280297Sjkim 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7, 1383280297Sjkim 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5}, 1384280297Sjkim {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318, 1385280297Sjkim 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83, 1386280297Sjkim 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242}, 1387280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1388280297Sjkim{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae, 1389280297Sjkim 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef, 1390280297Sjkim 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203}, 1391280297Sjkim {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447, 1392280297Sjkim 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283, 1393280297Sjkim 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f}, 1394280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1395280297Sjkim{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5, 1396280297Sjkim 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c, 1397280297Sjkim 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a}, 1398280297Sjkim {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df, 1399280297Sjkim 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645, 1400280297Sjkim 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a}, 1401280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1402280297Sjkim{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292, 1403280297Sjkim 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422, 1404280297Sjkim 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b}, 1405280297Sjkim {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30, 1406280297Sjkim 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb, 1407280297Sjkim 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f}, 1408280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1409280297Sjkim{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767, 1410280297Sjkim 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3, 1411280297Sjkim 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf}, 1412280297Sjkim {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2, 1413280297Sjkim 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692, 1414280297Sjkim 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d}, 1415280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1416280297Sjkim{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3, 1417280297Sjkim 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade, 1418280297Sjkim 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684}, 1419280297Sjkim {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8, 1420280297Sjkim 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a, 1421280297Sjkim 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81}, 1422280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1423280297Sjkim{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608, 1424280297Sjkim 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610, 1425280297Sjkim 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d}, 1426280297Sjkim {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006, 1427280297Sjkim 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86, 1428280297Sjkim 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42}, 1429280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1430280297Sjkim{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c, 1431280297Sjkim 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9, 1432280297Sjkim 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f}, 1433280297Sjkim {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7, 1434280297Sjkim 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c, 1435280297Sjkim 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055}, 1436280297Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}} 1437280297Sjkim}; 1438238384Sjkim 1439280297Sjkim/* 1440280297Sjkim * select_point selects the |idx|th point from a precomputation table and 1441280297Sjkim * copies it to out. 1442280297Sjkim */ 1443280297Sjkim /* pre_comp below is of the size provided in |size| */ 1444280297Sjkimstatic void select_point(const limb idx, unsigned int size, 1445280297Sjkim const felem pre_comp[][3], felem out[3]) 1446280297Sjkim{ 1447280297Sjkim unsigned i, j; 1448280297Sjkim limb *outlimbs = &out[0][0]; 1449280297Sjkim memset(outlimbs, 0, 3 * sizeof(felem)); 1450238384Sjkim 1451280297Sjkim for (i = 0; i < size; i++) { 1452280297Sjkim const limb *inlimbs = &pre_comp[i][0][0]; 1453280297Sjkim limb mask = i ^ idx; 1454280297Sjkim mask |= mask >> 4; 1455280297Sjkim mask |= mask >> 2; 1456280297Sjkim mask |= mask >> 1; 1457280297Sjkim mask &= 1; 1458280297Sjkim mask--; 1459280297Sjkim for (j = 0; j < NLIMBS * 3; j++) 1460280297Sjkim outlimbs[j] |= inlimbs[j] & mask; 1461280297Sjkim } 1462280297Sjkim} 1463238384Sjkim 1464238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1465238384Sjkimstatic char get_bit(const felem_bytearray in, int i) 1466280297Sjkim{ 1467280297Sjkim if (i < 0) 1468280297Sjkim return 0; 1469280297Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1470280297Sjkim} 1471238384Sjkim 1472280297Sjkim/* 1473280297Sjkim * Interleaved point multiplication using precomputed point multiples: The 1474280297Sjkim * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1475280297Sjkim * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1476280297Sjkim * generator, using certain (large) precomputed multiples in g_pre_comp. 1477280297Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1478280297Sjkim */ 1479238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1480280297Sjkim const felem_bytearray scalars[], 1481280297Sjkim const unsigned num_points, const u8 *g_scalar, 1482280297Sjkim const int mixed, const felem pre_comp[][17][3], 1483280297Sjkim const felem g_pre_comp[16][3]) 1484280297Sjkim{ 1485280297Sjkim int i, skip; 1486280297Sjkim unsigned num, gen_mul = (g_scalar != NULL); 1487280297Sjkim felem nq[3], tmp[4]; 1488280297Sjkim limb bits; 1489280297Sjkim u8 sign, digit; 1490238384Sjkim 1491280297Sjkim /* set nq to the point at infinity */ 1492280297Sjkim memset(nq, 0, 3 * sizeof(felem)); 1493238384Sjkim 1494280297Sjkim /* 1495280297Sjkim * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1496280297Sjkim * of the generator (last quarter of rounds) and additions of other 1497280297Sjkim * points multiples (every 5th round). 1498280297Sjkim */ 1499280297Sjkim skip = 1; /* save two point operations in the first 1500280297Sjkim * round */ 1501280297Sjkim for (i = (num_points ? 520 : 130); i >= 0; --i) { 1502280297Sjkim /* double */ 1503280297Sjkim if (!skip) 1504280297Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1505238384Sjkim 1506280297Sjkim /* add multiples of the generator */ 1507280297Sjkim if (gen_mul && (i <= 130)) { 1508280297Sjkim bits = get_bit(g_scalar, i + 390) << 3; 1509280297Sjkim if (i < 130) { 1510280297Sjkim bits |= get_bit(g_scalar, i + 260) << 2; 1511280297Sjkim bits |= get_bit(g_scalar, i + 130) << 1; 1512280297Sjkim bits |= get_bit(g_scalar, i); 1513280297Sjkim } 1514280297Sjkim /* select the point to add, in constant time */ 1515280297Sjkim select_point(bits, 16, g_pre_comp, tmp); 1516280297Sjkim if (!skip) { 1517280297Sjkim /* The 1 argument below is for "mixed" */ 1518280297Sjkim point_add(nq[0], nq[1], nq[2], 1519280297Sjkim nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1520280297Sjkim } else { 1521280297Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1522280297Sjkim skip = 0; 1523280297Sjkim } 1524280297Sjkim } 1525238384Sjkim 1526280297Sjkim /* do other additions every 5 doublings */ 1527280297Sjkim if (num_points && (i % 5 == 0)) { 1528280297Sjkim /* loop over all scalars */ 1529280297Sjkim for (num = 0; num < num_points; ++num) { 1530280297Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1531280297Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1532280297Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1533280297Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1534280297Sjkim bits |= get_bit(scalars[num], i) << 1; 1535280297Sjkim bits |= get_bit(scalars[num], i - 1); 1536280297Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1537238384Sjkim 1538280297Sjkim /* 1539280297Sjkim * select the point to add or subtract, in constant time 1540280297Sjkim */ 1541280297Sjkim select_point(digit, 17, pre_comp[num], tmp); 1542280297Sjkim felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1543280297Sjkim * point */ 1544280297Sjkim copy_conditional(tmp[1], tmp[3], (-(limb) sign)); 1545238384Sjkim 1546280297Sjkim if (!skip) { 1547280297Sjkim point_add(nq[0], nq[1], nq[2], 1548280297Sjkim nq[0], nq[1], nq[2], 1549280297Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1550280297Sjkim } else { 1551280297Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1552280297Sjkim skip = 0; 1553280297Sjkim } 1554280297Sjkim } 1555280297Sjkim } 1556280297Sjkim } 1557280297Sjkim felem_assign(x_out, nq[0]); 1558280297Sjkim felem_assign(y_out, nq[1]); 1559280297Sjkim felem_assign(z_out, nq[2]); 1560280297Sjkim} 1561238384Sjkim 1562238384Sjkim/* Precomputation for the group generator. */ 1563238384Sjkimtypedef struct { 1564280297Sjkim felem g_pre_comp[16][3]; 1565280297Sjkim int references; 1566238384Sjkim} NISTP521_PRE_COMP; 1567238384Sjkim 1568238384Sjkimconst EC_METHOD *EC_GFp_nistp521_method(void) 1569280297Sjkim{ 1570280297Sjkim static const EC_METHOD ret = { 1571280297Sjkim EC_FLAGS_DEFAULT_OCT, 1572280297Sjkim NID_X9_62_prime_field, 1573280297Sjkim ec_GFp_nistp521_group_init, 1574280297Sjkim ec_GFp_simple_group_finish, 1575280297Sjkim ec_GFp_simple_group_clear_finish, 1576280297Sjkim ec_GFp_nist_group_copy, 1577280297Sjkim ec_GFp_nistp521_group_set_curve, 1578280297Sjkim ec_GFp_simple_group_get_curve, 1579280297Sjkim ec_GFp_simple_group_get_degree, 1580280297Sjkim ec_GFp_simple_group_check_discriminant, 1581280297Sjkim ec_GFp_simple_point_init, 1582280297Sjkim ec_GFp_simple_point_finish, 1583280297Sjkim ec_GFp_simple_point_clear_finish, 1584280297Sjkim ec_GFp_simple_point_copy, 1585280297Sjkim ec_GFp_simple_point_set_to_infinity, 1586280297Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 1587280297Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 1588280297Sjkim ec_GFp_simple_point_set_affine_coordinates, 1589280297Sjkim ec_GFp_nistp521_point_get_affine_coordinates, 1590280297Sjkim 0 /* point_set_compressed_coordinates */ , 1591280297Sjkim 0 /* point2oct */ , 1592280297Sjkim 0 /* oct2point */ , 1593280297Sjkim ec_GFp_simple_add, 1594280297Sjkim ec_GFp_simple_dbl, 1595280297Sjkim ec_GFp_simple_invert, 1596280297Sjkim ec_GFp_simple_is_at_infinity, 1597280297Sjkim ec_GFp_simple_is_on_curve, 1598280297Sjkim ec_GFp_simple_cmp, 1599280297Sjkim ec_GFp_simple_make_affine, 1600280297Sjkim ec_GFp_simple_points_make_affine, 1601280297Sjkim ec_GFp_nistp521_points_mul, 1602280297Sjkim ec_GFp_nistp521_precompute_mult, 1603280297Sjkim ec_GFp_nistp521_have_precompute_mult, 1604280297Sjkim ec_GFp_nist_field_mul, 1605280297Sjkim ec_GFp_nist_field_sqr, 1606280297Sjkim 0 /* field_div */ , 1607280297Sjkim 0 /* field_encode */ , 1608280297Sjkim 0 /* field_decode */ , 1609280297Sjkim 0 /* field_set_to_one */ 1610280297Sjkim }; 1611238384Sjkim 1612280297Sjkim return &ret; 1613280297Sjkim} 1614238384Sjkim 1615238384Sjkim/******************************************************************************/ 1616280297Sjkim/* 1617280297Sjkim * FUNCTIONS TO MANAGE PRECOMPUTATION 1618238384Sjkim */ 1619238384Sjkim 1620238384Sjkimstatic NISTP521_PRE_COMP *nistp521_pre_comp_new() 1621280297Sjkim{ 1622280297Sjkim NISTP521_PRE_COMP *ret = NULL; 1623280297Sjkim ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP)); 1624280297Sjkim if (!ret) { 1625280297Sjkim ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1626280297Sjkim return ret; 1627280297Sjkim } 1628280297Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1629280297Sjkim ret->references = 1; 1630280297Sjkim return ret; 1631280297Sjkim} 1632238384Sjkim 1633238384Sjkimstatic void *nistp521_pre_comp_dup(void *src_) 1634280297Sjkim{ 1635280297Sjkim NISTP521_PRE_COMP *src = src_; 1636238384Sjkim 1637280297Sjkim /* no need to actually copy, these objects never change! */ 1638280297Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1639238384Sjkim 1640280297Sjkim return src_; 1641280297Sjkim} 1642238384Sjkim 1643238384Sjkimstatic void nistp521_pre_comp_free(void *pre_) 1644280297Sjkim{ 1645280297Sjkim int i; 1646280297Sjkim NISTP521_PRE_COMP *pre = pre_; 1647238384Sjkim 1648280297Sjkim if (!pre) 1649280297Sjkim return; 1650238384Sjkim 1651280297Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1652280297Sjkim if (i > 0) 1653280297Sjkim return; 1654238384Sjkim 1655280297Sjkim OPENSSL_free(pre); 1656280297Sjkim} 1657238384Sjkim 1658238384Sjkimstatic void nistp521_pre_comp_clear_free(void *pre_) 1659280297Sjkim{ 1660280297Sjkim int i; 1661280297Sjkim NISTP521_PRE_COMP *pre = pre_; 1662238384Sjkim 1663280297Sjkim if (!pre) 1664280297Sjkim return; 1665238384Sjkim 1666280297Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1667280297Sjkim if (i > 0) 1668280297Sjkim return; 1669238384Sjkim 1670280297Sjkim OPENSSL_cleanse(pre, sizeof(*pre)); 1671280297Sjkim OPENSSL_free(pre); 1672280297Sjkim} 1673238384Sjkim 1674238384Sjkim/******************************************************************************/ 1675280297Sjkim/* 1676280297Sjkim * OPENSSL EC_METHOD FUNCTIONS 1677238384Sjkim */ 1678238384Sjkim 1679238384Sjkimint ec_GFp_nistp521_group_init(EC_GROUP *group) 1680280297Sjkim{ 1681280297Sjkim int ret; 1682280297Sjkim ret = ec_GFp_simple_group_init(group); 1683280297Sjkim group->a_is_minus3 = 1; 1684280297Sjkim return ret; 1685280297Sjkim} 1686238384Sjkim 1687238384Sjkimint ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1688280297Sjkim const BIGNUM *a, const BIGNUM *b, 1689280297Sjkim BN_CTX *ctx) 1690280297Sjkim{ 1691280297Sjkim int ret = 0; 1692280297Sjkim BN_CTX *new_ctx = NULL; 1693280297Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1694238384Sjkim 1695280297Sjkim if (ctx == NULL) 1696280297Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1697280297Sjkim return 0; 1698280297Sjkim BN_CTX_start(ctx); 1699280297Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1700280297Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1701280297Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) 1702280297Sjkim goto err; 1703280297Sjkim BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p); 1704280297Sjkim BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a); 1705280297Sjkim BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b); 1706280297Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1707280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE, 1708280297Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1709280297Sjkim goto err; 1710280297Sjkim } 1711280297Sjkim group->field_mod_func = BN_nist_mod_521; 1712280297Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1713280297Sjkim err: 1714280297Sjkim BN_CTX_end(ctx); 1715280297Sjkim if (new_ctx != NULL) 1716280297Sjkim BN_CTX_free(new_ctx); 1717280297Sjkim return ret; 1718280297Sjkim} 1719238384Sjkim 1720280297Sjkim/* 1721280297Sjkim * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1722280297Sjkim * (X/Z^2, Y/Z^3) 1723280297Sjkim */ 1724238384Sjkimint ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group, 1725280297Sjkim const EC_POINT *point, 1726280297Sjkim BIGNUM *x, BIGNUM *y, 1727280297Sjkim BN_CTX *ctx) 1728280297Sjkim{ 1729280297Sjkim felem z1, z2, x_in, y_in, x_out, y_out; 1730280297Sjkim largefelem tmp; 1731238384Sjkim 1732280297Sjkim if (EC_POINT_is_at_infinity(group, point)) { 1733280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1734280297Sjkim EC_R_POINT_AT_INFINITY); 1735280297Sjkim return 0; 1736280297Sjkim } 1737280297Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1738280297Sjkim (!BN_to_felem(z1, &point->Z))) 1739280297Sjkim return 0; 1740280297Sjkim felem_inv(z2, z1); 1741280297Sjkim felem_square(tmp, z2); 1742280297Sjkim felem_reduce(z1, tmp); 1743280297Sjkim felem_mul(tmp, x_in, z1); 1744280297Sjkim felem_reduce(x_in, tmp); 1745280297Sjkim felem_contract(x_out, x_in); 1746280297Sjkim if (x != NULL) { 1747280297Sjkim if (!felem_to_BN(x, x_out)) { 1748280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1749280297Sjkim ERR_R_BN_LIB); 1750280297Sjkim return 0; 1751280297Sjkim } 1752280297Sjkim } 1753280297Sjkim felem_mul(tmp, z1, z2); 1754280297Sjkim felem_reduce(z1, tmp); 1755280297Sjkim felem_mul(tmp, y_in, z1); 1756280297Sjkim felem_reduce(y_in, tmp); 1757280297Sjkim felem_contract(y_out, y_in); 1758280297Sjkim if (y != NULL) { 1759280297Sjkim if (!felem_to_BN(y, y_out)) { 1760280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1761280297Sjkim ERR_R_BN_LIB); 1762280297Sjkim return 0; 1763280297Sjkim } 1764280297Sjkim } 1765280297Sjkim return 1; 1766280297Sjkim} 1767238384Sjkim 1768280297Sjkim/* points below is of size |num|, and tmp_felems is of size |num+1/ */ 1769280297Sjkimstatic void make_points_affine(size_t num, felem points[][3], 1770280297Sjkim felem tmp_felems[]) 1771280297Sjkim{ 1772280297Sjkim /* 1773280297Sjkim * Runs in constant time, unless an input is the point at infinity (which 1774280297Sjkim * normally shouldn't happen). 1775280297Sjkim */ 1776280297Sjkim ec_GFp_nistp_points_make_affine_internal(num, 1777280297Sjkim points, 1778280297Sjkim sizeof(felem), 1779280297Sjkim tmp_felems, 1780280297Sjkim (void (*)(void *))felem_one, 1781280297Sjkim felem_is_zero_int, 1782280297Sjkim (void (*)(void *, const void *)) 1783280297Sjkim felem_assign, 1784280297Sjkim (void (*)(void *, const void *)) 1785280297Sjkim felem_square_reduce, (void (*) 1786280297Sjkim (void *, 1787280297Sjkim const void 1788280297Sjkim *, 1789280297Sjkim const void 1790280297Sjkim *)) 1791280297Sjkim felem_mul_reduce, 1792280297Sjkim (void (*)(void *, const void *)) 1793280297Sjkim felem_inv, 1794280297Sjkim (void (*)(void *, const void *)) 1795280297Sjkim felem_contract); 1796280297Sjkim} 1797238384Sjkim 1798280297Sjkim/* 1799280297Sjkim * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1800280297Sjkim * values Result is stored in r (r can equal one of the inputs). 1801280297Sjkim */ 1802238384Sjkimint ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r, 1803280297Sjkim const BIGNUM *scalar, size_t num, 1804280297Sjkim const EC_POINT *points[], 1805280297Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1806280297Sjkim{ 1807280297Sjkim int ret = 0; 1808280297Sjkim int j; 1809280297Sjkim int mixed = 0; 1810280297Sjkim BN_CTX *new_ctx = NULL; 1811280297Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1812280297Sjkim felem_bytearray g_secret; 1813280297Sjkim felem_bytearray *secrets = NULL; 1814280297Sjkim felem(*pre_comp)[17][3] = NULL; 1815280297Sjkim felem *tmp_felems = NULL; 1816352193Sjkim unsigned i; 1817352193Sjkim int num_bytes; 1818280297Sjkim int have_pre_comp = 0; 1819280297Sjkim size_t num_points = num; 1820280297Sjkim felem x_in, y_in, z_in, x_out, y_out, z_out; 1821280297Sjkim NISTP521_PRE_COMP *pre = NULL; 1822280297Sjkim felem(*g_pre_comp)[3] = NULL; 1823280297Sjkim EC_POINT *generator = NULL; 1824280297Sjkim const EC_POINT *p = NULL; 1825280297Sjkim const BIGNUM *p_scalar = NULL; 1826238384Sjkim 1827280297Sjkim if (ctx == NULL) 1828280297Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1829280297Sjkim return 0; 1830280297Sjkim BN_CTX_start(ctx); 1831280297Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1832280297Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1833280297Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1834280297Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1835280297Sjkim goto err; 1836238384Sjkim 1837280297Sjkim if (scalar != NULL) { 1838280297Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1839280297Sjkim nistp521_pre_comp_dup, 1840280297Sjkim nistp521_pre_comp_free, 1841280297Sjkim nistp521_pre_comp_clear_free); 1842280297Sjkim if (pre) 1843280297Sjkim /* we have precomputation, try to use it */ 1844280297Sjkim g_pre_comp = &pre->g_pre_comp[0]; 1845280297Sjkim else 1846280297Sjkim /* try to use the standard precomputation */ 1847280297Sjkim g_pre_comp = (felem(*)[3]) gmul; 1848280297Sjkim generator = EC_POINT_new(group); 1849280297Sjkim if (generator == NULL) 1850280297Sjkim goto err; 1851280297Sjkim /* get the generator from precomputation */ 1852280297Sjkim if (!felem_to_BN(x, g_pre_comp[1][0]) || 1853280297Sjkim !felem_to_BN(y, g_pre_comp[1][1]) || 1854280297Sjkim !felem_to_BN(z, g_pre_comp[1][2])) { 1855280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1856280297Sjkim goto err; 1857280297Sjkim } 1858280297Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1859280297Sjkim generator, x, y, z, 1860280297Sjkim ctx)) 1861280297Sjkim goto err; 1862280297Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1863280297Sjkim /* precomputation matches generator */ 1864280297Sjkim have_pre_comp = 1; 1865280297Sjkim else 1866280297Sjkim /* 1867280297Sjkim * we don't have valid precomputation: treat the generator as a 1868280297Sjkim * random point 1869280297Sjkim */ 1870280297Sjkim num_points++; 1871280297Sjkim } 1872238384Sjkim 1873280297Sjkim if (num_points > 0) { 1874280297Sjkim if (num_points >= 2) { 1875280297Sjkim /* 1876280297Sjkim * unless we precompute multiples for just one point, converting 1877280297Sjkim * those into affine form is time well spent 1878280297Sjkim */ 1879280297Sjkim mixed = 1; 1880280297Sjkim } 1881280297Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1882280297Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1883280297Sjkim if (mixed) 1884280297Sjkim tmp_felems = 1885280297Sjkim OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1886280297Sjkim if ((secrets == NULL) || (pre_comp == NULL) 1887280297Sjkim || (mixed && (tmp_felems == NULL))) { 1888280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1889280297Sjkim goto err; 1890280297Sjkim } 1891238384Sjkim 1892280297Sjkim /* 1893280297Sjkim * we treat NULL scalars as 0, and NULL points as points at infinity, 1894280297Sjkim * i.e., they contribute nothing to the linear combination 1895280297Sjkim */ 1896280297Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1897280297Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1898280297Sjkim for (i = 0; i < num_points; ++i) { 1899352193Sjkim if (i == num) { 1900280297Sjkim /* 1901280297Sjkim * we didn't have a valid precomputation, so we pick the 1902280297Sjkim * generator 1903280297Sjkim */ 1904280297Sjkim p = EC_GROUP_get0_generator(group); 1905280297Sjkim p_scalar = scalar; 1906352193Sjkim } else { 1907280297Sjkim /* the i^th point */ 1908280297Sjkim p = points[i]; 1909280297Sjkim p_scalar = scalars[i]; 1910280297Sjkim } 1911280297Sjkim if ((p_scalar != NULL) && (p != NULL)) { 1912280297Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1913280297Sjkim if ((BN_num_bits(p_scalar) > 521) 1914280297Sjkim || (BN_is_negative(p_scalar))) { 1915280297Sjkim /* 1916280297Sjkim * this is an unusual input, and we don't guarantee 1917280297Sjkim * constant-timeness 1918280297Sjkim */ 1919280297Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1920280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1921280297Sjkim goto err; 1922280297Sjkim } 1923352193Sjkim num_bytes = bn_bn2lebinpad(tmp_scalar, 1924352193Sjkim secrets[i], sizeof(secrets[i])); 1925352193Sjkim } else { 1926352193Sjkim num_bytes = bn_bn2lebinpad(p_scalar, 1927352193Sjkim secrets[i], sizeof(secrets[i])); 1928352193Sjkim } 1929352193Sjkim if (num_bytes < 0) { 1930352193Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1931352193Sjkim goto err; 1932352193Sjkim } 1933280297Sjkim /* precompute multiples */ 1934280297Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1935280297Sjkim (!BN_to_felem(y_out, &p->Y)) || 1936280297Sjkim (!BN_to_felem(z_out, &p->Z))) 1937280297Sjkim goto err; 1938280297Sjkim memcpy(pre_comp[i][1][0], x_out, sizeof(felem)); 1939280297Sjkim memcpy(pre_comp[i][1][1], y_out, sizeof(felem)); 1940280297Sjkim memcpy(pre_comp[i][1][2], z_out, sizeof(felem)); 1941280297Sjkim for (j = 2; j <= 16; ++j) { 1942280297Sjkim if (j & 1) { 1943280297Sjkim point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1944280297Sjkim pre_comp[i][j][2], pre_comp[i][1][0], 1945280297Sjkim pre_comp[i][1][1], pre_comp[i][1][2], 0, 1946280297Sjkim pre_comp[i][j - 1][0], 1947280297Sjkim pre_comp[i][j - 1][1], 1948280297Sjkim pre_comp[i][j - 1][2]); 1949280297Sjkim } else { 1950280297Sjkim point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1951280297Sjkim pre_comp[i][j][2], pre_comp[i][j / 2][0], 1952280297Sjkim pre_comp[i][j / 2][1], 1953280297Sjkim pre_comp[i][j / 2][2]); 1954280297Sjkim } 1955280297Sjkim } 1956280297Sjkim } 1957280297Sjkim } 1958280297Sjkim if (mixed) 1959280297Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1960280297Sjkim } 1961238384Sjkim 1962280297Sjkim /* the scalar for the generator */ 1963280297Sjkim if ((scalar != NULL) && (have_pre_comp)) { 1964280297Sjkim memset(g_secret, 0, sizeof(g_secret)); 1965280297Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1966280297Sjkim if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) { 1967280297Sjkim /* 1968280297Sjkim * this is an unusual input, and we don't guarantee 1969280297Sjkim * constant-timeness 1970280297Sjkim */ 1971280297Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1972280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1973280297Sjkim goto err; 1974280297Sjkim } 1975352193Sjkim num_bytes = bn_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret)); 1976352193Sjkim } else { 1977352193Sjkim num_bytes = bn_bn2lebinpad(scalar, g_secret, sizeof(g_secret)); 1978352193Sjkim } 1979280297Sjkim /* do the multiplication with generator precomputation */ 1980280297Sjkim batch_mul(x_out, y_out, z_out, 1981280297Sjkim (const felem_bytearray(*))secrets, num_points, 1982280297Sjkim g_secret, 1983280297Sjkim mixed, (const felem(*)[17][3])pre_comp, 1984280297Sjkim (const felem(*)[3])g_pre_comp); 1985352193Sjkim } else { 1986280297Sjkim /* do the multiplication without generator precomputation */ 1987280297Sjkim batch_mul(x_out, y_out, z_out, 1988280297Sjkim (const felem_bytearray(*))secrets, num_points, 1989280297Sjkim NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1990352193Sjkim } 1991280297Sjkim /* reduce the output to its unique minimal representation */ 1992280297Sjkim felem_contract(x_in, x_out); 1993280297Sjkim felem_contract(y_in, y_out); 1994280297Sjkim felem_contract(z_in, z_out); 1995280297Sjkim if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1996280297Sjkim (!felem_to_BN(z, z_in))) { 1997280297Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1998280297Sjkim goto err; 1999280297Sjkim } 2000280297Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 2001238384Sjkim 2002280297Sjkim err: 2003280297Sjkim BN_CTX_end(ctx); 2004280297Sjkim if (generator != NULL) 2005280297Sjkim EC_POINT_free(generator); 2006280297Sjkim if (new_ctx != NULL) 2007280297Sjkim BN_CTX_free(new_ctx); 2008280297Sjkim if (secrets != NULL) 2009280297Sjkim OPENSSL_free(secrets); 2010280297Sjkim if (pre_comp != NULL) 2011280297Sjkim OPENSSL_free(pre_comp); 2012280297Sjkim if (tmp_felems != NULL) 2013280297Sjkim OPENSSL_free(tmp_felems); 2014280297Sjkim return ret; 2015280297Sjkim} 2016238384Sjkim 2017238384Sjkimint ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 2018280297Sjkim{ 2019280297Sjkim int ret = 0; 2020280297Sjkim NISTP521_PRE_COMP *pre = NULL; 2021280297Sjkim int i, j; 2022280297Sjkim BN_CTX *new_ctx = NULL; 2023280297Sjkim BIGNUM *x, *y; 2024280297Sjkim EC_POINT *generator = NULL; 2025280297Sjkim felem tmp_felems[16]; 2026238384Sjkim 2027280297Sjkim /* throw away old precomputation */ 2028280297Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup, 2029280297Sjkim nistp521_pre_comp_free, 2030280297Sjkim nistp521_pre_comp_clear_free); 2031280297Sjkim if (ctx == NULL) 2032280297Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 2033280297Sjkim return 0; 2034280297Sjkim BN_CTX_start(ctx); 2035280297Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 2036280297Sjkim goto err; 2037280297Sjkim /* get the generator */ 2038280297Sjkim if (group->generator == NULL) 2039280297Sjkim goto err; 2040280297Sjkim generator = EC_POINT_new(group); 2041280297Sjkim if (generator == NULL) 2042280297Sjkim goto err; 2043280297Sjkim BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x); 2044280297Sjkim BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y); 2045280297Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 2046280297Sjkim goto err; 2047280297Sjkim if ((pre = nistp521_pre_comp_new()) == NULL) 2048280297Sjkim goto err; 2049280297Sjkim /* 2050280297Sjkim * if the generator is the standard one, use built-in precomputation 2051280297Sjkim */ 2052280297Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 2053280297Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 2054296279Sjkim goto done; 2055280297Sjkim } 2056280297Sjkim if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) || 2057280297Sjkim (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) || 2058280297Sjkim (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z))) 2059280297Sjkim goto err; 2060280297Sjkim /* compute 2^130*G, 2^260*G, 2^390*G */ 2061280297Sjkim for (i = 1; i <= 4; i <<= 1) { 2062280297Sjkim point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], 2063280297Sjkim pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0], 2064280297Sjkim pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]); 2065280297Sjkim for (j = 0; j < 129; ++j) { 2066280297Sjkim point_double(pre->g_pre_comp[2 * i][0], 2067280297Sjkim pre->g_pre_comp[2 * i][1], 2068280297Sjkim pre->g_pre_comp[2 * i][2], 2069280297Sjkim pre->g_pre_comp[2 * i][0], 2070280297Sjkim pre->g_pre_comp[2 * i][1], 2071280297Sjkim pre->g_pre_comp[2 * i][2]); 2072280297Sjkim } 2073280297Sjkim } 2074280297Sjkim /* g_pre_comp[0] is the point at infinity */ 2075280297Sjkim memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0])); 2076280297Sjkim /* the remaining multiples */ 2077280297Sjkim /* 2^130*G + 2^260*G */ 2078280297Sjkim point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1], 2079280297Sjkim pre->g_pre_comp[6][2], pre->g_pre_comp[4][0], 2080280297Sjkim pre->g_pre_comp[4][1], pre->g_pre_comp[4][2], 2081280297Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2082280297Sjkim pre->g_pre_comp[2][2]); 2083280297Sjkim /* 2^130*G + 2^390*G */ 2084280297Sjkim point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], 2085280297Sjkim pre->g_pre_comp[10][2], pre->g_pre_comp[8][0], 2086280297Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 2087280297Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2088280297Sjkim pre->g_pre_comp[2][2]); 2089280297Sjkim /* 2^260*G + 2^390*G */ 2090280297Sjkim point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], 2091280297Sjkim pre->g_pre_comp[12][2], pre->g_pre_comp[8][0], 2092280297Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 2093280297Sjkim 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], 2094280297Sjkim pre->g_pre_comp[4][2]); 2095280297Sjkim /* 2^130*G + 2^260*G + 2^390*G */ 2096280297Sjkim point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], 2097280297Sjkim pre->g_pre_comp[14][2], pre->g_pre_comp[12][0], 2098280297Sjkim pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 2099280297Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 2100280297Sjkim pre->g_pre_comp[2][2]); 2101280297Sjkim for (i = 1; i < 8; ++i) { 2102280297Sjkim /* odd multiples: add G */ 2103280297Sjkim point_add(pre->g_pre_comp[2 * i + 1][0], 2104280297Sjkim pre->g_pre_comp[2 * i + 1][1], 2105280297Sjkim pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0], 2106280297Sjkim pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0, 2107280297Sjkim pre->g_pre_comp[1][0], pre->g_pre_comp[1][1], 2108280297Sjkim pre->g_pre_comp[1][2]); 2109280297Sjkim } 2110280297Sjkim make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems); 2111238384Sjkim 2112296279Sjkim done: 2113280297Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup, 2114280297Sjkim nistp521_pre_comp_free, 2115280297Sjkim nistp521_pre_comp_clear_free)) 2116280297Sjkim goto err; 2117280297Sjkim ret = 1; 2118280297Sjkim pre = NULL; 2119238384Sjkim err: 2120280297Sjkim BN_CTX_end(ctx); 2121280297Sjkim if (generator != NULL) 2122280297Sjkim EC_POINT_free(generator); 2123280297Sjkim if (new_ctx != NULL) 2124280297Sjkim BN_CTX_free(new_ctx); 2125280297Sjkim if (pre) 2126280297Sjkim nistp521_pre_comp_free(pre); 2127280297Sjkim return ret; 2128280297Sjkim} 2129238384Sjkim 2130238384Sjkimint ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group) 2131280297Sjkim{ 2132280297Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup, 2133280297Sjkim nistp521_pre_comp_free, 2134280297Sjkim nistp521_pre_comp_clear_free) 2135280297Sjkim != NULL) 2136280297Sjkim return 1; 2137280297Sjkim else 2138280297Sjkim return 0; 2139280297Sjkim} 2140238384Sjkim 2141238384Sjkim#else 2142280297Sjkimstatic void *dummy = &dummy; 2143238384Sjkim#endif 2144