ecp_nistp224.c revision 352193
/* crypto/ec/ecp_nistp224.c */
/*
 * Written by Emilia Kasper (Google) for the OpenSSL project.
 */
/* Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
 *
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
 * and Adam Langley's public domain 64-bit C implementation of curve25519
 */

#include <openssl/opensslconf.h>
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128

# ifndef OPENSSL_SYS_VMS
#  include <stdint.h>
# else
#  include <inttypes.h>
# endif

# include <string.h>
# include <openssl/err.h>
# include "ec_lcl.h"
# include "bn_int.h"            /* bn_bn2lebinpad, bn_lebin2bn */

# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  /* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Need GCC 3.1 or later to define type uint128_t"
# endif

typedef uint8_t u8;
typedef uint64_t u64;

/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */

typedef uint64_t limb;
typedef uint128_t widelimb;

typedef limb felem[4];
typedef widelimb widefelem[7];

/*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
 * group order size for the elliptic curve, and we also use this type for
 * scalars for point multiplication.
 */
typedef u8 felem_bytearray[28];

/*
 * NIST P-224 domain parameters, each as a 28-byte string:
 * the field prime p, curve coefficients a and b, and the affine
 * coordinates x, y of the standard generator.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};

/*-
 * Precomputed multiples of the standard generator
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
 * (0 for the point at infinity).
 * For each field element, slice a_0 is word 0, etc.
 *
 * The table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^56G
 *     3 | 0 0 1 1 | (2^56 + 1)G
 *     4 | 0 1 0 0 | 2^112G
 *     5 | 0 1 0 1 | (2^112 + 1)G
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 *     8 | 1 0 0 0 | 2^168G
 *     9 | 1 0 0 1 | (2^168 + 1)G
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 * followed by a copy of this with each element multiplied by 2^28.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 */
static const felem gmul[2][16][3] = {
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
      {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
      {1, 0, 0, 0}},
     {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
      {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
      {1, 0, 0, 0}},
     {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
      {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
      {1, 0, 0, 0}},
     {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
      {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
      {1, 0, 0, 0}},
     {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
      {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
      {1, 0, 0, 0}},
     {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
      {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
      {1, 0, 0, 0}},
     {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
      {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
      {1, 0, 0, 0}},
     {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
      {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
      {1, 0, 0, 0}},
     {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
      {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
      {1, 0, 0, 0}},
     {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
      {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
      {1, 0, 0, 0}},
     {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
      {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
      {1, 0, 0, 0}},
     {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
      {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
      {1, 0, 0, 0}},
     {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
      {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
      {1, 0, 0, 0}},
     {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
      {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
      {1, 0, 0, 0}},
     {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
      {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
      {1, 0, 0, 0}}},
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
      {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
      {1, 0, 0, 0}},
     {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
      {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
      {1, 0, 0, 0}},
     {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
      {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
      {1, 0, 0, 0}},
     {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
      {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
      {1, 0, 0, 0}},
     {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
      {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
      {1, 0, 0, 0}},
     {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
      {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
      {1, 0, 0, 0}},
     {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
      {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
      {1, 0, 0, 0}},
     {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
      {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
      {1, 0, 0, 0}},
     {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
      {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
      {1, 0, 0, 0}},
     {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
      {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
      {1, 0, 0, 0}},
     {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
      {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
      {1, 0, 0, 0}},
     {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
      {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
      {1, 0, 0, 0}},
     {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
      {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
      {1, 0, 0, 0}},
     {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
      {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
      {1, 0, 0, 0}},
     {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
      {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
      {1, 0, 0, 0}}}
};

/* Precomputation for the group generator. */
typedef struct {
    felem g_pre_comp[2][16][3];
    int references;
} NISTP224_PRE_COMP;

const EC_METHOD *EC_GFp_nistp224_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
306 ec_GFp_nist_field_sqr, 307 0 /* field_div */ , 308 0 /* field_encode */ , 309 0 /* field_decode */ , 310 0 /* field_set_to_one */ 311 }; 312 313 return &ret; 314} 315 316/* 317 * Helper functions to convert field elements to/from internal representation 318 */ 319static void bin28_to_felem(felem out, const u8 in[28]) 320{ 321 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 322 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; 323 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; 324 out[3] = (*((const uint64_t *)(in+20))) >> 8; 325} 326 327static void felem_to_bin28(u8 out[28], const felem in) 328{ 329 unsigned i; 330 for (i = 0; i < 7; ++i) { 331 out[i] = in[0] >> (8 * i); 332 out[i + 7] = in[1] >> (8 * i); 333 out[i + 14] = in[2] >> (8 * i); 334 out[i + 21] = in[3] >> (8 * i); 335 } 336} 337 338/* From OpenSSL BIGNUM to internal representation */ 339static int BN_to_felem(felem out, const BIGNUM *bn) 340{ 341 felem_bytearray b_out; 342 int num_bytes; 343 344 if (BN_is_negative(bn)) { 345 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 346 return 0; 347 } 348 num_bytes = bn_bn2lebinpad(bn, b_out, sizeof(b_out)); 349 if (num_bytes < 0) { 350 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 351 return 0; 352 } 353 bin28_to_felem(out, b_out); 354 return 1; 355} 356 357/* From internal representation to OpenSSL BIGNUM */ 358static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 359{ 360 felem_bytearray b_out; 361 felem_to_bin28(b_out, in); 362 return bn_lebin2bn(b_out, sizeof(b_out), out); 363} 364 365/******************************************************************************/ 366/*- 367 * FIELD OPERATIONS 368 * 369 * Field operations, using the internal representation of field elements. 370 * NB! These operations are specific to our point multiplication and cannot be 371 * expected to be correct in general - e.g., multiplication with a large scalar 372 * will cause an overflow. 
 *
 */

/* Set out = 1 (in the internal 4-limb representation). */
static void felem_one(felem out)
{
    out[0] = 1;
    out[1] = 0;
    out[2] = 0;
    out[3] = 0;
}

/* Copy a field element: out = in. */
static void felem_assign(felem out, const felem in)
{
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
    out[3] = in[3];
}

/* Sum two field elements: out += in */
static void felem_sum(felem out, const felem in)
{
    out[0] += in[0];
    out[1] += in[1];
    out[2] += in[2];
    out[3] += in[3];
}

/* Get negative value: out = -in */
/* Assumes in[i] < 2^57 */
static void felem_neg(felem out, const felem in)
{
    /* Per-limb constants summing to 0 mod p = 2^224 - 2^96 + 1 */
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);

    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] = two58p2 - in[0];
    out[1] = two58m42m2 - in[1];
    out[2] = two58m2 - in[2];
    out[3] = two58m2 - in[3];
}

/* Subtract field elements: out -= in */
/* Assumes in[i] < 2^57 */
static void felem_diff(felem out, const felem in)
{
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] += two58p2;
    out[1] += two58m42m2;
    out[2] += two58m2;
    out[3] += two58m2;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

/* Subtract in unreduced 128-bit mode: out -= in */
/* Assumes in[i] < 2^119 */
static void widefelem_diff(widefelem out, const widefelem in)
{
    static const widelimb two120 = ((widelimb) 1) << 120;
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 64);
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] += two120;
    out[1] += two120m64;
    out[2] += two120m64;
    out[3] += two120;
    out[4] += two120m104m64;
    out[5] += two120m64;
    out[6] += two120m64;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
    out[4] -= in[4];
    out[5] -= in[5];
    out[6] -= in[6];
}

/* Subtract in mixed mode: out128 -= in64 */
/* in[i] < 2^63 */
static void felem_diff_128_64(widefelem out, const felem in)
{
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
        (((widelimb) 1) << 8);
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 8);
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] += two64p8;
    out[1] += two64m48m8;
    out[2] += two64m8;
    out[3] += two64m8;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

/*
 * Multiply a field element by a scalar: out = out * scalar The scalars we
 * actually use are small, so results fit without overflow
 */
static void felem_scalar(felem out, const limb scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
}

/*
 * Multiply an unreduced field element by a scalar: out = out * scalar The
 * scalars we actually use are small, so results fit without overflow
 */
static void widefelem_scalar(widefelem out, const widelimb scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
    out[4] *= scalar;
    out[5] *= scalar;
    out[6] *= scalar;
}

/* Square a field element: out = in^2 */
static void felem_square(widefelem out, const felem in)
{
    limb tmp0, tmp1, tmp2;
    /* doubled limbs let each cross product be accumulated only once */
    tmp0 = 2 * in[0];
    tmp1 = 2 * in[1];
    tmp2 = 2 * in[2];
    out[0] = ((widelimb) in[0]) * in[0];
    out[1] = ((widelimb) in[0]) * tmp1;
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
    out[5] = ((widelimb) in[3]) * tmp2;
    out[6] = ((widelimb) in[3]) * in[3];
}

/* Multiply two field elements: out = in1 * in2 */
static void felem_mul(widefelem out, const felem in1, const felem in2)
{
    out[0] = ((widelimb) in1[0]) * in2[0];
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
        ((widelimb) in1[2]) * in2[0];
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
        ((widelimb) in1[3]) * in2[1];
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
    out[6] = ((widelimb) in1[3]) * in2[3];
}

/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /* Eliminate in[4], in[5], in[6] */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}

/* out = in^2, reduced to limbs < 2^57 */
static void felem_square_reduce(felem out, const felem in)
{
    widefelem tmp;
    felem_square(tmp, in);
    felem_reduce(out, tmp);
}

/* out = in1 * in2, reduced to limbs < 2^57 */
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
{
    widefelem tmp;
    felem_mul(tmp, in1, in2);
    felem_reduce(out, tmp);
}

/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first)
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}

/*
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    /* each check is branch-free: OR of limb differences, then sign trick */
    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}

/* void*-typed wrapper around felem_is_zero, used as a callback elsewhere */
static int felem_is_zero_int(const void *in)
{
    return (int)(felem_is_zero(in) & ((limb) 1));
}

/* Invert a field element */
/* Computation chain copied from djb's code */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    /*
     * Fermat inversion: raise in to the power p - 2 = 2^224 - 2^96 - 1
     * using a fixed square-and-multiply addition chain (constant-time).
     * Comments track the exponent accumulated so far.
     */
    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}

/*
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 * out to itself.
 */
static void copy_conditional(felem out, const felem in, limb icopy)
{
    unsigned i;
    /*
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
     */
    const limb copy = -icopy;
    for (i = 0; i < 4; ++i) {
        /* XOR-swap trick: tmp is in[i]^out[i] when copying, 0 otherwise */
        const limb tmp = copy & (in[i] ^ out[i]);
        out[i] ^= tmp;
    }
}

/******************************************************************************/
/*-
 * ELLIPTIC CURVE POINT OPERATIONS
 *
 * Points are represented in Jacobian projective coordinates:
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 * or to the point at infinity if Z == 0.
 *
 */

/*-
 * Double an elliptic curve point:
 * (X', Y', Z') = 2 * (X, Y, Z), where
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 * while x_out == y_in is not (maybe this works, but it's not tested).
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}

/*-
 * Add two elliptic curve points:
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 *
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 */

/*
 * This function is not entirely constant-time: it includes a branch for
 * checking whether the two input points are equal, (while not equal to the
 * point at infinity). This case never happens during single point
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * the formulae are incorrect if the points are equal so we check for
     * this and do doubling if this happens
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}

/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
1085 * The pre_comp array argument should be size of |size| argument 1086 */ 1087static void select_point(const u64 idx, unsigned int size, 1088 const felem pre_comp[][3], felem out[3]) 1089{ 1090 unsigned i, j; 1091 limb *outlimbs = &out[0][0]; 1092 memset(outlimbs, 0, 3 * sizeof(felem)); 1093 1094 for (i = 0; i < size; i++) { 1095 const limb *inlimbs = &pre_comp[i][0][0]; 1096 u64 mask = i ^ idx; 1097 mask |= mask >> 4; 1098 mask |= mask >> 2; 1099 mask |= mask >> 1; 1100 mask &= 1; 1101 mask--; 1102 for (j = 0; j < 4 * 3; j++) 1103 outlimbs[j] |= inlimbs[j] & mask; 1104 } 1105} 1106 1107/* get_bit returns the |i|th bit in |in| */ 1108static char get_bit(const felem_bytearray in, unsigned i) 1109{ 1110 if (i >= 224) 1111 return 0; 1112 return (in[i >> 3] >> (i & 7)) & 1; 1113} 1114 1115/* 1116 * Interleaved point multiplication using precomputed point multiples: The 1117 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1118 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1119 * generator, using certain (large) precomputed multiples in g_pre_comp. 1120 * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1121 */ 1122static void batch_mul(felem x_out, felem y_out, felem z_out, 1123 const felem_bytearray scalars[], 1124 const unsigned num_points, const u8 *g_scalar, 1125 const int mixed, const felem pre_comp[][17][3], 1126 const felem g_pre_comp[2][16][3]) 1127{ 1128 int i, skip; 1129 unsigned num; 1130 unsigned gen_mul = (g_scalar != NULL); 1131 felem nq[3], tmp[4]; 1132 u64 bits; 1133 u8 sign, digit; 1134 1135 /* set nq to the point at infinity */ 1136 memset(nq, 0, 3 * sizeof(felem)); 1137 1138 /* 1139 * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1140 * of the generator (two in each of the last 28 rounds) and additions of 1141 * other points multiples (every 5th round). 1142 */ 1143 skip = 1; /* save two point operations in the first 1144 * round */ 1145 for (i = (num_points ? 
220 : 27); i >= 0; --i) { 1146 /* double */ 1147 if (!skip) 1148 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1149 1150 /* add multiples of the generator */ 1151 if (gen_mul && (i <= 27)) { 1152 /* first, look 28 bits upwards */ 1153 bits = get_bit(g_scalar, i + 196) << 3; 1154 bits |= get_bit(g_scalar, i + 140) << 2; 1155 bits |= get_bit(g_scalar, i + 84) << 1; 1156 bits |= get_bit(g_scalar, i + 28); 1157 /* select the point to add, in constant time */ 1158 select_point(bits, 16, g_pre_comp[1], tmp); 1159 1160 if (!skip) { 1161 /* value 1 below is argument for "mixed" */ 1162 point_add(nq[0], nq[1], nq[2], 1163 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1164 } else { 1165 memcpy(nq, tmp, 3 * sizeof(felem)); 1166 skip = 0; 1167 } 1168 1169 /* second, look at the current position */ 1170 bits = get_bit(g_scalar, i + 168) << 3; 1171 bits |= get_bit(g_scalar, i + 112) << 2; 1172 bits |= get_bit(g_scalar, i + 56) << 1; 1173 bits |= get_bit(g_scalar, i); 1174 /* select the point to add, in constant time */ 1175 select_point(bits, 16, g_pre_comp[0], tmp); 1176 point_add(nq[0], nq[1], nq[2], 1177 nq[0], nq[1], nq[2], 1178 1 /* mixed */ , tmp[0], tmp[1], tmp[2]); 1179 } 1180 1181 /* do other additions every 5 doublings */ 1182 if (num_points && (i % 5 == 0)) { 1183 /* loop over all scalars */ 1184 for (num = 0; num < num_points; ++num) { 1185 bits = get_bit(scalars[num], i + 4) << 5; 1186 bits |= get_bit(scalars[num], i + 3) << 4; 1187 bits |= get_bit(scalars[num], i + 2) << 3; 1188 bits |= get_bit(scalars[num], i + 1) << 2; 1189 bits |= get_bit(scalars[num], i) << 1; 1190 bits |= get_bit(scalars[num], i - 1); 1191 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1192 1193 /* select the point to add or subtract */ 1194 select_point(digit, 17, pre_comp[num], tmp); 1195 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1196 * point */ 1197 copy_conditional(tmp[1], tmp[3], sign); 1198 1199 if (!skip) { 1200 point_add(nq[0], nq[1], nq[2], 1201 
nq[0], nq[1], nq[2], 1202 mixed, tmp[0], tmp[1], tmp[2]); 1203 } else { 1204 memcpy(nq, tmp, 3 * sizeof(felem)); 1205 skip = 0; 1206 } 1207 } 1208 } 1209 } 1210 felem_assign(x_out, nq[0]); 1211 felem_assign(y_out, nq[1]); 1212 felem_assign(z_out, nq[2]); 1213} 1214 1215/******************************************************************************/ 1216/* 1217 * FUNCTIONS TO MANAGE PRECOMPUTATION 1218 */ 1219 1220static NISTP224_PRE_COMP *nistp224_pre_comp_new() 1221{ 1222 NISTP224_PRE_COMP *ret = NULL; 1223 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret)); 1224 if (!ret) { 1225 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1226 return ret; 1227 } 1228 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1229 ret->references = 1; 1230 return ret; 1231} 1232 1233static void *nistp224_pre_comp_dup(void *src_) 1234{ 1235 NISTP224_PRE_COMP *src = src_; 1236 1237 /* no need to actually copy, these objects never change! */ 1238 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1239 1240 return src_; 1241} 1242 1243static void nistp224_pre_comp_free(void *pre_) 1244{ 1245 int i; 1246 NISTP224_PRE_COMP *pre = pre_; 1247 1248 if (!pre) 1249 return; 1250 1251 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1252 if (i > 0) 1253 return; 1254 1255 OPENSSL_free(pre); 1256} 1257 1258static void nistp224_pre_comp_clear_free(void *pre_) 1259{ 1260 int i; 1261 NISTP224_PRE_COMP *pre = pre_; 1262 1263 if (!pre) 1264 return; 1265 1266 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1267 if (i > 0) 1268 return; 1269 1270 OPENSSL_cleanse(pre, sizeof(*pre)); 1271 OPENSSL_free(pre); 1272} 1273 1274/******************************************************************************/ 1275/* 1276 * OPENSSL EC_METHOD FUNCTIONS 1277 */ 1278 1279int ec_GFp_nistp224_group_init(EC_GROUP *group) 1280{ 1281 int ret; 1282 ret = ec_GFp_simple_group_init(group); 1283 group->a_is_minus3 = 1; 1284 return ret; 1285} 1286 1287int 
ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1288 const BIGNUM *a, const BIGNUM *b, 1289 BN_CTX *ctx) 1290{ 1291 int ret = 0; 1292 BN_CTX *new_ctx = NULL; 1293 BIGNUM *curve_p, *curve_a, *curve_b; 1294 1295 if (ctx == NULL) 1296 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1297 return 0; 1298 BN_CTX_start(ctx); 1299 if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1300 ((curve_a = BN_CTX_get(ctx)) == NULL) || 1301 ((curve_b = BN_CTX_get(ctx)) == NULL)) 1302 goto err; 1303 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1304 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1305 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1306 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1307 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1308 EC_R_WRONG_CURVE_PARAMETERS); 1309 goto err; 1310 } 1311 group->field_mod_func = BN_nist_mod_224; 1312 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1313 err: 1314 BN_CTX_end(ctx); 1315 if (new_ctx != NULL) 1316 BN_CTX_free(new_ctx); 1317 return ret; 1318} 1319 1320/* 1321 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1322 * (X/Z^2, Y/Z^3) 1323 */ 1324int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1325 const EC_POINT *point, 1326 BIGNUM *x, BIGNUM *y, 1327 BN_CTX *ctx) 1328{ 1329 felem z1, z2, x_in, y_in, x_out, y_out; 1330 widefelem tmp; 1331 1332 if (EC_POINT_is_at_infinity(group, point)) { 1333 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1334 EC_R_POINT_AT_INFINITY); 1335 return 0; 1336 } 1337 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1338 (!BN_to_felem(z1, &point->Z))) 1339 return 0; 1340 felem_inv(z2, z1); 1341 felem_square(tmp, z2); 1342 felem_reduce(z1, tmp); 1343 felem_mul(tmp, x_in, z1); 1344 felem_reduce(x_in, tmp); 1345 felem_contract(x_out, x_in); 1346 if (x != NULL) { 1347 if (!felem_to_BN(x, x_out)) { 1348 
ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1349 ERR_R_BN_LIB); 1350 return 0; 1351 } 1352 } 1353 felem_mul(tmp, z1, z2); 1354 felem_reduce(z1, tmp); 1355 felem_mul(tmp, y_in, z1); 1356 felem_reduce(y_in, tmp); 1357 felem_contract(y_out, y_in); 1358 if (y != NULL) { 1359 if (!felem_to_BN(y, y_out)) { 1360 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1361 ERR_R_BN_LIB); 1362 return 0; 1363 } 1364 } 1365 return 1; 1366} 1367 1368static void make_points_affine(size_t num, felem points[ /* num */ ][3], 1369 felem tmp_felems[ /* num+1 */ ]) 1370{ 1371 /* 1372 * Runs in constant time, unless an input is the point at infinity (which 1373 * normally shouldn't happen). 1374 */ 1375 ec_GFp_nistp_points_make_affine_internal(num, 1376 points, 1377 sizeof(felem), 1378 tmp_felems, 1379 (void (*)(void *))felem_one, 1380 felem_is_zero_int, 1381 (void (*)(void *, const void *)) 1382 felem_assign, 1383 (void (*)(void *, const void *)) 1384 felem_square_reduce, (void (*) 1385 (void *, 1386 const void 1387 *, 1388 const void 1389 *)) 1390 felem_mul_reduce, 1391 (void (*)(void *, const void *)) 1392 felem_inv, 1393 (void (*)(void *, const void *)) 1394 felem_contract); 1395} 1396 1397/* 1398 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1399 * values Result is stored in r (r can equal one of the inputs). 
 */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    unsigned i;
    int mixed = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem(*pre_comp)[17][3] = NULL;
    felem *tmp_felems = NULL;
    int num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL) {
        pre = EC_EX_DATA_get_data(group->extra_data,
                                  nistp224_pre_comp_dup,
                                  nistp224_pre_comp_free,
                                  nistp224_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points = num_points + 1;
    }

    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems =
                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
        for (i = 0; i < num_points; ++i) {
            if (i == num) {
                /* the generator */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^224 */
                if ((BN_num_bits(p_scalar) > 224)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = bn_bn2lebinpad(tmp_scalar,
                                               secrets[i], sizeof(secrets[i]));
                } else {
                    num_bytes = bn_bn2lebinpad(p_scalar,
                                               secrets[i], sizeof(secrets[i]));
                }
                if (num_bytes < 0) {
                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                    goto err;
                }
                /* precompute multiples: 1*P in slot 1, then 2*P..16*P */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z)))
                    goto err;
                felem_assign(pre_comp[i][1][0], x_out);
                felem_assign(pre_comp[i][1][1], y_out);
                felem_assign(pre_comp[i][1][2], z_out);
                for (j = 2; j <= 16; ++j) {
                    if (j & 1) {
                        /* odd j: (j-1)*P + P */
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        /* even j: double (j/2)*P */
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = bn_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
        } else {
            num_bytes = bn_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
        }
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
    } else {
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    }
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
}

/*
 * Build (or rebuild) the fixed-base comb tables for the group generator and
 * attach them to group->extra_data. If the generator is the standard P-224
 * one, the built-in table gmul is copied instead of being recomputed.
 */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP224_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
                         nistp224_pre_comp_free,
                         nistp224_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp224_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
        goto err;
    /*
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
     * 2^140*G, 2^196*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* 28 doublings take table-0 entry i to table-1 entry i */
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* another 28 doublings take table-1 entry i to table-0 entry 2i */
        point_double(pre->g_pre_comp[0][2 * i][0],
                     pre->g_pre_comp[0][2 * i][1],
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2],
                         pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                  pre->g_pre_comp[i][4][2]);
        /*
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
         */
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^28*G */
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
                      pre->g_pre_comp[i][2 * j + 1][1],
                      pre->g_pre_comp[i][2 * j + 1][2],
                      pre->g_pre_comp[i][2 * j][0],
                      pre->g_pre_comp[i][2 * j][1],
                      pre->g_pre_comp[i][2 * j][2], 0,
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
                      pre->g_pre_comp[i][1][2]);
        }
    }
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);

 done:
    /* on success, ownership of pre passes to group->extra_data */
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
                             nistp224_pre_comp_free,
                             nistp224_pre_comp_clear_free))
        goto err;
    ret = 1;
    pre = NULL;
 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp224_pre_comp_free(pre);
    return ret;
}

/* Report whether generator precomputation is attached to this group */
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
{
    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
                            nistp224_pre_comp_free,
                            nistp224_pre_comp_clear_free)
        != NULL)
        return 1;
    else
        return 0;
}

#else
static void *dummy = &dummy;
#endif