ecp_nistp224.c revision 331638
1/* crypto/ec/ecp_nistp224.c */ 2/* 3 * Written by Emilia Kasper (Google) for the OpenSSL project. 4 */ 5/* Copyright 2011 Google Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21/* 22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 23 * 24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation 25 * and Adam Langley's public domain 64-bit C implementation of curve25519 26 */ 27 28#include <openssl/opensslconf.h> 29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 30 31# ifndef OPENSSL_SYS_VMS 32# include <stdint.h> 33# else 34# include <inttypes.h> 35# endif 36 37# include <string.h> 38# include <openssl/err.h> 39# include "ec_lcl.h" 40 41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 42 /* even with gcc, the typedef won't work for 32-bit platforms */ 43typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 44 * platforms */ 45# else 46# error "Need GCC 3.1 or later to define type uint128_t" 47# endif 48 49typedef uint8_t u8; 50typedef uint64_t u64; 51 52/******************************************************************************/ 53/*- 54 * INTERNAL REPRESENTATION OF FIELD ELEMENTS 55 * 56 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 57 * using 64-bit coefficients called 'limbs', 58 * and sometimes (for multiplication results) as 59 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 
2^336*b_6 60 * using 128-bit coefficients called 'widelimbs'. 61 * A 4-limb representation is an 'felem'; 62 * a 7-widelimb representation is a 'widefelem'. 63 * Even within felems, bits of adjacent limbs overlap, and we don't always 64 * reduce the representations: we ensure that inputs to each felem 65 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60, 66 * and fit into a 128-bit word without overflow. The coefficients are then 67 * again partially reduced to obtain an felem satisfying a_i < 2^57. 68 * We only reduce to the unique minimal representation at the end of the 69 * computation. 70 */ 71 72typedef uint64_t limb; 73typedef uint128_t widelimb; 74 75typedef limb felem[4]; 76typedef widelimb widefelem[7]; 77 78/* 79 * Field element represented as a byte arrary. 28*8 = 224 bits is also the 80 * group order size for the elliptic curve, and we also use this type for 81 * scalars for point multiplication. 82 */ 83typedef u8 felem_bytearray[28]; 84 85static const felem_bytearray nistp224_curve_params[5] = { 86 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */ 87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 88 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, 89 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */ 90 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}, 92 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */ 93 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA, 94 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4}, 95 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */ 96 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22, 97 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21}, 98 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */ 99 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64, 100 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34} 101}; 102 
103/*- 104 * Precomputed multiples of the standard generator 105 * Points are given in coordinates (X, Y, Z) where Z normally is 1 106 * (0 for the point at infinity). 107 * For each field element, slice a_0 is word 0, etc. 108 * 109 * The table has 2 * 16 elements, starting with the following: 110 * index | bits | point 111 * ------+---------+------------------------------ 112 * 0 | 0 0 0 0 | 0G 113 * 1 | 0 0 0 1 | 1G 114 * 2 | 0 0 1 0 | 2^56G 115 * 3 | 0 0 1 1 | (2^56 + 1)G 116 * 4 | 0 1 0 0 | 2^112G 117 * 5 | 0 1 0 1 | (2^112 + 1)G 118 * 6 | 0 1 1 0 | (2^112 + 2^56)G 119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G 120 * 8 | 1 0 0 0 | 2^168G 121 * 9 | 1 0 0 1 | (2^168 + 1)G 122 * 10 | 1 0 1 0 | (2^168 + 2^56)G 123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G 124 * 12 | 1 1 0 0 | (2^168 + 2^112)G 125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G 126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G 127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G 128 * followed by a copy of this with each element multiplied by 2^28. 129 * 130 * The reason for this is so that we can clock bits into four different 131 * locations when doing simple scalar multiplies against the base point, 132 * and then another four locations using the second 16 elements. 
133 */ 134static const felem gmul[2][16][3] = { {{{0, 0, 0, 0}, 135 {0, 0, 0, 0}, 136 {0, 0, 0, 0}}, 137 {{0x3280d6115c1d21, 0xc1d356c2112234, 138 0x7f321390b94a03, 0xb70e0cbd6bb4bf}, 139 {0xd5819985007e34, 0x75a05a07476444, 140 0xfb4c22dfe6cd43, 0xbd376388b5f723}, 141 {1, 0, 0, 0}}, 142 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 143 0x2242df8d8a2a43, 0x1f49bbb0f99bc5}, 144 {0x29e0b892dc9c43, 0xece8608436e662, 145 0xdc858f185310d0, 0x9812dd4eb8d321}, 146 {1, 0, 0, 0}}, 147 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 148 0x16e9a3bbce8a3f, 0xeedcccd8c2a748}, 149 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 150 0x313865468fafec, 0x5cb379ba910a17}, 151 {1, 0, 0, 0}}, 152 {{0x0641966cab26e3, 0x91fb2991fab0a0, 153 0xefec27a4e13a0b, 0x0499aa8a5f8ebe}, 154 {0x7510407766af5d, 0x84d929610d5450, 155 0x81d77aae82f706, 0x6916f6d4338c5b}, 156 {1, 0, 0, 0}}, 157 {{0xea95ac3b1f15c6, 0x086000905e82d4, 158 0xdd323ae4d1c8b1, 0x932b56be7685a3}, 159 {0x9ef93dea25dbbf, 0x41665960f390f0, 160 0xfdec76dbe2a8a7, 0x523e80f019062a}, 161 {1, 0, 0, 0}}, 162 {{0x822fdd26732c73, 0xa01c83531b5d0f, 163 0x363f37347c1ba4, 0xc391b45c84725c}, 164 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 165 0xc393da7e222a7f, 0x1efb7890ede244}, 166 {1, 0, 0, 0}}, 167 {{0x4c9e90ca217da1, 0xd11beca79159bb, 168 0xff8d33c2c98b7c, 0x2610b39409f849}, 169 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 170 0x966c079b753c89, 0xfe67e4e820b112}, 171 {1, 0, 0, 0}}, 172 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 173 0x79b7619a3e7c4c, 0x05c73240899b47}, 174 {0x9f7f6382c73e3a, 0x18615165c56bda, 175 0x641fab2116fd56, 0x72855882b08394}, 176 {1, 0, 0, 0}}, 177 {{0x0469182f161c09, 0x74a98ca8d00fb5, 178 0xb89da93489a3e0, 0x41c98768fb0c1d}, 179 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 180 0x1cfe2d3fbf59e6, 0x0e5e03408738a7}, 181 {1, 0, 0, 0}}, 182 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 183 0xe03ab9f738beb8, 0xcb0c5d0dc34f24}, 184 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 185 0x9281d688bcbe50, 0xc40331df893881}, 186 {1, 0, 0, 0}}, 187 {{0xb89530796f0f60, 0xade92bd26909a3, 188 
0x1a0c83fb4884da, 0x1765bf22a5a984}, 189 {0x772a9ee75db09e, 0x23bc6c67cec16f, 190 0x4c1edba8b14e2f, 0xe2a215d9611369}, 191 {1, 0, 0, 0}}, 192 {{0x571e509fb5efb3, 0xade88696410552, 193 0xc8ae85fada74fe, 0x6c7e4be83bbde3}, 194 {0xff9f51160f4652, 0xb47ce2495a6539, 195 0xa2946c53b582f4, 0x286d2db3ee9a60}, 196 {1, 0, 0, 0}}, 197 {{0x40bbd5081a44af, 0x0995183b13926c, 198 0xbcefba6f47f6d0, 0x215619e9cc0057}, 199 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 200 0x8631b93cdfe8b5, 0xe7e3f4b0982db9}, 201 {1, 0, 0, 0}}, 202 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 203 0x1c29819435d2c6, 0xc813132f4c07e9}, 204 {0x2891425503b11f, 0x08781030579fea, 205 0xf5426ba5cc9674, 0x1e28ebf18562bc}, 206 {1, 0, 0, 0}}, 207 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 208 0xff17036691a973, 0xf1aef351497c58}, 209 {0xdd1f2d600564ff, 0xdead073b1402db, 210 0x74a684435bd693, 0xeea7471f962558}, 211 {1, 0, 0, 0}}}, 212{{{0, 0, 0, 0}, 213 {0, 0, 0, 0}, 214 {0, 0, 0, 0}}, 215 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31}, 216 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d}, 217 {1, 0, 0, 0}}, 218 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3}, 219 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a}, 220 {1, 0, 0, 0}}, 221 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33}, 222 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100}, 223 {1, 0, 0, 0}}, 224 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5}, 225 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea}, 226 {1, 0, 0, 0}}, 227 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be}, 228 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51}, 229 {1, 0, 0, 0}}, 230 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1}, 231 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb}, 232 {1, 0, 0, 0}}, 233 
{{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233}, 234 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def}, 235 {1, 0, 0, 0}}, 236 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae}, 237 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45}, 238 {1, 0, 0, 0}}, 239 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e}, 240 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb}, 241 {1, 0, 0, 0}}, 242 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de}, 243 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3}, 244 {1, 0, 0, 0}}, 245 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05}, 246 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58}, 247 {1, 0, 0, 0}}, 248 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb}, 249 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0}, 250 {1, 0, 0, 0}}, 251 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9}, 252 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea}, 253 {1, 0, 0, 0}}, 254 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba}, 255 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405}, 256 {1, 0, 0, 0}}, 257 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e}, 258 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e}, 259 {1, 0, 0, 0}}} 260}; 261 262/* Precomputation for the group generator. 
*/ 263typedef struct { 264 felem g_pre_comp[2][16][3]; 265 int references; 266} NISTP224_PRE_COMP; 267 268const EC_METHOD *EC_GFp_nistp224_method(void) 269{ 270 static const EC_METHOD ret = { 271 EC_FLAGS_DEFAULT_OCT, 272 NID_X9_62_prime_field, 273 ec_GFp_nistp224_group_init, 274 ec_GFp_simple_group_finish, 275 ec_GFp_simple_group_clear_finish, 276 ec_GFp_nist_group_copy, 277 ec_GFp_nistp224_group_set_curve, 278 ec_GFp_simple_group_get_curve, 279 ec_GFp_simple_group_get_degree, 280 ec_GFp_simple_group_check_discriminant, 281 ec_GFp_simple_point_init, 282 ec_GFp_simple_point_finish, 283 ec_GFp_simple_point_clear_finish, 284 ec_GFp_simple_point_copy, 285 ec_GFp_simple_point_set_to_infinity, 286 ec_GFp_simple_set_Jprojective_coordinates_GFp, 287 ec_GFp_simple_get_Jprojective_coordinates_GFp, 288 ec_GFp_simple_point_set_affine_coordinates, 289 ec_GFp_nistp224_point_get_affine_coordinates, 290 0 /* point_set_compressed_coordinates */ , 291 0 /* point2oct */ , 292 0 /* oct2point */ , 293 ec_GFp_simple_add, 294 ec_GFp_simple_dbl, 295 ec_GFp_simple_invert, 296 ec_GFp_simple_is_at_infinity, 297 ec_GFp_simple_is_on_curve, 298 ec_GFp_simple_cmp, 299 ec_GFp_simple_make_affine, 300 ec_GFp_simple_points_make_affine, 301 ec_GFp_nistp224_points_mul, 302 ec_GFp_nistp224_precompute_mult, 303 ec_GFp_nistp224_have_precompute_mult, 304 ec_GFp_nist_field_mul, 305 ec_GFp_nist_field_sqr, 306 0 /* field_div */ , 307 0 /* field_encode */ , 308 0 /* field_decode */ , 309 0 /* field_set_to_one */ 310 }; 311 312 return &ret; 313} 314 315/* 316 * Helper functions to convert field elements to/from internal representation 317 */ 318static void bin28_to_felem(felem out, const u8 in[28]) 319{ 320 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 321 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; 322 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; 323 out[3] = (*((const uint64_t *)(in+20))) >> 8; 324} 325 326static void felem_to_bin28(u8 out[28], const felem 
in) 327{ 328 unsigned i; 329 for (i = 0; i < 7; ++i) { 330 out[i] = in[0] >> (8 * i); 331 out[i + 7] = in[1] >> (8 * i); 332 out[i + 14] = in[2] >> (8 * i); 333 out[i + 21] = in[3] >> (8 * i); 334 } 335} 336 337/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 338static void flip_endian(u8 *out, const u8 *in, unsigned len) 339{ 340 unsigned i; 341 for (i = 0; i < len; ++i) 342 out[i] = in[len - 1 - i]; 343} 344 345/* From OpenSSL BIGNUM to internal representation */ 346static int BN_to_felem(felem out, const BIGNUM *bn) 347{ 348 felem_bytearray b_in; 349 felem_bytearray b_out; 350 unsigned num_bytes; 351 352 /* BN_bn2bin eats leading zeroes */ 353 memset(b_out, 0, sizeof(b_out)); 354 num_bytes = BN_num_bytes(bn); 355 if (num_bytes > sizeof(b_out)) { 356 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 357 return 0; 358 } 359 if (BN_is_negative(bn)) { 360 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 361 return 0; 362 } 363 num_bytes = BN_bn2bin(bn, b_in); 364 flip_endian(b_out, b_in, num_bytes); 365 bin28_to_felem(out, b_out); 366 return 1; 367} 368 369/* From internal representation to OpenSSL BIGNUM */ 370static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 371{ 372 felem_bytearray b_in, b_out; 373 felem_to_bin28(b_in, in); 374 flip_endian(b_out, b_in, sizeof(b_out)); 375 return BN_bin2bn(b_out, sizeof(b_out), out); 376} 377 378/******************************************************************************/ 379/*- 380 * FIELD OPERATIONS 381 * 382 * Field operations, using the internal representation of field elements. 383 * NB! These operations are specific to our point multiplication and cannot be 384 * expected to be correct in general - e.g., multiplication with a large scalar 385 * will cause an overflow. 
386 * 387 */ 388 389static void felem_one(felem out) 390{ 391 out[0] = 1; 392 out[1] = 0; 393 out[2] = 0; 394 out[3] = 0; 395} 396 397static void felem_assign(felem out, const felem in) 398{ 399 out[0] = in[0]; 400 out[1] = in[1]; 401 out[2] = in[2]; 402 out[3] = in[3]; 403} 404 405/* Sum two field elements: out += in */ 406static void felem_sum(felem out, const felem in) 407{ 408 out[0] += in[0]; 409 out[1] += in[1]; 410 out[2] += in[2]; 411 out[3] += in[3]; 412} 413 414/* Get negative value: out = -in */ 415/* Assumes in[i] < 2^57 */ 416static void felem_neg(felem out, const felem in) 417{ 418 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 419 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 420 static const limb two58m42m2 = (((limb) 1) << 58) - 421 (((limb) 1) << 42) - (((limb) 1) << 2); 422 423 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */ 424 out[0] = two58p2 - in[0]; 425 out[1] = two58m42m2 - in[1]; 426 out[2] = two58m2 - in[2]; 427 out[3] = two58m2 - in[3]; 428} 429 430/* Subtract field elements: out -= in */ 431/* Assumes in[i] < 2^57 */ 432static void felem_diff(felem out, const felem in) 433{ 434 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 435 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 436 static const limb two58m42m2 = (((limb) 1) << 58) - 437 (((limb) 1) << 42) - (((limb) 1) << 2); 438 439 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 440 out[0] += two58p2; 441 out[1] += two58m42m2; 442 out[2] += two58m2; 443 out[3] += two58m2; 444 445 out[0] -= in[0]; 446 out[1] -= in[1]; 447 out[2] -= in[2]; 448 out[3] -= in[3]; 449} 450 451/* Subtract in unreduced 128-bit mode: out -= in */ 452/* Assumes in[i] < 2^119 */ 453static void widefelem_diff(widefelem out, const widefelem in) 454{ 455 static const widelimb two120 = ((widelimb) 1) << 120; 456 static const widelimb two120m64 = (((widelimb) 1) << 120) - 457 (((widelimb) 1) << 64); 458 static const widelimb 
two120m104m64 = (((widelimb) 1) << 120) - 459 (((widelimb) 1) << 104) - (((widelimb) 1) << 64); 460 461 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 462 out[0] += two120; 463 out[1] += two120m64; 464 out[2] += two120m64; 465 out[3] += two120; 466 out[4] += two120m104m64; 467 out[5] += two120m64; 468 out[6] += two120m64; 469 470 out[0] -= in[0]; 471 out[1] -= in[1]; 472 out[2] -= in[2]; 473 out[3] -= in[3]; 474 out[4] -= in[4]; 475 out[5] -= in[5]; 476 out[6] -= in[6]; 477} 478 479/* Subtract in mixed mode: out128 -= in64 */ 480/* in[i] < 2^63 */ 481static void felem_diff_128_64(widefelem out, const felem in) 482{ 483 static const widelimb two64p8 = (((widelimb) 1) << 64) + 484 (((widelimb) 1) << 8); 485 static const widelimb two64m8 = (((widelimb) 1) << 64) - 486 (((widelimb) 1) << 8); 487 static const widelimb two64m48m8 = (((widelimb) 1) << 64) - 488 (((widelimb) 1) << 48) - (((widelimb) 1) << 8); 489 490 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 491 out[0] += two64p8; 492 out[1] += two64m48m8; 493 out[2] += two64m8; 494 out[3] += two64m8; 495 496 out[0] -= in[0]; 497 out[1] -= in[1]; 498 out[2] -= in[2]; 499 out[3] -= in[3]; 500} 501 502/* 503 * Multiply a field element by a scalar: out = out * scalar The scalars we 504 * actually use are small, so results fit without overflow 505 */ 506static void felem_scalar(felem out, const limb scalar) 507{ 508 out[0] *= scalar; 509 out[1] *= scalar; 510 out[2] *= scalar; 511 out[3] *= scalar; 512} 513 514/* 515 * Multiply an unreduced field element by a scalar: out = out * scalar The 516 * scalars we actually use are small, so results fit without overflow 517 */ 518static void widefelem_scalar(widefelem out, const widelimb scalar) 519{ 520 out[0] *= scalar; 521 out[1] *= scalar; 522 out[2] *= scalar; 523 out[3] *= scalar; 524 out[4] *= scalar; 525 out[5] *= scalar; 526 out[6] *= scalar; 527} 528 529/* Square a field element: out = in^2 */ 530static void felem_square(widefelem out, const felem in) 531{ 532 limb 
tmp0, tmp1, tmp2; 533 tmp0 = 2 * in[0]; 534 tmp1 = 2 * in[1]; 535 tmp2 = 2 * in[2]; 536 out[0] = ((widelimb) in[0]) * in[0]; 537 out[1] = ((widelimb) in[0]) * tmp1; 538 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1]; 539 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2; 540 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2]; 541 out[5] = ((widelimb) in[3]) * tmp2; 542 out[6] = ((widelimb) in[3]) * in[3]; 543} 544 545/* Multiply two field elements: out = in1 * in2 */ 546static void felem_mul(widefelem out, const felem in1, const felem in2) 547{ 548 out[0] = ((widelimb) in1[0]) * in2[0]; 549 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0]; 550 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] + 551 ((widelimb) in1[2]) * in2[0]; 552 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] + 553 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0]; 554 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] + 555 ((widelimb) in1[3]) * in2[1]; 556 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2]; 557 out[6] = ((widelimb) in1[3]) * in2[3]; 558} 559 560/*- 561 * Reduce seven 128-bit coefficients to four 64-bit coefficients. 
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /* Eliminate in[4], in[5], in[6] */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}

/* Square and partially reduce: out = in^2 mod p (result < 2*p) */
static void felem_square_reduce(felem out, const felem in)
{
    widefelem tmp;
    felem_square(tmp, in);
    felem_reduce(out, tmp);
}

/* Multiply and partially reduce: out = in1*in2 mod p (result < 2*p) */
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
{
    widefelem tmp;
    felem_mul(tmp, in1, in2);
    felem_reduce(out, tmp);
}

/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first)
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}

/*
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 * (i.e., 0, p and 2*p; the comparisons are branch-free for constant time)
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}

/* int-returning wrapper around felem_is_zero, for use as a callback */
static int felem_is_zero_int(const void *in)
{
    return (int)(felem_is_zero(in) & ((limb) 1));
}

/* Invert a field element: out = in^(p-2) = in^-1 mod p (Fermat) */
/* Computation chain copied from djb's code */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    /* Each comment gives the exponent of 'in' accumulated so far */
    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}

/*
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 * out to itself.
 */
static void copy_conditional(felem out, const felem in, limb icopy)
{
    unsigned i;
    /*
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
     */
    const limb copy = -icopy;
    for (i = 0; i < 4; ++i) {
        /* branch-free select: tmp is in[i]^out[i] or 0 */
        const limb tmp = copy & (in[i] ^ out[i]);
        out[i] ^= tmp;
    }
}

/******************************************************************************/
/*-
 * ELLIPTIC CURVE POINT OPERATIONS
 *
 * Points are represented in Jacobian projective coordinates:
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 * or to the point at infinity if Z == 0.
 *
 */

/*-
 * Double an elliptic curve point:
 * (X', Y', Z') = 2 * (X, Y, Z), where
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 * while x_out == y_in is not (maybe this works, but it's not tested).
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}

/*-
 * Add two elliptic curve points:
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 *
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 */

/*
 * This function is not entirely constant-time: it includes a branch for
 * checking whether the two input points are equal, (while not equal to the
 * point at infinity). This case never happens during single point
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * the formulae are incorrect if the points are equal so we check for
     * this and do doubling if this happens
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}

/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
1098 * The pre_comp array argument should be size of |size| argument 1099 */ 1100static void select_point(const u64 idx, unsigned int size, 1101 const felem pre_comp[][3], felem out[3]) 1102{ 1103 unsigned i, j; 1104 limb *outlimbs = &out[0][0]; 1105 memset(outlimbs, 0, 3 * sizeof(felem)); 1106 1107 for (i = 0; i < size; i++) { 1108 const limb *inlimbs = &pre_comp[i][0][0]; 1109 u64 mask = i ^ idx; 1110 mask |= mask >> 4; 1111 mask |= mask >> 2; 1112 mask |= mask >> 1; 1113 mask &= 1; 1114 mask--; 1115 for (j = 0; j < 4 * 3; j++) 1116 outlimbs[j] |= inlimbs[j] & mask; 1117 } 1118} 1119 1120/* get_bit returns the |i|th bit in |in| */ 1121static char get_bit(const felem_bytearray in, unsigned i) 1122{ 1123 if (i >= 224) 1124 return 0; 1125 return (in[i >> 3] >> (i & 7)) & 1; 1126} 1127 1128/* 1129 * Interleaved point multiplication using precomputed point multiples: The 1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1132 * generator, using certain (large) precomputed multiples in g_pre_comp. 1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1134 */ 1135static void batch_mul(felem x_out, felem y_out, felem z_out, 1136 const felem_bytearray scalars[], 1137 const unsigned num_points, const u8 *g_scalar, 1138 const int mixed, const felem pre_comp[][17][3], 1139 const felem g_pre_comp[2][16][3]) 1140{ 1141 int i, skip; 1142 unsigned num; 1143 unsigned gen_mul = (g_scalar != NULL); 1144 felem nq[3], tmp[4]; 1145 u64 bits; 1146 u8 sign, digit; 1147 1148 /* set nq to the point at infinity */ 1149 memset(nq, 0, 3 * sizeof(felem)); 1150 1151 /* 1152 * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1153 * of the generator (two in each of the last 28 rounds) and additions of 1154 * other points multiples (every 5th round). 1155 */ 1156 skip = 1; /* save two point operations in the first 1157 * round */ 1158 for (i = (num_points ? 
220 : 27); i >= 0; --i) { 1159 /* double */ 1160 if (!skip) 1161 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1162 1163 /* add multiples of the generator */ 1164 if (gen_mul && (i <= 27)) { 1165 /* first, look 28 bits upwards */ 1166 bits = get_bit(g_scalar, i + 196) << 3; 1167 bits |= get_bit(g_scalar, i + 140) << 2; 1168 bits |= get_bit(g_scalar, i + 84) << 1; 1169 bits |= get_bit(g_scalar, i + 28); 1170 /* select the point to add, in constant time */ 1171 select_point(bits, 16, g_pre_comp[1], tmp); 1172 1173 if (!skip) { 1174 /* value 1 below is argument for "mixed" */ 1175 point_add(nq[0], nq[1], nq[2], 1176 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1177 } else { 1178 memcpy(nq, tmp, 3 * sizeof(felem)); 1179 skip = 0; 1180 } 1181 1182 /* second, look at the current position */ 1183 bits = get_bit(g_scalar, i + 168) << 3; 1184 bits |= get_bit(g_scalar, i + 112) << 2; 1185 bits |= get_bit(g_scalar, i + 56) << 1; 1186 bits |= get_bit(g_scalar, i); 1187 /* select the point to add, in constant time */ 1188 select_point(bits, 16, g_pre_comp[0], tmp); 1189 point_add(nq[0], nq[1], nq[2], 1190 nq[0], nq[1], nq[2], 1191 1 /* mixed */ , tmp[0], tmp[1], tmp[2]); 1192 } 1193 1194 /* do other additions every 5 doublings */ 1195 if (num_points && (i % 5 == 0)) { 1196 /* loop over all scalars */ 1197 for (num = 0; num < num_points; ++num) { 1198 bits = get_bit(scalars[num], i + 4) << 5; 1199 bits |= get_bit(scalars[num], i + 3) << 4; 1200 bits |= get_bit(scalars[num], i + 2) << 3; 1201 bits |= get_bit(scalars[num], i + 1) << 2; 1202 bits |= get_bit(scalars[num], i) << 1; 1203 bits |= get_bit(scalars[num], i - 1); 1204 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1205 1206 /* select the point to add or subtract */ 1207 select_point(digit, 17, pre_comp[num], tmp); 1208 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1209 * point */ 1210 copy_conditional(tmp[1], tmp[3], sign); 1211 1212 if (!skip) { 1213 point_add(nq[0], nq[1], nq[2], 1214 
nq[0], nq[1], nq[2], 1215 mixed, tmp[0], tmp[1], tmp[2]); 1216 } else { 1217 memcpy(nq, tmp, 3 * sizeof(felem)); 1218 skip = 0; 1219 } 1220 } 1221 } 1222 } 1223 felem_assign(x_out, nq[0]); 1224 felem_assign(y_out, nq[1]); 1225 felem_assign(z_out, nq[2]); 1226} 1227 1228/******************************************************************************/ 1229/* 1230 * FUNCTIONS TO MANAGE PRECOMPUTATION 1231 */ 1232 1233static NISTP224_PRE_COMP *nistp224_pre_comp_new() 1234{ 1235 NISTP224_PRE_COMP *ret = NULL; 1236 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret)); 1237 if (!ret) { 1238 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1239 return ret; 1240 } 1241 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1242 ret->references = 1; 1243 return ret; 1244} 1245 1246static void *nistp224_pre_comp_dup(void *src_) 1247{ 1248 NISTP224_PRE_COMP *src = src_; 1249 1250 /* no need to actually copy, these objects never change! */ 1251 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1252 1253 return src_; 1254} 1255 1256static void nistp224_pre_comp_free(void *pre_) 1257{ 1258 int i; 1259 NISTP224_PRE_COMP *pre = pre_; 1260 1261 if (!pre) 1262 return; 1263 1264 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1265 if (i > 0) 1266 return; 1267 1268 OPENSSL_free(pre); 1269} 1270 1271static void nistp224_pre_comp_clear_free(void *pre_) 1272{ 1273 int i; 1274 NISTP224_PRE_COMP *pre = pre_; 1275 1276 if (!pre) 1277 return; 1278 1279 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1280 if (i > 0) 1281 return; 1282 1283 OPENSSL_cleanse(pre, sizeof(*pre)); 1284 OPENSSL_free(pre); 1285} 1286 1287/******************************************************************************/ 1288/* 1289 * OPENSSL EC_METHOD FUNCTIONS 1290 */ 1291 1292int ec_GFp_nistp224_group_init(EC_GROUP *group) 1293{ 1294 int ret; 1295 ret = ec_GFp_simple_group_init(group); 1296 group->a_is_minus3 = 1; 1297 return ret; 1298} 1299 1300int 
ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1301 const BIGNUM *a, const BIGNUM *b, 1302 BN_CTX *ctx) 1303{ 1304 int ret = 0; 1305 BN_CTX *new_ctx = NULL; 1306 BIGNUM *curve_p, *curve_a, *curve_b; 1307 1308 if (ctx == NULL) 1309 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1310 return 0; 1311 BN_CTX_start(ctx); 1312 if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1313 ((curve_a = BN_CTX_get(ctx)) == NULL) || 1314 ((curve_b = BN_CTX_get(ctx)) == NULL)) 1315 goto err; 1316 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1317 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1318 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1319 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1320 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1321 EC_R_WRONG_CURVE_PARAMETERS); 1322 goto err; 1323 } 1324 group->field_mod_func = BN_nist_mod_224; 1325 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1326 err: 1327 BN_CTX_end(ctx); 1328 if (new_ctx != NULL) 1329 BN_CTX_free(new_ctx); 1330 return ret; 1331} 1332 1333/* 1334 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1335 * (X/Z^2, Y/Z^3) 1336 */ 1337int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1338 const EC_POINT *point, 1339 BIGNUM *x, BIGNUM *y, 1340 BN_CTX *ctx) 1341{ 1342 felem z1, z2, x_in, y_in, x_out, y_out; 1343 widefelem tmp; 1344 1345 if (EC_POINT_is_at_infinity(group, point)) { 1346 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1347 EC_R_POINT_AT_INFINITY); 1348 return 0; 1349 } 1350 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1351 (!BN_to_felem(z1, &point->Z))) 1352 return 0; 1353 felem_inv(z2, z1); 1354 felem_square(tmp, z2); 1355 felem_reduce(z1, tmp); 1356 felem_mul(tmp, x_in, z1); 1357 felem_reduce(x_in, tmp); 1358 felem_contract(x_out, x_in); 1359 if (x != NULL) { 1360 if (!felem_to_BN(x, x_out)) { 1361 
ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1362 ERR_R_BN_LIB); 1363 return 0; 1364 } 1365 } 1366 felem_mul(tmp, z1, z2); 1367 felem_reduce(z1, tmp); 1368 felem_mul(tmp, y_in, z1); 1369 felem_reduce(y_in, tmp); 1370 felem_contract(y_out, y_in); 1371 if (y != NULL) { 1372 if (!felem_to_BN(y, y_out)) { 1373 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1374 ERR_R_BN_LIB); 1375 return 0; 1376 } 1377 } 1378 return 1; 1379} 1380 1381static void make_points_affine(size_t num, felem points[ /* num */ ][3], 1382 felem tmp_felems[ /* num+1 */ ]) 1383{ 1384 /* 1385 * Runs in constant time, unless an input is the point at infinity (which 1386 * normally shouldn't happen). 1387 */ 1388 ec_GFp_nistp_points_make_affine_internal(num, 1389 points, 1390 sizeof(felem), 1391 tmp_felems, 1392 (void (*)(void *))felem_one, 1393 felem_is_zero_int, 1394 (void (*)(void *, const void *)) 1395 felem_assign, 1396 (void (*)(void *, const void *)) 1397 felem_square_reduce, (void (*) 1398 (void *, 1399 const void 1400 *, 1401 const void 1402 *)) 1403 felem_mul_reduce, 1404 (void (*)(void *, const void *)) 1405 felem_inv, 1406 (void (*)(void *, const void *)) 1407 felem_contract); 1408} 1409 1410/* 1411 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1412 * values Result is stored in r (r can equal one of the inputs). 
1413 */ 1414int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, 1415 const BIGNUM *scalar, size_t num, 1416 const EC_POINT *points[], 1417 const BIGNUM *scalars[], BN_CTX *ctx) 1418{ 1419 int ret = 0; 1420 int j; 1421 unsigned i; 1422 int mixed = 0; 1423 BN_CTX *new_ctx = NULL; 1424 BIGNUM *x, *y, *z, *tmp_scalar; 1425 felem_bytearray g_secret; 1426 felem_bytearray *secrets = NULL; 1427 felem(*pre_comp)[17][3] = NULL; 1428 felem *tmp_felems = NULL; 1429 felem_bytearray tmp; 1430 unsigned num_bytes; 1431 int have_pre_comp = 0; 1432 size_t num_points = num; 1433 felem x_in, y_in, z_in, x_out, y_out, z_out; 1434 NISTP224_PRE_COMP *pre = NULL; 1435 const felem(*g_pre_comp)[16][3] = NULL; 1436 EC_POINT *generator = NULL; 1437 const EC_POINT *p = NULL; 1438 const BIGNUM *p_scalar = NULL; 1439 1440 if (ctx == NULL) 1441 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1442 return 0; 1443 BN_CTX_start(ctx); 1444 if (((x = BN_CTX_get(ctx)) == NULL) || 1445 ((y = BN_CTX_get(ctx)) == NULL) || 1446 ((z = BN_CTX_get(ctx)) == NULL) || 1447 ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1448 goto err; 1449 1450 if (scalar != NULL) { 1451 pre = EC_EX_DATA_get_data(group->extra_data, 1452 nistp224_pre_comp_dup, 1453 nistp224_pre_comp_free, 1454 nistp224_pre_comp_clear_free); 1455 if (pre) 1456 /* we have precomputation, try to use it */ 1457 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp; 1458 else 1459 /* try to use the standard precomputation */ 1460 g_pre_comp = &gmul[0]; 1461 generator = EC_POINT_new(group); 1462 if (generator == NULL) 1463 goto err; 1464 /* get the generator from precomputation */ 1465 if (!felem_to_BN(x, g_pre_comp[0][1][0]) || 1466 !felem_to_BN(y, g_pre_comp[0][1][1]) || 1467 !felem_to_BN(z, g_pre_comp[0][1][2])) { 1468 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1469 goto err; 1470 } 1471 if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1472 generator, x, y, z, 1473 ctx)) 1474 goto err; 1475 if (0 == EC_POINT_cmp(group, generator, 
group->generator, ctx)) 1476 /* precomputation matches generator */ 1477 have_pre_comp = 1; 1478 else 1479 /* 1480 * we don't have valid precomputation: treat the generator as a 1481 * random point 1482 */ 1483 num_points = num_points + 1; 1484 } 1485 1486 if (num_points > 0) { 1487 if (num_points >= 3) { 1488 /* 1489 * unless we precompute multiples for just one or two points, 1490 * converting those into affine form is time well spent 1491 */ 1492 mixed = 1; 1493 } 1494 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1495 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1496 if (mixed) 1497 tmp_felems = 1498 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1499 if ((secrets == NULL) || (pre_comp == NULL) 1500 || (mixed && (tmp_felems == NULL))) { 1501 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1502 goto err; 1503 } 1504 1505 /* 1506 * we treat NULL scalars as 0, and NULL points as points at infinity, 1507 * i.e., they contribute nothing to the linear combination 1508 */ 1509 memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1510 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1511 for (i = 0; i < num_points; ++i) { 1512 if (i == num) 1513 /* the generator */ 1514 { 1515 p = EC_GROUP_get0_generator(group); 1516 p_scalar = scalar; 1517 } else 1518 /* the i^th point */ 1519 { 1520 p = points[i]; 1521 p_scalar = scalars[i]; 1522 } 1523 if ((p_scalar != NULL) && (p != NULL)) { 1524 /* reduce scalar to 0 <= scalar < 2^224 */ 1525 if ((BN_num_bits(p_scalar) > 224) 1526 || (BN_is_negative(p_scalar))) { 1527 /* 1528 * this is an unusual input, and we don't guarantee 1529 * constant-timeness 1530 */ 1531 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1532 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1533 goto err; 1534 } 1535 num_bytes = BN_bn2bin(tmp_scalar, tmp); 1536 } else 1537 num_bytes = BN_bn2bin(p_scalar, tmp); 1538 flip_endian(secrets[i], tmp, num_bytes); 1539 /* 
precompute multiples */ 1540 if ((!BN_to_felem(x_out, &p->X)) || 1541 (!BN_to_felem(y_out, &p->Y)) || 1542 (!BN_to_felem(z_out, &p->Z))) 1543 goto err; 1544 felem_assign(pre_comp[i][1][0], x_out); 1545 felem_assign(pre_comp[i][1][1], y_out); 1546 felem_assign(pre_comp[i][1][2], z_out); 1547 for (j = 2; j <= 16; ++j) { 1548 if (j & 1) { 1549 point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1550 pre_comp[i][j][2], pre_comp[i][1][0], 1551 pre_comp[i][1][1], pre_comp[i][1][2], 0, 1552 pre_comp[i][j - 1][0], 1553 pre_comp[i][j - 1][1], 1554 pre_comp[i][j - 1][2]); 1555 } else { 1556 point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1557 pre_comp[i][j][2], pre_comp[i][j / 2][0], 1558 pre_comp[i][j / 2][1], 1559 pre_comp[i][j / 2][2]); 1560 } 1561 } 1562 } 1563 } 1564 if (mixed) 1565 make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1566 } 1567 1568 /* the scalar for the generator */ 1569 if ((scalar != NULL) && (have_pre_comp)) { 1570 memset(g_secret, 0, sizeof(g_secret)); 1571 /* reduce scalar to 0 <= scalar < 2^224 */ 1572 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) { 1573 /* 1574 * this is an unusual input, and we don't guarantee 1575 * constant-timeness 1576 */ 1577 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1578 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1579 goto err; 1580 } 1581 num_bytes = BN_bn2bin(tmp_scalar, tmp); 1582 } else 1583 num_bytes = BN_bn2bin(scalar, tmp); 1584 flip_endian(g_secret, tmp, num_bytes); 1585 /* do the multiplication with generator precomputation */ 1586 batch_mul(x_out, y_out, z_out, 1587 (const felem_bytearray(*))secrets, num_points, 1588 g_secret, 1589 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp); 1590 } else 1591 /* do the multiplication without generator precomputation */ 1592 batch_mul(x_out, y_out, z_out, 1593 (const felem_bytearray(*))secrets, num_points, 1594 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1595 /* reduce the output to its unique minimal 
representation */ 1596 felem_contract(x_in, x_out); 1597 felem_contract(y_in, y_out); 1598 felem_contract(z_in, z_out); 1599 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1600 (!felem_to_BN(z, z_in))) { 1601 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1602 goto err; 1603 } 1604 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1605 1606 err: 1607 BN_CTX_end(ctx); 1608 if (generator != NULL) 1609 EC_POINT_free(generator); 1610 if (new_ctx != NULL) 1611 BN_CTX_free(new_ctx); 1612 if (secrets != NULL) 1613 OPENSSL_free(secrets); 1614 if (pre_comp != NULL) 1615 OPENSSL_free(pre_comp); 1616 if (tmp_felems != NULL) 1617 OPENSSL_free(tmp_felems); 1618 return ret; 1619} 1620 1621int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1622{ 1623 int ret = 0; 1624 NISTP224_PRE_COMP *pre = NULL; 1625 int i, j; 1626 BN_CTX *new_ctx = NULL; 1627 BIGNUM *x, *y; 1628 EC_POINT *generator = NULL; 1629 felem tmp_felems[32]; 1630 1631 /* throw away old precomputation */ 1632 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup, 1633 nistp224_pre_comp_free, 1634 nistp224_pre_comp_clear_free); 1635 if (ctx == NULL) 1636 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1637 return 0; 1638 BN_CTX_start(ctx); 1639 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 1640 goto err; 1641 /* get the generator */ 1642 if (group->generator == NULL) 1643 goto err; 1644 generator = EC_POINT_new(group); 1645 if (generator == NULL) 1646 goto err; 1647 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x); 1648 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y); 1649 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1650 goto err; 1651 if ((pre = nistp224_pre_comp_new()) == NULL) 1652 goto err; 1653 /* 1654 * if the generator is the standard one, use built-in precomputation 1655 */ 1656 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 1657 
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1658 goto done; 1659 } 1660 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || 1661 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || 1662 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z))) 1663 goto err; 1664 /* 1665 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G, 1666 * 2^140*G, 2^196*G for the second one 1667 */ 1668 for (i = 1; i <= 8; i <<= 1) { 1669 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1670 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0], 1671 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 1672 for (j = 0; j < 27; ++j) { 1673 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1674 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0], 1675 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1676 } 1677 if (i == 8) 1678 break; 1679 point_double(pre->g_pre_comp[0][2 * i][0], 1680 pre->g_pre_comp[0][2 * i][1], 1681 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0], 1682 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1683 for (j = 0; j < 27; ++j) { 1684 point_double(pre->g_pre_comp[0][2 * i][0], 1685 pre->g_pre_comp[0][2 * i][1], 1686 pre->g_pre_comp[0][2 * i][2], 1687 pre->g_pre_comp[0][2 * i][0], 1688 pre->g_pre_comp[0][2 * i][1], 1689 pre->g_pre_comp[0][2 * i][2]); 1690 } 1691 } 1692 for (i = 0; i < 2; i++) { 1693 /* g_pre_comp[i][0] is the point at infinity */ 1694 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 1695 /* the remaining multiples */ 1696 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */ 1697 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], 1698 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0], 1699 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 1700 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1701 pre->g_pre_comp[i][2][2]); 1702 /* 2^56*G + 2^168*G resp. 
2^84*G + 2^196*G */ 1703 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], 1704 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0], 1705 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1706 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1707 pre->g_pre_comp[i][2][2]); 1708 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */ 1709 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], 1710 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0], 1711 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1712 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], 1713 pre->g_pre_comp[i][4][2]); 1714 /* 1715 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G 1716 */ 1717 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], 1718 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0], 1719 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 1720 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1721 pre->g_pre_comp[i][2][2]); 1722 for (j = 1; j < 8; ++j) { 1723 /* odd multiples: add G resp. 
2^28*G */ 1724 point_add(pre->g_pre_comp[i][2 * j + 1][0], 1725 pre->g_pre_comp[i][2 * j + 1][1], 1726 pre->g_pre_comp[i][2 * j + 1][2], 1727 pre->g_pre_comp[i][2 * j][0], 1728 pre->g_pre_comp[i][2 * j][1], 1729 pre->g_pre_comp[i][2 * j][2], 0, 1730 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], 1731 pre->g_pre_comp[i][1][2]); 1732 } 1733 } 1734 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); 1735 1736 done: 1737 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, 1738 nistp224_pre_comp_free, 1739 nistp224_pre_comp_clear_free)) 1740 goto err; 1741 ret = 1; 1742 pre = NULL; 1743 err: 1744 BN_CTX_end(ctx); 1745 if (generator != NULL) 1746 EC_POINT_free(generator); 1747 if (new_ctx != NULL) 1748 BN_CTX_free(new_ctx); 1749 if (pre) 1750 nistp224_pre_comp_free(pre); 1751 return ret; 1752} 1753 1754int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group) 1755{ 1756 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup, 1757 nistp224_pre_comp_free, 1758 nistp224_pre_comp_clear_free) 1759 != NULL) 1760 return 1; 1761 else 1762 return 0; 1763} 1764 1765#else 1766static void *dummy = &dummy; 1767#endif 1768