ecp_nistp224.c revision 325337
1/* crypto/ec/ecp_nistp224.c */ 2/* 3 * Written by Emilia Kasper (Google) for the OpenSSL project. 4 */ 5/* Copyright 2011 Google Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21/* 22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 23 * 24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation 25 * and Adam Langley's public domain 64-bit C implementation of curve25519 26 */ 27 28#include <openssl/opensslconf.h> 29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 30 31# ifndef OPENSSL_SYS_VMS 32# include <stdint.h> 33# else 34# include <inttypes.h> 35# endif 36 37# include <string.h> 38# include <openssl/err.h> 39# include "ec_lcl.h" 40 41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 42 /* even with gcc, the typedef won't work for 32-bit platforms */ 43typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 44 * platforms */ 45# else 46# error "Need GCC 3.1 or later to define type uint128_t" 47# endif 48 49typedef uint8_t u8; 50typedef uint64_t u64; 51typedef int64_t s64; 52 53/******************************************************************************/ 54/*- 55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS 56 * 57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 58 * using 64-bit coefficients called 'limbs', 59 * and sometimes (for multiplication results) as 60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6 61 * using 128-bit coefficients called 'widelimbs'. 62 * A 4-limb representation is an 'felem'; 63 * a 7-widelimb representation is a 'widefelem'. 64 * Even within felems, bits of adjacent limbs overlap, and we don't always 65 * reduce the representations: we ensure that inputs to each felem 66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60, 67 * and fit into a 128-bit word without overflow. The coefficients are then 68 * again partially reduced to obtain an felem satisfying a_i < 2^57. 69 * We only reduce to the unique minimal representation at the end of the 70 * computation. 71 */ 72 73typedef uint64_t limb; 74typedef uint128_t widelimb; 75 76typedef limb felem[4]; 77typedef widelimb widefelem[7]; 78 79/* 80 * Field element represented as a byte arrary. 28*8 = 224 bits is also the 81 * group order size for the elliptic curve, and we also use this type for 82 * scalars for point multiplication. 83 */ 84typedef u8 felem_bytearray[28]; 85 86static const felem_bytearray nistp224_curve_params[5] = { 87 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */ 88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 89 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, 90 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */ 91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 92 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}, 93 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */ 94 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA, 95 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4}, 96 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */ 97 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22, 98 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21}, 99 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */ 100 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64, 101 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34} 102}; 103 104/*- 105 * Precomputed multiples of the standard generator 106 * Points are given in coordinates (X, Y, Z) where Z normally is 1 107 * (0 for the point at infinity). 108 * For each field element, slice a_0 is word 0, etc. 109 * 110 * The table has 2 * 16 elements, starting with the following: 111 * index | bits | point 112 * ------+---------+------------------------------ 113 * 0 | 0 0 0 0 | 0G 114 * 1 | 0 0 0 1 | 1G 115 * 2 | 0 0 1 0 | 2^56G 116 * 3 | 0 0 1 1 | (2^56 + 1)G 117 * 4 | 0 1 0 0 | 2^112G 118 * 5 | 0 1 0 1 | (2^112 + 1)G 119 * 6 | 0 1 1 0 | (2^112 + 2^56)G 120 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G 121 * 8 | 1 0 0 0 | 2^168G 122 * 9 | 1 0 0 1 | (2^168 + 1)G 123 * 10 | 1 0 1 0 | (2^168 + 2^56)G 124 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G 125 * 12 | 1 1 0 0 | (2^168 + 2^112)G 126 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G 127 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G 128 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G 129 * followed by a copy of this with each element multiplied by 2^28. 130 * 131 * The reason for this is so that we can clock bits into four different 132 * locations when doing simple scalar multiplies against the base point, 133 * and then another four locations using the second 16 elements. 134 */ 135static const felem gmul[2][16][3] = { {{{0, 0, 0, 0}, 136 {0, 0, 0, 0}, 137 {0, 0, 0, 0}}, 138 {{0x3280d6115c1d21, 0xc1d356c2112234, 139 0x7f321390b94a03, 0xb70e0cbd6bb4bf}, 140 {0xd5819985007e34, 0x75a05a07476444, 141 0xfb4c22dfe6cd43, 0xbd376388b5f723}, 142 {1, 0, 0, 0}}, 143 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 144 0x2242df8d8a2a43, 0x1f49bbb0f99bc5}, 145 {0x29e0b892dc9c43, 0xece8608436e662, 146 0xdc858f185310d0, 0x9812dd4eb8d321}, 147 {1, 0, 0, 0}}, 148 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 149 0x16e9a3bbce8a3f, 0xeedcccd8c2a748}, 150 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 151 0x313865468fafec, 0x5cb379ba910a17}, 152 {1, 0, 0, 0}}, 153 {{0x0641966cab26e3, 0x91fb2991fab0a0, 154 0xefec27a4e13a0b, 0x0499aa8a5f8ebe}, 155 {0x7510407766af5d, 0x84d929610d5450, 156 0x81d77aae82f706, 0x6916f6d4338c5b}, 157 {1, 0, 0, 0}}, 158 {{0xea95ac3b1f15c6, 0x086000905e82d4, 159 0xdd323ae4d1c8b1, 0x932b56be7685a3}, 160 {0x9ef93dea25dbbf, 0x41665960f390f0, 161 0xfdec76dbe2a8a7, 0x523e80f019062a}, 162 {1, 0, 0, 0}}, 163 {{0x822fdd26732c73, 0xa01c83531b5d0f, 164 0x363f37347c1ba4, 0xc391b45c84725c}, 165 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 166 0xc393da7e222a7f, 0x1efb7890ede244}, 167 {1, 0, 0, 0}}, 168 {{0x4c9e90ca217da1, 0xd11beca79159bb, 169 0xff8d33c2c98b7c, 0x2610b39409f849}, 170 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 171 0x966c079b753c89, 0xfe67e4e820b112}, 172 {1, 0, 0, 0}}, 173 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 174 0x79b7619a3e7c4c, 0x05c73240899b47}, 175 {0x9f7f6382c73e3a, 0x18615165c56bda, 176 0x641fab2116fd56, 0x72855882b08394}, 177 {1, 0, 0, 0}}, 178 {{0x0469182f161c09, 0x74a98ca8d00fb5, 179 0xb89da93489a3e0, 0x41c98768fb0c1d}, 180 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 181 0x1cfe2d3fbf59e6, 0x0e5e03408738a7}, 182 {1, 0, 0, 0}}, 183 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 184 0xe03ab9f738beb8, 0xcb0c5d0dc34f24}, 185 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 186 0x9281d688bcbe50, 0xc40331df893881}, 187 {1, 0, 0, 0}}, 188 {{0xb89530796f0f60, 0xade92bd26909a3, 189 0x1a0c83fb4884da, 0x1765bf22a5a984}, 190 {0x772a9ee75db09e, 0x23bc6c67cec16f, 191 0x4c1edba8b14e2f, 0xe2a215d9611369}, 192 {1, 0, 0, 0}}, 193 {{0x571e509fb5efb3, 0xade88696410552, 194 0xc8ae85fada74fe, 0x6c7e4be83bbde3}, 195 {0xff9f51160f4652, 0xb47ce2495a6539, 196 0xa2946c53b582f4, 0x286d2db3ee9a60}, 197 {1, 0, 0, 0}}, 198 {{0x40bbd5081a44af, 0x0995183b13926c, 199 0xbcefba6f47f6d0, 0x215619e9cc0057}, 200 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 201 0x8631b93cdfe8b5, 0xe7e3f4b0982db9}, 202 {1, 0, 0, 0}}, 203 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 204 0x1c29819435d2c6, 0xc813132f4c07e9}, 205 {0x2891425503b11f, 0x08781030579fea, 206 0xf5426ba5cc9674, 0x1e28ebf18562bc}, 207 {1, 0, 0, 0}}, 208 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 209 0xff17036691a973, 0xf1aef351497c58}, 210 {0xdd1f2d600564ff, 0xdead073b1402db, 211 0x74a684435bd693, 0xeea7471f962558}, 212 {1, 0, 0, 0}}}, 213{{{0, 0, 0, 0}, 214 {0, 0, 0, 0}, 215 {0, 0, 0, 0}}, 216 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31}, 217 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d}, 218 {1, 0, 0, 0}}, 219 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3}, 220 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a}, 221 {1, 0, 0, 0}}, 222 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33}, 223 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100}, 224 {1, 0, 0, 0}}, 225 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5}, 226 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea}, 227 {1, 0, 0, 0}}, 228 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be}, 229 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51}, 230 {1, 0, 0, 0}}, 231 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1}, 232 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb}, 233 {1, 0, 0, 0}}, 234 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233}, 235 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def}, 236 {1, 0, 0, 0}}, 237 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae}, 238 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45}, 239 {1, 0, 0, 0}}, 240 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e}, 241 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb}, 242 {1, 0, 0, 0}}, 243 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de}, 244 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3}, 245 {1, 0, 0, 0}}, 246 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05}, 247 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58}, 248 {1, 0, 0, 0}}, 249 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb}, 250 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0}, 251 {1, 0, 0, 0}}, 252 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9}, 253 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea}, 254 {1, 0, 0, 0}}, 255 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba}, 256 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405}, 257 {1, 0, 0, 0}}, 258 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e}, 259 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e}, 260 {1, 0, 0, 0}}} 261}; 262 263/* Precomputation for the group generator. */ 264typedef struct { 265 felem g_pre_comp[2][16][3]; 266 int references; 267} NISTP224_PRE_COMP; 268 269const EC_METHOD *EC_GFp_nistp224_method(void) 270{ 271 static const EC_METHOD ret = { 272 EC_FLAGS_DEFAULT_OCT, 273 NID_X9_62_prime_field, 274 ec_GFp_nistp224_group_init, 275 ec_GFp_simple_group_finish, 276 ec_GFp_simple_group_clear_finish, 277 ec_GFp_nist_group_copy, 278 ec_GFp_nistp224_group_set_curve, 279 ec_GFp_simple_group_get_curve, 280 ec_GFp_simple_group_get_degree, 281 ec_GFp_simple_group_check_discriminant, 282 ec_GFp_simple_point_init, 283 ec_GFp_simple_point_finish, 284 ec_GFp_simple_point_clear_finish, 285 ec_GFp_simple_point_copy, 286 ec_GFp_simple_point_set_to_infinity, 287 ec_GFp_simple_set_Jprojective_coordinates_GFp, 288 ec_GFp_simple_get_Jprojective_coordinates_GFp, 289 ec_GFp_simple_point_set_affine_coordinates, 290 ec_GFp_nistp224_point_get_affine_coordinates, 291 0 /* point_set_compressed_coordinates */ , 292 0 /* point2oct */ , 293 0 /* oct2point */ , 294 ec_GFp_simple_add, 295 ec_GFp_simple_dbl, 296 ec_GFp_simple_invert, 297 ec_GFp_simple_is_at_infinity, 298 ec_GFp_simple_is_on_curve, 299 ec_GFp_simple_cmp, 300 ec_GFp_simple_make_affine, 301 ec_GFp_simple_points_make_affine, 302 ec_GFp_nistp224_points_mul, 303 ec_GFp_nistp224_precompute_mult, 304 ec_GFp_nistp224_have_precompute_mult, 305 ec_GFp_nist_field_mul, 306 ec_GFp_nist_field_sqr, 307 0 /* field_div */ , 308 0 /* field_encode */ , 309 0 /* field_decode */ , 310 0 /* field_set_to_one */ 311 }; 312 313 return &ret; 314} 315 316/* 317 * Helper functions to convert field elements to/from internal representation 318 */ 319static void bin28_to_felem(felem out, const u8 in[28]) 320{ 321 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 322 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; 323 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; 324 out[3] = (*((const uint64_t *)(in+20))) >> 8; 325} 326 327static void felem_to_bin28(u8 out[28], const felem in) 328{ 329 unsigned i; 330 for (i = 0; i < 7; ++i) { 331 out[i] = in[0] >> (8 * i); 332 out[i + 7] = in[1] >> (8 * i); 333 out[i + 14] = in[2] >> (8 * i); 334 out[i + 21] = in[3] >> (8 * i); 335 } 336} 337 338/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 339static void flip_endian(u8 *out, const u8 *in, unsigned len) 340{ 341 unsigned i; 342 for (i = 0; i < len; ++i) 343 out[i] = in[len - 1 - i]; 344} 345 346/* From OpenSSL BIGNUM to internal representation */ 347static int BN_to_felem(felem out, const BIGNUM *bn) 348{ 349 felem_bytearray b_in; 350 felem_bytearray b_out; 351 unsigned num_bytes; 352 353 /* BN_bn2bin eats leading zeroes */ 354 memset(b_out, 0, sizeof b_out); 355 num_bytes = BN_num_bytes(bn); 356 if (num_bytes > sizeof b_out) { 357 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 358 return 0; 359 } 360 if (BN_is_negative(bn)) { 361 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 362 return 0; 363 } 364 num_bytes = BN_bn2bin(bn, b_in); 365 flip_endian(b_out, b_in, num_bytes); 366 bin28_to_felem(out, b_out); 367 return 1; 368} 369 370/* From internal representation to OpenSSL BIGNUM */ 371static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 372{ 373 felem_bytearray b_in, b_out; 374 felem_to_bin28(b_in, in); 375 flip_endian(b_out, b_in, sizeof b_out); 376 return BN_bin2bn(b_out, sizeof b_out, out); 377} 378 379/******************************************************************************/ 380/*- 381 * FIELD OPERATIONS 382 * 383 * Field operations, using the internal representation of field elements. 384 * NB! These operations are specific to our point multiplication and cannot be 385 * expected to be correct in general - e.g., multiplication with a large scalar 386 * will cause an overflow. 387 * 388 */ 389 390static void felem_one(felem out) 391{ 392 out[0] = 1; 393 out[1] = 0; 394 out[2] = 0; 395 out[3] = 0; 396} 397 398static void felem_assign(felem out, const felem in) 399{ 400 out[0] = in[0]; 401 out[1] = in[1]; 402 out[2] = in[2]; 403 out[3] = in[3]; 404} 405 406/* Sum two field elements: out += in */ 407static void felem_sum(felem out, const felem in) 408{ 409 out[0] += in[0]; 410 out[1] += in[1]; 411 out[2] += in[2]; 412 out[3] += in[3]; 413} 414 415/* Get negative value: out = -in */ 416/* Assumes in[i] < 2^57 */ 417static void felem_neg(felem out, const felem in) 418{ 419 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 420 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 421 static const limb two58m42m2 = (((limb) 1) << 58) - 422 (((limb) 1) << 42) - (((limb) 1) << 2); 423 424 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */ 425 out[0] = two58p2 - in[0]; 426 out[1] = two58m42m2 - in[1]; 427 out[2] = two58m2 - in[2]; 428 out[3] = two58m2 - in[3]; 429} 430 431/* Subtract field elements: out -= in */ 432/* Assumes in[i] < 2^57 */ 433static void felem_diff(felem out, const felem in) 434{ 435 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 436 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 437 static const limb two58m42m2 = (((limb) 1) << 58) - 438 (((limb) 1) << 42) - (((limb) 1) << 2); 439 440 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 441 out[0] += two58p2; 442 out[1] += two58m42m2; 443 out[2] += two58m2; 444 out[3] += two58m2; 445 446 out[0] -= in[0]; 447 out[1] -= in[1]; 448 out[2] -= in[2]; 449 out[3] -= in[3]; 450} 451 452/* Subtract in unreduced 128-bit mode: out -= in */ 453/* Assumes in[i] < 2^119 */ 454static void widefelem_diff(widefelem out, const widefelem in) 455{ 456 static const widelimb two120 = ((widelimb) 1) << 120; 457 static const widelimb two120m64 = (((widelimb) 1) << 120) - 458 (((widelimb) 1) << 64); 459 static const widelimb two120m104m64 = (((widelimb) 1) << 120) - 460 (((widelimb) 1) << 104) - (((widelimb) 1) << 64); 461 462 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 463 out[0] += two120; 464 out[1] += two120m64; 465 out[2] += two120m64; 466 out[3] += two120; 467 out[4] += two120m104m64; 468 out[5] += two120m64; 469 out[6] += two120m64; 470 471 out[0] -= in[0]; 472 out[1] -= in[1]; 473 out[2] -= in[2]; 474 out[3] -= in[3]; 475 out[4] -= in[4]; 476 out[5] -= in[5]; 477 out[6] -= in[6]; 478} 479 480/* Subtract in mixed mode: out128 -= in64 */ 481/* in[i] < 2^63 */ 482static void felem_diff_128_64(widefelem out, const felem in) 483{ 484 static const widelimb two64p8 = (((widelimb) 1) << 64) + 485 (((widelimb) 1) << 8); 486 static const widelimb two64m8 = (((widelimb) 1) << 64) - 487 (((widelimb) 1) << 8); 488 static const widelimb two64m48m8 = (((widelimb) 1) << 64) - 489 (((widelimb) 1) << 48) - (((widelimb) 1) << 8); 490 491 /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 492 out[0] += two64p8; 493 out[1] += two64m48m8; 494 out[2] += two64m8; 495 out[3] += two64m8; 496 497 out[0] -= in[0]; 498 out[1] -= in[1]; 499 out[2] -= in[2]; 500 out[3] -= in[3]; 501} 502 503/* 504 * Multiply a field element by a scalar: out = out * scalar The scalars we 505 * actually use are small, so results fit without overflow 506 */ 507static void felem_scalar(felem out, const limb scalar) 508{ 509 out[0] *= scalar; 510 out[1] *= scalar; 511 out[2] *= scalar; 512 out[3] *= scalar; 513} 514 515/* 516 * Multiply an unreduced field element by a scalar: out = out * scalar The 517 * scalars we actually use are small, so results fit without overflow 518 */ 519static void widefelem_scalar(widefelem out, const widelimb scalar) 520{ 521 out[0] *= scalar; 522 out[1] *= scalar; 523 out[2] *= scalar; 524 out[3] *= scalar; 525 out[4] *= scalar; 526 out[5] *= scalar; 527 out[6] *= scalar; 528} 529 530/* Square a field element: out = in^2 */ 531static void felem_square(widefelem out, const felem in) 532{ 533 limb tmp0, tmp1, tmp2; 534 tmp0 = 2 * in[0]; 535 tmp1 = 2 * in[1]; 536 tmp2 = 2 * in[2]; 537 out[0] = ((widelimb) in[0]) * in[0]; 538 out[1] = ((widelimb) in[0]) * tmp1; 539 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1]; 540 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2; 541 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2]; 542 out[5] = ((widelimb) in[3]) * tmp2; 543 out[6] = ((widelimb) in[3]) * in[3]; 544} 545 546/* Multiply two field elements: out = in1 * in2 */ 547static void felem_mul(widefelem out, const felem in1, const felem in2) 548{ 549 out[0] = ((widelimb) in1[0]) * in2[0]; 550 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0]; 551 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] + 552 ((widelimb) in1[2]) * in2[0]; 553 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] + 554 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0]; 555 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] + 556 ((widelimb) in1[3]) * in2[1]; 557 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2]; 558 out[6] = ((widelimb) in1[3]) * in2[3]; 559} 560 561/*- 562 * Reduce seven 128-bit coefficients to four 64-bit coefficients. 563 * Requires in[i] < 2^126, 564 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */ 565static void felem_reduce(felem out, const widefelem in) 566{ 567 static const widelimb two127p15 = (((widelimb) 1) << 127) + 568 (((widelimb) 1) << 15); 569 static const widelimb two127m71 = (((widelimb) 1) << 127) - 570 (((widelimb) 1) << 71); 571 static const widelimb two127m71m55 = (((widelimb) 1) << 127) - 572 (((widelimb) 1) << 71) - (((widelimb) 1) << 55); 573 widelimb output[5]; 574 575 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */ 576 output[0] = in[0] + two127p15; 577 output[1] = in[1] + two127m71m55; 578 output[2] = in[2] + two127m71; 579 output[3] = in[3]; 580 output[4] = in[4]; 581 582 /* Eliminate in[4], in[5], in[6] */ 583 output[4] += in[6] >> 16; 584 output[3] += (in[6] & 0xffff) << 40; 585 output[2] -= in[6]; 586 587 output[3] += in[5] >> 16; 588 output[2] += (in[5] & 0xffff) << 40; 589 output[1] -= in[5]; 590 591 output[2] += output[4] >> 16; 592 output[1] += (output[4] & 0xffff) << 40; 593 output[0] -= output[4]; 594 595 /* Carry 2 -> 3 -> 4 */ 596 output[3] += output[2] >> 56; 597 output[2] &= 0x00ffffffffffffff; 598 599 output[4] = output[3] >> 56; 600 output[3] &= 0x00ffffffffffffff; 601 602 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */ 603 604 /* Eliminate output[4] */ 605 output[2] += output[4] >> 16; 606 /* output[2] < 2^56 + 2^56 = 2^57 */ 607 output[1] += (output[4] & 0xffff) << 40; 608 output[0] -= output[4]; 609 610 /* Carry 0 -> 1 -> 2 -> 3 */ 611 output[1] += output[0] >> 56; 612 out[0] = output[0] & 0x00ffffffffffffff; 613 614 output[2] += output[1] >> 56; 615 /* output[2] < 2^57 + 2^72 */ 616 out[1] = output[1] & 0x00ffffffffffffff; 617 output[3] += output[2] >> 56; 618 /* output[3] <= 2^56 + 2^16 */ 619 out[2] = output[2] & 0x00ffffffffffffff; 620 621 /*- 622 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, 623 * out[3] <= 2^56 + 2^16 (due to final carry), 624 * so out < 2*p 625 */ 626 out[3] = output[3]; 627} 628 629static void felem_square_reduce(felem out, const felem in) 630{ 631 widefelem tmp; 632 felem_square(tmp, in); 633 felem_reduce(out, tmp); 634} 635 636static void felem_mul_reduce(felem out, const felem in1, const felem in2) 637{ 638 widefelem tmp; 639 felem_mul(tmp, in1, in2); 640 felem_reduce(out, tmp); 641} 642 643/* 644 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always 645 * call felem_reduce first) 646 */ 647static void felem_contract(felem out, const felem in) 648{ 649 static const int64_t two56 = ((limb) 1) << 56; 650 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */ 651 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */ 652 int64_t tmp[4], a; 653 tmp[0] = in[0]; 654 tmp[1] = in[1]; 655 tmp[2] = in[2]; 656 tmp[3] = in[3]; 657 /* Case 1: a = 1 iff in >= 2^224 */ 658 a = (in[3] >> 56); 659 tmp[0] -= a; 660 tmp[1] += a << 40; 661 tmp[3] &= 0x00ffffffffffffff; 662 /* 663 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1 664 * and the lower part is non-zero 665 */ 666 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | 667 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63); 668 a &= 0x00ffffffffffffff; 669 /* turn a into an all-one mask (if a = 0) or an all-zero mask */ 670 a = (a - 1) >> 63; 671 /* subtract 2^224 - 2^96 + 1 if a is all-one */ 672 tmp[3] &= a ^ 0xffffffffffffffff; 673 tmp[2] &= a ^ 0xffffffffffffffff; 674 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff; 675 tmp[0] -= 1 & a; 676 677 /* 678 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be 679 * non-zero, so we only need one step 680 */ 681 a = tmp[0] >> 63; 682 tmp[0] += two56 & a; 683 tmp[1] -= 1 & a; 684 685 /* carry 1 -> 2 -> 3 */ 686 tmp[2] += tmp[1] >> 56; 687 tmp[1] &= 0x00ffffffffffffff; 688 689 tmp[3] += tmp[2] >> 56; 690 tmp[2] &= 0x00ffffffffffffff; 691 692 /* Now 0 <= out < p */ 693 out[0] = tmp[0]; 694 out[1] = tmp[1]; 695 out[2] = tmp[2]; 696 out[3] = tmp[3]; 697} 698 699/* 700 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field 701 * elements are reduced to in < 2^225, so we only need to check three cases: 702 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2 703 */ 704static limb felem_is_zero(const felem in) 705{ 706 limb zero, two224m96p1, two225m97p2; 707 708 zero = in[0] | in[1] | in[2] | in[3]; 709 zero = (((int64_t) (zero) - 1) >> 63) & 1; 710 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000) 711 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff); 712 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1; 713 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000) 714 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff); 715 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1; 716 return (zero | two224m96p1 | two225m97p2); 717} 718 719static int felem_is_zero_int(const void *in) 720{ 721 return (int)(felem_is_zero(in) & ((limb) 1)); 722} 723 724/* Invert a field element */ 725/* Computation chain copied from djb's code */ 726static void felem_inv(felem out, const felem in) 727{ 728 felem ftmp, ftmp2, ftmp3, ftmp4; 729 widefelem tmp; 730 unsigned i; 731 732 felem_square(tmp, in); 733 felem_reduce(ftmp, tmp); /* 2 */ 734 felem_mul(tmp, in, ftmp); 735 felem_reduce(ftmp, tmp); /* 2^2 - 1 */ 736 felem_square(tmp, ftmp); 737 felem_reduce(ftmp, tmp); /* 2^3 - 2 */ 738 felem_mul(tmp, in, ftmp); 739 felem_reduce(ftmp, tmp); /* 2^3 - 1 */ 740 felem_square(tmp, ftmp); 741 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */ 742 felem_square(tmp, ftmp2); 743 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */ 744 felem_square(tmp, ftmp2); 745 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */ 746 felem_mul(tmp, ftmp2, ftmp); 747 felem_reduce(ftmp, tmp); /* 2^6 - 1 */ 748 felem_square(tmp, ftmp); 749 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */ 750 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */ 751 felem_square(tmp, ftmp2); 752 felem_reduce(ftmp2, tmp); 753 } 754 felem_mul(tmp, ftmp2, ftmp); 755 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */ 756 felem_square(tmp, ftmp2); 757 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */ 758 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */ 759 felem_square(tmp, ftmp3); 760 felem_reduce(ftmp3, tmp); 761 } 762 felem_mul(tmp, ftmp3, ftmp2); 763 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */ 764 felem_square(tmp, ftmp2); 765 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */ 766 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */ 767 felem_square(tmp, ftmp3); 768 felem_reduce(ftmp3, tmp); 769 } 770 felem_mul(tmp, ftmp3, ftmp2); 771 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */ 772 felem_square(tmp, ftmp3); 773 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */ 774 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */ 775 felem_square(tmp, ftmp4); 776 felem_reduce(ftmp4, tmp); 777 } 778 felem_mul(tmp, ftmp3, ftmp4); 779 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */ 780 felem_square(tmp, ftmp3); 781 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */ 782 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */ 783 felem_square(tmp, ftmp4); 784 felem_reduce(ftmp4, tmp); 785 } 786 felem_mul(tmp, ftmp2, ftmp4); 787 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */ 788 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */ 789 felem_square(tmp, ftmp2); 790 felem_reduce(ftmp2, tmp); 791 } 792 felem_mul(tmp, ftmp2, ftmp); 793 felem_reduce(ftmp, tmp); /* 2^126 - 1 */ 794 felem_square(tmp, ftmp); 795 felem_reduce(ftmp, tmp); /* 2^127 - 2 */ 796 felem_mul(tmp, ftmp, in); 797 felem_reduce(ftmp, tmp); /* 2^127 - 1 */ 798 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */ 799 felem_square(tmp, ftmp); 800 felem_reduce(ftmp, tmp); 801 } 802 felem_mul(tmp, ftmp, ftmp3); 803 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */ 804} 805 806/* 807 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy 808 * out to itself. 809 */ 810static void copy_conditional(felem out, const felem in, limb icopy) 811{ 812 unsigned i; 813 /* 814 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one 815 */ 816 const limb copy = -icopy; 817 for (i = 0; i < 4; ++i) { 818 const limb tmp = copy & (in[i] ^ out[i]); 819 out[i] ^= tmp; 820 } 821} 822 823/******************************************************************************/ 824/*- 825 * ELLIPTIC CURVE POINT OPERATIONS 826 * 827 * Points are represented in Jacobian projective coordinates: 828 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3), 829 * or to the point at infinity if Z == 0. 830 * 831 */ 832 833/*- 834 * Double an elliptic curve point: 835 * (X', Y', Z') = 2 * (X, Y, Z), where 836 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2 837 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2 838 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z 839 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed, 840 * while x_out == y_in is not (maybe this works, but it's not tested). 841 */ 842static void 843point_double(felem x_out, felem y_out, felem z_out, 844 const felem x_in, const felem y_in, const felem z_in) 845{ 846 widefelem tmp, tmp2; 847 felem delta, gamma, beta, alpha, ftmp, ftmp2; 848 849 felem_assign(ftmp, x_in); 850 felem_assign(ftmp2, x_in); 851 852 /* delta = z^2 */ 853 felem_square(tmp, z_in); 854 felem_reduce(delta, tmp); 855 856 /* gamma = y^2 */ 857 felem_square(tmp, y_in); 858 felem_reduce(gamma, tmp); 859 860 /* beta = x*gamma */ 861 felem_mul(tmp, x_in, gamma); 862 felem_reduce(beta, tmp); 863 864 /* alpha = 3*(x-delta)*(x+delta) */ 865 felem_diff(ftmp, delta); 866 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */ 867 felem_sum(ftmp2, delta); 868 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */ 869 felem_scalar(ftmp2, 3); 870 /* ftmp2[i] < 3 * 2^58 < 2^60 */ 871 felem_mul(tmp, ftmp, ftmp2); 872 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */ 873 felem_reduce(alpha, tmp); 874 875 /* x' = alpha^2 - 8*beta */ 876 felem_square(tmp, alpha); 877 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 878 felem_assign(ftmp, beta); 879 felem_scalar(ftmp, 8); 880 /* ftmp[i] < 8 * 2^57 = 2^60 */ 881 felem_diff_128_64(tmp, ftmp); 882 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 883 felem_reduce(x_out, tmp); 884 885 /* z' = (y + z)^2 - gamma - delta */ 886 felem_sum(delta, gamma); 887 /* delta[i] < 2^57 + 2^57 = 2^58 */ 888 felem_assign(ftmp, y_in); 889 felem_sum(ftmp, z_in); 890 /* ftmp[i] < 2^57 + 2^57 = 2^58 */ 891 felem_square(tmp, ftmp); 892 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */ 893 felem_diff_128_64(tmp, delta); 894 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */ 895 felem_reduce(z_out, tmp); 896 897 /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 898 felem_scalar(beta, 4); 899 /* beta[i] < 4 * 2^57 = 2^59 */ 900 felem_diff(beta, x_out); 901 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */ 902 felem_mul(tmp, alpha, beta); 903 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */ 904 felem_square(tmp2, gamma); 905 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */ 906 widefelem_scalar(tmp2, 8); 907 /* tmp2[i] < 8 * 2^116 = 2^119 */ 908 widefelem_diff(tmp, tmp2); 909 /* tmp[i] < 2^119 + 2^120 < 2^121 */ 910 felem_reduce(y_out, tmp); 911} 912 913/*- 914 * Add two elliptic curve points: 915 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where 916 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 - 917 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 918 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) - 919 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3 920 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2) 921 * 922 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0. 923 */ 924 925/* 926 * This function is not entirely constant-time: it includes a branch for 927 * checking whether the two input points are equal, (while not equal to the 928 * point at infinity). This case never happens during single point 929 * multiplication, so there is no timing leak for ECDH or ECDSA signing. 930 */ 931static void point_add(felem x3, felem y3, felem z3, 932 const felem x1, const felem y1, const felem z1, 933 const int mixed, const felem x2, const felem y2, 934 const felem z2) 935{ 936 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out; 937 widefelem tmp, tmp2; 938 limb z1_is_zero, z2_is_zero, x_equal, y_equal; 939 940 if (!mixed) { 941 /* ftmp2 = z2^2 */ 942 felem_square(tmp, z2); 943 felem_reduce(ftmp2, tmp); 944 945 /* ftmp4 = z2^3 */ 946 felem_mul(tmp, ftmp2, z2); 947 felem_reduce(ftmp4, tmp); 948 949 /* ftmp4 = z2^3*y1 */ 950 felem_mul(tmp2, ftmp4, y1); 951 felem_reduce(ftmp4, tmp2); 952 953 /* ftmp2 = z2^2*x1 */ 954 felem_mul(tmp2, ftmp2, x1); 955 felem_reduce(ftmp2, tmp2); 956 } else { 957 /* 958 * We'll assume z2 = 1 (special case z2 = 0 is handled later) 959 */ 960 961 /* ftmp4 = z2^3*y1 */ 962 felem_assign(ftmp4, y1); 963 964 /* ftmp2 = z2^2*x1 */ 965 felem_assign(ftmp2, x1); 966 } 967 968 /* ftmp = z1^2 */ 969 felem_square(tmp, z1); 970 felem_reduce(ftmp, tmp); 971 972 /* ftmp3 = z1^3 */ 973 felem_mul(tmp, ftmp, z1); 974 felem_reduce(ftmp3, tmp); 975 976 /* tmp = z1^3*y2 */ 977 felem_mul(tmp, ftmp3, y2); 978 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 979 980 /* ftmp3 = z1^3*y2 - z2^3*y1 */ 981 felem_diff_128_64(tmp, ftmp4); 982 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 983 felem_reduce(ftmp3, tmp); 984 985 /* tmp = z1^2*x2 */ 986 felem_mul(tmp, ftmp, x2); 987 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 988 989 /* ftmp = z1^2*x2 - z2^2*x1 */ 990 felem_diff_128_64(tmp, ftmp2); 991 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 992 felem_reduce(ftmp, tmp); 993 994 /* 995 * the formulae are incorrect if the points are equal so we check for 996 * this and do doubling if this happens 997 */ 998 x_equal = felem_is_zero(ftmp); 999 y_equal = felem_is_zero(ftmp3); 1000 z1_is_zero = felem_is_zero(z1); 1001 z2_is_zero = felem_is_zero(z2); 1002 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */ 1003 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { 1004 point_double(x3, y3, z3, x1, y1, z1); 1005 return; 1006 } 1007 1008 /* ftmp5 = z1*z2 */ 1009 if (!mixed) { 1010 felem_mul(tmp, z1, z2); 1011 felem_reduce(ftmp5, tmp); 1012 } else { 1013 /* special case z2 = 0 is handled later */ 1014 felem_assign(ftmp5, z1); 1015 } 1016 1017 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */ 1018 felem_mul(tmp, ftmp, ftmp5); 1019 felem_reduce(z_out, tmp); 1020 1021 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */ 1022 felem_assign(ftmp5, ftmp); 1023 felem_square(tmp, ftmp); 1024 felem_reduce(ftmp, tmp); 1025 1026 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */ 1027 felem_mul(tmp, ftmp, ftmp5); 1028 felem_reduce(ftmp5, tmp); 1029 1030 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1031 felem_mul(tmp, ftmp2, ftmp); 1032 felem_reduce(ftmp2, tmp); 1033 1034 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */ 1035 felem_mul(tmp, ftmp4, ftmp5); 1036 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 1037 1038 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */ 1039 felem_square(tmp2, ftmp3); 1040 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */ 1041 1042 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */ 1043 felem_diff_128_64(tmp2, ftmp5); 1044 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */ 1045 1046 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1047 felem_assign(ftmp5, ftmp2); 1048 felem_scalar(ftmp5, 2); 1049 /* ftmp5[i] < 2 * 2^57 = 2^58 */ 1050 1051 /*- 1052 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 - 1053 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 1054 */ 1055 felem_diff_128_64(tmp2, ftmp5); 1056 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */ 1057 felem_reduce(x_out, tmp2); 1058 1059 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */ 1060 felem_diff(ftmp2, x_out); 1061 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */ 1062 1063 /* 1064 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) 1065 */ 1066 felem_mul(tmp2, ftmp3, ftmp2); 1067 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */ 1068 1069 /*- 1070 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) - 1071 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3 1072 */ 1073 widefelem_diff(tmp2, tmp); 1074 /* tmp2[i] < 2^118 + 2^120 < 2^121 */ 1075 felem_reduce(y_out, tmp2); 1076 1077 /* 1078 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is 1079 * the point at infinity, so we need to check for this separately 1080 */ 1081 1082 /* 1083 * if point 1 is at infinity, copy point 2 to output, and vice versa 1084 */ 1085 copy_conditional(x_out, x2, z1_is_zero); 1086 copy_conditional(x_out, x1, z2_is_zero); 1087 copy_conditional(y_out, y2, z1_is_zero); 1088 copy_conditional(y_out, y1, z2_is_zero); 1089 copy_conditional(z_out, z2, z1_is_zero); 1090 copy_conditional(z_out, z1, z2_is_zero); 1091 felem_assign(x3, x_out); 1092 felem_assign(y3, y_out); 1093 felem_assign(z3, z_out); 1094} 1095 1096/* 1097 * select_point selects the |idx|th point from a precomputation table and 1098 * copies it to out. 1099 * The pre_comp array argument should be size of |size| argument 1100 */ 1101static void select_point(const u64 idx, unsigned int size, 1102 const felem pre_comp[][3], felem out[3]) 1103{ 1104 unsigned i, j; 1105 limb *outlimbs = &out[0][0]; 1106 memset(outlimbs, 0, 3 * sizeof(felem)); 1107 1108 for (i = 0; i < size; i++) { 1109 const limb *inlimbs = &pre_comp[i][0][0]; 1110 u64 mask = i ^ idx; 1111 mask |= mask >> 4; 1112 mask |= mask >> 2; 1113 mask |= mask >> 1; 1114 mask &= 1; 1115 mask--; 1116 for (j = 0; j < 4 * 3; j++) 1117 outlimbs[j] |= inlimbs[j] & mask; 1118 } 1119} 1120 1121/* get_bit returns the |i|th bit in |in| */ 1122static char get_bit(const felem_bytearray in, unsigned i) 1123{ 1124 if (i >= 224) 1125 return 0; 1126 return (in[i >> 3] >> (i & 7)) & 1; 1127} 1128 1129/* 1130 * Interleaved point multiplication using precomputed point multiples: The 1131 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1132 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1133 * generator, using certain (large) precomputed multiples in g_pre_comp. 1134 * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1135 */ 1136static void batch_mul(felem x_out, felem y_out, felem z_out, 1137 const felem_bytearray scalars[], 1138 const unsigned num_points, const u8 *g_scalar, 1139 const int mixed, const felem pre_comp[][17][3], 1140 const felem g_pre_comp[2][16][3]) 1141{ 1142 int i, skip; 1143 unsigned num; 1144 unsigned gen_mul = (g_scalar != NULL); 1145 felem nq[3], tmp[4]; 1146 u64 bits; 1147 u8 sign, digit; 1148 1149 /* set nq to the point at infinity */ 1150 memset(nq, 0, 3 * sizeof(felem)); 1151 1152 /* 1153 * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1154 * of the generator (two in each of the last 28 rounds) and additions of 1155 * other points multiples (every 5th round). 1156 */ 1157 skip = 1; /* save two point operations in the first 1158 * round */ 1159 for (i = (num_points ? 220 : 27); i >= 0; --i) { 1160 /* double */ 1161 if (!skip) 1162 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1163 1164 /* add multiples of the generator */ 1165 if (gen_mul && (i <= 27)) { 1166 /* first, look 28 bits upwards */ 1167 bits = get_bit(g_scalar, i + 196) << 3; 1168 bits |= get_bit(g_scalar, i + 140) << 2; 1169 bits |= get_bit(g_scalar, i + 84) << 1; 1170 bits |= get_bit(g_scalar, i + 28); 1171 /* select the point to add, in constant time */ 1172 select_point(bits, 16, g_pre_comp[1], tmp); 1173 1174 if (!skip) { 1175 /* value 1 below is argument for "mixed" */ 1176 point_add(nq[0], nq[1], nq[2], 1177 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1178 } else { 1179 memcpy(nq, tmp, 3 * sizeof(felem)); 1180 skip = 0; 1181 } 1182 1183 /* second, look at the current position */ 1184 bits = get_bit(g_scalar, i + 168) << 3; 1185 bits |= get_bit(g_scalar, i + 112) << 2; 1186 bits |= get_bit(g_scalar, i + 56) << 1; 1187 bits |= get_bit(g_scalar, i); 1188 /* select the point to add, in constant time */ 1189 select_point(bits, 16, g_pre_comp[0], tmp); 1190 point_add(nq[0], nq[1], nq[2], 1191 nq[0], nq[1], nq[2], 1192 1 /* mixed */ , tmp[0], tmp[1], tmp[2]); 1193 } 1194 1195 /* do other additions every 5 doublings */ 1196 if (num_points && (i % 5 == 0)) { 1197 /* loop over all scalars */ 1198 for (num = 0; num < num_points; ++num) { 1199 bits = get_bit(scalars[num], i + 4) << 5; 1200 bits |= get_bit(scalars[num], i + 3) << 4; 1201 bits |= get_bit(scalars[num], i + 2) << 3; 1202 bits |= get_bit(scalars[num], i + 1) << 2; 1203 bits |= get_bit(scalars[num], i) << 1; 1204 bits |= get_bit(scalars[num], i - 1); 1205 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1206 1207 /* select the point to add or subtract */ 1208 select_point(digit, 17, pre_comp[num], tmp); 1209 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1210 * point */ 1211 copy_conditional(tmp[1], tmp[3], sign); 1212 1213 if (!skip) { 1214 point_add(nq[0], nq[1], nq[2], 1215 nq[0], nq[1], nq[2], 1216 mixed, tmp[0], tmp[1], tmp[2]); 1217 } else { 1218 memcpy(nq, tmp, 3 * sizeof(felem)); 1219 skip = 0; 1220 } 1221 } 1222 } 1223 } 1224 felem_assign(x_out, nq[0]); 1225 felem_assign(y_out, nq[1]); 1226 felem_assign(z_out, nq[2]); 1227} 1228 1229/******************************************************************************/ 1230/* 1231 * FUNCTIONS TO MANAGE PRECOMPUTATION 1232 */ 1233 1234static NISTP224_PRE_COMP *nistp224_pre_comp_new() 1235{ 1236 NISTP224_PRE_COMP *ret = NULL; 1237 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret); 1238 if (!ret) { 1239 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1240 return ret; 1241 } 1242 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1243 ret->references = 1; 1244 return ret; 1245} 1246 1247static void *nistp224_pre_comp_dup(void *src_) 1248{ 1249 NISTP224_PRE_COMP *src = src_; 1250 1251 /* no need to actually copy, these objects never change! */ 1252 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1253 1254 return src_; 1255} 1256 1257static void nistp224_pre_comp_free(void *pre_) 1258{ 1259 int i; 1260 NISTP224_PRE_COMP *pre = pre_; 1261 1262 if (!pre) 1263 return; 1264 1265 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1266 if (i > 0) 1267 return; 1268 1269 OPENSSL_free(pre); 1270} 1271 1272static void nistp224_pre_comp_clear_free(void *pre_) 1273{ 1274 int i; 1275 NISTP224_PRE_COMP *pre = pre_; 1276 1277 if (!pre) 1278 return; 1279 1280 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1281 if (i > 0) 1282 return; 1283 1284 OPENSSL_cleanse(pre, sizeof *pre); 1285 OPENSSL_free(pre); 1286} 1287 1288/******************************************************************************/ 1289/* 1290 * OPENSSL EC_METHOD FUNCTIONS 1291 */ 1292 1293int ec_GFp_nistp224_group_init(EC_GROUP *group) 1294{ 1295 int ret; 1296 ret = ec_GFp_simple_group_init(group); 1297 group->a_is_minus3 = 1; 1298 return ret; 1299} 1300 1301int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1302 const BIGNUM *a, const BIGNUM *b, 1303 BN_CTX *ctx) 1304{ 1305 int ret = 0; 1306 BN_CTX *new_ctx = NULL; 1307 BIGNUM *curve_p, *curve_a, *curve_b; 1308 1309 if (ctx == NULL) 1310 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1311 return 0; 1312 BN_CTX_start(ctx); 1313 if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1314 ((curve_a = BN_CTX_get(ctx)) == NULL) || 1315 ((curve_b = BN_CTX_get(ctx)) == NULL)) 1316 goto err; 1317 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1318 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1319 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1320 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1321 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1322 EC_R_WRONG_CURVE_PARAMETERS); 1323 goto err; 1324 } 1325 group->field_mod_func = BN_nist_mod_224; 1326 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1327 err: 1328 BN_CTX_end(ctx); 1329 if (new_ctx != NULL) 1330 BN_CTX_free(new_ctx); 1331 return ret; 1332} 1333 1334/* 1335 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1336 * (X/Z^2, Y/Z^3) 1337 */ 1338int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1339 const EC_POINT *point, 1340 BIGNUM *x, BIGNUM *y, 1341 BN_CTX *ctx) 1342{ 1343 felem z1, z2, x_in, y_in, x_out, y_out; 1344 widefelem tmp; 1345 1346 if (EC_POINT_is_at_infinity(group, point)) { 1347 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1348 EC_R_POINT_AT_INFINITY); 1349 return 0; 1350 } 1351 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1352 (!BN_to_felem(z1, &point->Z))) 1353 return 0; 1354 felem_inv(z2, z1); 1355 felem_square(tmp, z2); 1356 felem_reduce(z1, tmp); 1357 felem_mul(tmp, x_in, z1); 1358 felem_reduce(x_in, tmp); 1359 felem_contract(x_out, x_in); 1360 if (x != NULL) { 1361 if (!felem_to_BN(x, x_out)) { 1362 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1363 ERR_R_BN_LIB); 1364 return 0; 1365 } 1366 } 1367 felem_mul(tmp, z1, z2); 1368 felem_reduce(z1, tmp); 1369 felem_mul(tmp, y_in, z1); 1370 felem_reduce(y_in, tmp); 1371 felem_contract(y_out, y_in); 1372 if (y != NULL) { 1373 if (!felem_to_BN(y, y_out)) { 1374 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1375 ERR_R_BN_LIB); 1376 return 0; 1377 } 1378 } 1379 return 1; 1380} 1381 1382static void make_points_affine(size_t num, felem points[ /* num */ ][3], 1383 felem tmp_felems[ /* num+1 */ ]) 1384{ 1385 /* 1386 * Runs in constant time, unless an input is the point at infinity (which 1387 * normally shouldn't happen). 1388 */ 1389 ec_GFp_nistp_points_make_affine_internal(num, 1390 points, 1391 sizeof(felem), 1392 tmp_felems, 1393 (void (*)(void *))felem_one, 1394 felem_is_zero_int, 1395 (void (*)(void *, const void *)) 1396 felem_assign, 1397 (void (*)(void *, const void *)) 1398 felem_square_reduce, (void (*) 1399 (void *, 1400 const void 1401 *, 1402 const void 1403 *)) 1404 felem_mul_reduce, 1405 (void (*)(void *, const void *)) 1406 felem_inv, 1407 (void (*)(void *, const void *)) 1408 felem_contract); 1409} 1410 1411/* 1412 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1413 * values Result is stored in r (r can equal one of the inputs). 1414 */ 1415int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, 1416 const BIGNUM *scalar, size_t num, 1417 const EC_POINT *points[], 1418 const BIGNUM *scalars[], BN_CTX *ctx) 1419{ 1420 int ret = 0; 1421 int j; 1422 unsigned i; 1423 int mixed = 0; 1424 BN_CTX *new_ctx = NULL; 1425 BIGNUM *x, *y, *z, *tmp_scalar; 1426 felem_bytearray g_secret; 1427 felem_bytearray *secrets = NULL; 1428 felem(*pre_comp)[17][3] = NULL; 1429 felem *tmp_felems = NULL; 1430 felem_bytearray tmp; 1431 unsigned num_bytes; 1432 int have_pre_comp = 0; 1433 size_t num_points = num; 1434 felem x_in, y_in, z_in, x_out, y_out, z_out; 1435 NISTP224_PRE_COMP *pre = NULL; 1436 const felem(*g_pre_comp)[16][3] = NULL; 1437 EC_POINT *generator = NULL; 1438 const EC_POINT *p = NULL; 1439 const BIGNUM *p_scalar = NULL; 1440 1441 if (ctx == NULL) 1442 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1443 return 0; 1444 BN_CTX_start(ctx); 1445 if (((x = BN_CTX_get(ctx)) == NULL) || 1446 ((y = BN_CTX_get(ctx)) == NULL) || 1447 ((z = BN_CTX_get(ctx)) == NULL) || 1448 ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1449 goto err; 1450 1451 if (scalar != NULL) { 1452 pre = EC_EX_DATA_get_data(group->extra_data, 1453 nistp224_pre_comp_dup, 1454 nistp224_pre_comp_free, 1455 nistp224_pre_comp_clear_free); 1456 if (pre) 1457 /* we have precomputation, try to use it */ 1458 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp; 1459 else 1460 /* try to use the standard precomputation */ 1461 g_pre_comp = &gmul[0]; 1462 generator = EC_POINT_new(group); 1463 if (generator == NULL) 1464 goto err; 1465 /* get the generator from precomputation */ 1466 if (!felem_to_BN(x, g_pre_comp[0][1][0]) || 1467 !felem_to_BN(y, g_pre_comp[0][1][1]) || 1468 !felem_to_BN(z, g_pre_comp[0][1][2])) { 1469 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1470 goto err; 1471 } 1472 if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1473 generator, x, y, z, 1474 ctx)) 1475 goto err; 1476 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1477 /* precomputation matches generator */ 1478 have_pre_comp = 1; 1479 else 1480 /* 1481 * we don't have valid precomputation: treat the generator as a 1482 * random point 1483 */ 1484 num_points = num_points + 1; 1485 } 1486 1487 if (num_points > 0) { 1488 if (num_points >= 3) { 1489 /* 1490 * unless we precompute multiples for just one or two points, 1491 * converting those into affine form is time well spent 1492 */ 1493 mixed = 1; 1494 } 1495 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1496 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1497 if (mixed) 1498 tmp_felems = 1499 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1500 if ((secrets == NULL) || (pre_comp == NULL) 1501 || (mixed && (tmp_felems == NULL))) { 1502 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1503 goto err; 1504 } 1505 1506 /* 1507 * we treat NULL scalars as 0, and NULL points as points at infinity, 1508 * i.e., they contribute nothing to the linear combination 1509 */ 1510 memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1511 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1512 for (i = 0; i < num_points; ++i) { 1513 if (i == num) 1514 /* the generator */ 1515 { 1516 p = EC_GROUP_get0_generator(group); 1517 p_scalar = scalar; 1518 } else 1519 /* the i^th point */ 1520 { 1521 p = points[i]; 1522 p_scalar = scalars[i]; 1523 } 1524 if ((p_scalar != NULL) && (p != NULL)) { 1525 /* reduce scalar to 0 <= scalar < 2^224 */ 1526 if ((BN_num_bits(p_scalar) > 224) 1527 || (BN_is_negative(p_scalar))) { 1528 /* 1529 * this is an unusual input, and we don't guarantee 1530 * constant-timeness 1531 */ 1532 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1533 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1534 goto err; 1535 } 1536 num_bytes = BN_bn2bin(tmp_scalar, tmp); 1537 } else 1538 num_bytes = BN_bn2bin(p_scalar, tmp); 1539 flip_endian(secrets[i], tmp, num_bytes); 1540 /* precompute multiples */ 1541 if ((!BN_to_felem(x_out, &p->X)) || 1542 (!BN_to_felem(y_out, &p->Y)) || 1543 (!BN_to_felem(z_out, &p->Z))) 1544 goto err; 1545 felem_assign(pre_comp[i][1][0], x_out); 1546 felem_assign(pre_comp[i][1][1], y_out); 1547 felem_assign(pre_comp[i][1][2], z_out); 1548 for (j = 2; j <= 16; ++j) { 1549 if (j & 1) { 1550 point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1551 pre_comp[i][j][2], pre_comp[i][1][0], 1552 pre_comp[i][1][1], pre_comp[i][1][2], 0, 1553 pre_comp[i][j - 1][0], 1554 pre_comp[i][j - 1][1], 1555 pre_comp[i][j - 1][2]); 1556 } else { 1557 point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1558 pre_comp[i][j][2], pre_comp[i][j / 2][0], 1559 pre_comp[i][j / 2][1], 1560 pre_comp[i][j / 2][2]); 1561 } 1562 } 1563 } 1564 } 1565 if (mixed) 1566 make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1567 } 1568 1569 /* the scalar for the generator */ 1570 if ((scalar != NULL) && (have_pre_comp)) { 1571 memset(g_secret, 0, sizeof g_secret); 1572 /* reduce scalar to 0 <= scalar < 2^224 */ 1573 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) { 1574 /* 1575 * this is an unusual input, and we don't guarantee 1576 * constant-timeness 1577 */ 1578 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1579 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1580 goto err; 1581 } 1582 num_bytes = BN_bn2bin(tmp_scalar, tmp); 1583 } else 1584 num_bytes = BN_bn2bin(scalar, tmp); 1585 flip_endian(g_secret, tmp, num_bytes); 1586 /* do the multiplication with generator precomputation */ 1587 batch_mul(x_out, y_out, z_out, 1588 (const felem_bytearray(*))secrets, num_points, 1589 g_secret, 1590 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp); 1591 } else 1592 /* do the multiplication without generator precomputation */ 1593 batch_mul(x_out, y_out, z_out, 1594 (const felem_bytearray(*))secrets, num_points, 1595 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1596 /* reduce the output to its unique minimal representation */ 1597 felem_contract(x_in, x_out); 1598 felem_contract(y_in, y_out); 1599 felem_contract(z_in, z_out); 1600 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1601 (!felem_to_BN(z, z_in))) { 1602 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1603 goto err; 1604 } 1605 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1606 1607 err: 1608 BN_CTX_end(ctx); 1609 if (generator != NULL) 1610 EC_POINT_free(generator); 1611 if (new_ctx != NULL) 1612 BN_CTX_free(new_ctx); 1613 if (secrets != NULL) 1614 OPENSSL_free(secrets); 1615 if (pre_comp != NULL) 1616 OPENSSL_free(pre_comp); 1617 if (tmp_felems != NULL) 1618 OPENSSL_free(tmp_felems); 1619 return ret; 1620} 1621 1622int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1623{ 1624 int ret = 0; 1625 NISTP224_PRE_COMP *pre = NULL; 1626 int i, j; 1627 BN_CTX *new_ctx = NULL; 1628 BIGNUM *x, *y; 1629 EC_POINT *generator = NULL; 1630 felem tmp_felems[32]; 1631 1632 /* throw away old precomputation */ 1633 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup, 1634 nistp224_pre_comp_free, 1635 nistp224_pre_comp_clear_free); 1636 if (ctx == NULL) 1637 if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1638 return 0; 1639 BN_CTX_start(ctx); 1640 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 1641 goto err; 1642 /* get the generator */ 1643 if (group->generator == NULL) 1644 goto err; 1645 generator = EC_POINT_new(group); 1646 if (generator == NULL) 1647 goto err; 1648 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x); 1649 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y); 1650 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1651 goto err; 1652 if ((pre = nistp224_pre_comp_new()) == NULL) 1653 goto err; 1654 /* 1655 * if the generator is the standard one, use built-in precomputation 1656 */ 1657 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 1658 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1659 goto done; 1660 } 1661 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || 1662 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || 1663 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z))) 1664 goto err; 1665 /* 1666 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G, 1667 * 2^140*G, 2^196*G for the second one 1668 */ 1669 for (i = 1; i <= 8; i <<= 1) { 1670 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1671 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0], 1672 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 1673 for (j = 0; j < 27; ++j) { 1674 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1675 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0], 1676 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1677 } 1678 if (i == 8) 1679 break; 1680 point_double(pre->g_pre_comp[0][2 * i][0], 1681 pre->g_pre_comp[0][2 * i][1], 1682 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0], 1683 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1684 for (j = 0; j < 27; ++j) { 1685 point_double(pre->g_pre_comp[0][2 * i][0], 1686 pre->g_pre_comp[0][2 * i][1], 1687 pre->g_pre_comp[0][2 * i][2], 1688 pre->g_pre_comp[0][2 * i][0], 1689 pre->g_pre_comp[0][2 * i][1], 1690 pre->g_pre_comp[0][2 * i][2]); 1691 } 1692 } 1693 for (i = 0; i < 2; i++) { 1694 /* g_pre_comp[i][0] is the point at infinity */ 1695 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 1696 /* the remaining multiples */ 1697 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */ 1698 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], 1699 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0], 1700 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 1701 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1702 pre->g_pre_comp[i][2][2]); 1703 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */ 1704 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], 1705 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0], 1706 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1707 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1708 pre->g_pre_comp[i][2][2]); 1709 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */ 1710 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], 1711 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0], 1712 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1713 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], 1714 pre->g_pre_comp[i][4][2]); 1715 /* 1716 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G 1717 */ 1718 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], 1719 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0], 1720 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 1721 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1722 pre->g_pre_comp[i][2][2]); 1723 for (j = 1; j < 8; ++j) { 1724 /* odd multiples: add G resp. 2^28*G */ 1725 point_add(pre->g_pre_comp[i][2 * j + 1][0], 1726 pre->g_pre_comp[i][2 * j + 1][1], 1727 pre->g_pre_comp[i][2 * j + 1][2], 1728 pre->g_pre_comp[i][2 * j][0], 1729 pre->g_pre_comp[i][2 * j][1], 1730 pre->g_pre_comp[i][2 * j][2], 0, 1731 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], 1732 pre->g_pre_comp[i][1][2]); 1733 } 1734 } 1735 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); 1736 1737 done: 1738 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, 1739 nistp224_pre_comp_free, 1740 nistp224_pre_comp_clear_free)) 1741 goto err; 1742 ret = 1; 1743 pre = NULL; 1744 err: 1745 BN_CTX_end(ctx); 1746 if (generator != NULL) 1747 EC_POINT_free(generator); 1748 if (new_ctx != NULL) 1749 BN_CTX_free(new_ctx); 1750 if (pre) 1751 nistp224_pre_comp_free(pre); 1752 return ret; 1753} 1754 1755int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group) 1756{ 1757 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup, 1758 nistp224_pre_comp_free, 1759 nistp224_pre_comp_clear_free) 1760 != NULL) 1761 return 1; 1762 else 1763 return 0; 1764} 1765 1766#else 1767static void *dummy = &dummy; 1768#endif 1769