1238384Sjkim/* crypto/ec/ecp_nistp224.c */ 2238384Sjkim/* 3238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project. 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation 25238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519 26238384Sjkim */ 27238384Sjkim 28238384Sjkim#include <openssl/opensslconf.h> 29238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 30238384Sjkim 31280304Sjkim# ifndef OPENSSL_SYS_VMS 32280304Sjkim# include <stdint.h> 33280304Sjkim# else 34280304Sjkim# include <inttypes.h> 35280304Sjkim# endif 36238384Sjkim 37280304Sjkim# include <string.h> 38280304Sjkim# include <openssl/err.h> 39280304Sjkim# include "ec_lcl.h" 40238384Sjkim 41280304Sjkim# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 42238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 43280304Sjkimtypedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 44280304Sjkim * platforms */ 45280304Sjkim# else 46280304Sjkim# error "Need GCC 3.1 or later to define type uint128_t" 47280304Sjkim# endif 48238384Sjkim 49238384Sjkimtypedef uint8_t u8; 50238384Sjkimtypedef uint64_t u64; 51238384Sjkimtypedef int64_t s64; 52238384Sjkim 53238384Sjkim/******************************************************************************/ 54280304Sjkim/*- 55280304Sjkim * INTERNAL REPRESENTATION OF FIELD ELEMENTS 56238384Sjkim * 57238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 58238384Sjkim * using 64-bit coefficients called 'limbs', 59238384Sjkim * and sometimes (for multiplication results) as 60238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6 61238384Sjkim * using 128-bit coefficients called 'widelimbs'. 62238384Sjkim * A 4-limb representation is an 'felem'; 63238384Sjkim * a 7-widelimb representation is a 'widefelem'. 64238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always 65238384Sjkim * reduce the representations: we ensure that inputs to each felem 66238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60, 67238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then 68238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57. 69238384Sjkim * We only reduce to the unique minimal representation at the end of the 70238384Sjkim * computation. 71238384Sjkim */ 72238384Sjkim 73238384Sjkimtypedef uint64_t limb; 74238384Sjkimtypedef uint128_t widelimb; 75238384Sjkim 76238384Sjkimtypedef limb felem[4]; 77238384Sjkimtypedef widelimb widefelem[7]; 78238384Sjkim 79280304Sjkim/* 80280304Sjkim * Field element represented as a byte arrary. 28*8 = 224 bits is also the 81280304Sjkim * group order size for the elliptic curve, and we also use this type for 82280304Sjkim * scalars for point multiplication. 83280304Sjkim */ 84238384Sjkimtypedef u8 felem_bytearray[28]; 85238384Sjkim 86238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = { 87280304Sjkim {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */ 88280304Sjkim 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 89280304Sjkim 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, 90280304Sjkim {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */ 91280304Sjkim 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 92280304Sjkim 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}, 93280304Sjkim {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */ 94280304Sjkim 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA, 95280304Sjkim 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4}, 96280304Sjkim {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */ 97280304Sjkim 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22, 98280304Sjkim 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21}, 99280304Sjkim {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */ 100280304Sjkim 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64, 101280304Sjkim 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34} 102238384Sjkim}; 103238384Sjkim 104280304Sjkim/*- 105280304Sjkim * Precomputed multiples of the standard generator 106238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1 107238384Sjkim * (0 for the point at infinity). 108238384Sjkim * For each field element, slice a_0 is word 0, etc. 109238384Sjkim * 110238384Sjkim * The table has 2 * 16 elements, starting with the following: 111238384Sjkim * index | bits | point 112238384Sjkim * ------+---------+------------------------------ 113238384Sjkim * 0 | 0 0 0 0 | 0G 114238384Sjkim * 1 | 0 0 0 1 | 1G 115238384Sjkim * 2 | 0 0 1 0 | 2^56G 116238384Sjkim * 3 | 0 0 1 1 | (2^56 + 1)G 117238384Sjkim * 4 | 0 1 0 0 | 2^112G 118238384Sjkim * 5 | 0 1 0 1 | (2^112 + 1)G 119238384Sjkim * 6 | 0 1 1 0 | (2^112 + 2^56)G 120238384Sjkim * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G 121238384Sjkim * 8 | 1 0 0 0 | 2^168G 122238384Sjkim * 9 | 1 0 0 1 | (2^168 + 1)G 123238384Sjkim * 10 | 1 0 1 0 | (2^168 + 2^56)G 124238384Sjkim * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G 125238384Sjkim * 12 | 1 1 0 0 | (2^168 + 2^112)G 126238384Sjkim * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G 127238384Sjkim * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G 128238384Sjkim * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G 129238384Sjkim * followed by a copy of this with each element multiplied by 2^28. 130238384Sjkim * 131238384Sjkim * The reason for this is so that we can clock bits into four different 132238384Sjkim * locations when doing simple scalar multiplies against the base point, 133238384Sjkim * and then another four locations using the second 16 elements. 134238384Sjkim */ 135280304Sjkimstatic const felem gmul[2][16][3] = { {{{0, 0, 0, 0}, 136280304Sjkim {0, 0, 0, 0}, 137280304Sjkim {0, 0, 0, 0}}, 138280304Sjkim {{0x3280d6115c1d21, 0xc1d356c2112234, 139280304Sjkim 0x7f321390b94a03, 0xb70e0cbd6bb4bf}, 140280304Sjkim {0xd5819985007e34, 0x75a05a07476444, 141280304Sjkim 0xfb4c22dfe6cd43, 0xbd376388b5f723}, 142280304Sjkim {1, 0, 0, 0}}, 143280304Sjkim {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 144280304Sjkim 0x2242df8d8a2a43, 0x1f49bbb0f99bc5}, 145280304Sjkim {0x29e0b892dc9c43, 0xece8608436e662, 146280304Sjkim 0xdc858f185310d0, 0x9812dd4eb8d321}, 147280304Sjkim {1, 0, 0, 0}}, 148280304Sjkim {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 149280304Sjkim 0x16e9a3bbce8a3f, 0xeedcccd8c2a748}, 150280304Sjkim {0xf19f90ed50266d, 0xabf2b4bf65f9df, 151280304Sjkim 0x313865468fafec, 0x5cb379ba910a17}, 152280304Sjkim {1, 0, 0, 0}}, 153280304Sjkim {{0x0641966cab26e3, 0x91fb2991fab0a0, 154280304Sjkim 0xefec27a4e13a0b, 0x0499aa8a5f8ebe}, 155280304Sjkim {0x7510407766af5d, 0x84d929610d5450, 156280304Sjkim 0x81d77aae82f706, 0x6916f6d4338c5b}, 157280304Sjkim {1, 0, 0, 0}}, 158280304Sjkim {{0xea95ac3b1f15c6, 0x086000905e82d4, 159280304Sjkim 0xdd323ae4d1c8b1, 0x932b56be7685a3}, 160280304Sjkim {0x9ef93dea25dbbf, 0x41665960f390f0, 161280304Sjkim 0xfdec76dbe2a8a7, 0x523e80f019062a}, 162280304Sjkim {1, 0, 0, 0}}, 163280304Sjkim {{0x822fdd26732c73, 0xa01c83531b5d0f, 164280304Sjkim 0x363f37347c1ba4, 0xc391b45c84725c}, 165280304Sjkim {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 166280304Sjkim 0xc393da7e222a7f, 0x1efb7890ede244}, 167280304Sjkim {1, 0, 0, 0}}, 168280304Sjkim {{0x4c9e90ca217da1, 0xd11beca79159bb, 169280304Sjkim 0xff8d33c2c98b7c, 0x2610b39409f849}, 170280304Sjkim {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 171280304Sjkim 0x966c079b753c89, 0xfe67e4e820b112}, 172280304Sjkim {1, 0, 0, 0}}, 173280304Sjkim {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 174280304Sjkim 0x79b7619a3e7c4c, 0x05c73240899b47}, 175280304Sjkim {0x9f7f6382c73e3a, 0x18615165c56bda, 176280304Sjkim 0x641fab2116fd56, 0x72855882b08394}, 177280304Sjkim {1, 0, 0, 0}}, 178280304Sjkim {{0x0469182f161c09, 0x74a98ca8d00fb5, 179280304Sjkim 0xb89da93489a3e0, 0x41c98768fb0c1d}, 180280304Sjkim {0xe5ea05fb32da81, 0x3dce9ffbca6855, 181280304Sjkim 0x1cfe2d3fbf59e6, 0x0e5e03408738a7}, 182280304Sjkim {1, 0, 0, 0}}, 183280304Sjkim {{0xdab22b2333e87f, 0x4430137a5dd2f6, 184280304Sjkim 0xe03ab9f738beb8, 0xcb0c5d0dc34f24}, 185280304Sjkim {0x764a7df0c8fda5, 0x185ba5c3fa2044, 186280304Sjkim 0x9281d688bcbe50, 0xc40331df893881}, 187280304Sjkim {1, 0, 0, 0}}, 188280304Sjkim {{0xb89530796f0f60, 0xade92bd26909a3, 189280304Sjkim 0x1a0c83fb4884da, 0x1765bf22a5a984}, 190280304Sjkim {0x772a9ee75db09e, 0x23bc6c67cec16f, 191280304Sjkim 0x4c1edba8b14e2f, 0xe2a215d9611369}, 192280304Sjkim {1, 0, 0, 0}}, 193280304Sjkim {{0x571e509fb5efb3, 0xade88696410552, 194280304Sjkim 0xc8ae85fada74fe, 0x6c7e4be83bbde3}, 195280304Sjkim {0xff9f51160f4652, 0xb47ce2495a6539, 196280304Sjkim 0xa2946c53b582f4, 0x286d2db3ee9a60}, 197280304Sjkim {1, 0, 0, 0}}, 198280304Sjkim {{0x40bbd5081a44af, 0x0995183b13926c, 199280304Sjkim 0xbcefba6f47f6d0, 0x215619e9cc0057}, 200280304Sjkim {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 201280304Sjkim 0x8631b93cdfe8b5, 0xe7e3f4b0982db9}, 202280304Sjkim {1, 0, 0, 0}}, 203280304Sjkim {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 204280304Sjkim 0x1c29819435d2c6, 0xc813132f4c07e9}, 205280304Sjkim {0x2891425503b11f, 0x08781030579fea, 206280304Sjkim 0xf5426ba5cc9674, 0x1e28ebf18562bc}, 207280304Sjkim {1, 0, 0, 0}}, 208280304Sjkim {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 209280304Sjkim 0xff17036691a973, 0xf1aef351497c58}, 210280304Sjkim {0xdd1f2d600564ff, 0xdead073b1402db, 211280304Sjkim 0x74a684435bd693, 0xeea7471f962558}, 212280304Sjkim {1, 0, 0, 0}}}, 213280304Sjkim{{{0, 0, 0, 0}, 214280304Sjkim {0, 0, 0, 0}, 215280304Sjkim {0, 0, 0, 0}}, 216280304Sjkim {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31}, 217280304Sjkim {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d}, 218280304Sjkim {1, 0, 0, 0}}, 219280304Sjkim {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3}, 220280304Sjkim {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a}, 221280304Sjkim {1, 0, 0, 0}}, 222280304Sjkim {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33}, 223280304Sjkim {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100}, 224280304Sjkim {1, 0, 0, 0}}, 225280304Sjkim {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5}, 226280304Sjkim {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea}, 227280304Sjkim {1, 0, 0, 0}}, 228280304Sjkim {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be}, 229280304Sjkim {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51}, 230280304Sjkim {1, 0, 0, 0}}, 231280304Sjkim {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1}, 232280304Sjkim {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb}, 233280304Sjkim {1, 0, 0, 0}}, 234280304Sjkim {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233}, 235280304Sjkim {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def}, 236280304Sjkim {1, 0, 0, 0}}, 237280304Sjkim {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae}, 238280304Sjkim {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45}, 239280304Sjkim {1, 0, 0, 0}}, 240280304Sjkim {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e}, 241280304Sjkim {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb}, 242280304Sjkim {1, 0, 0, 0}}, 243280304Sjkim {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de}, 244280304Sjkim {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3}, 245280304Sjkim {1, 0, 0, 0}}, 246280304Sjkim {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05}, 247280304Sjkim {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58}, 248280304Sjkim {1, 0, 0, 0}}, 249280304Sjkim {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb}, 250280304Sjkim {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0}, 251280304Sjkim {1, 0, 0, 0}}, 252280304Sjkim {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9}, 253280304Sjkim {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea}, 254280304Sjkim {1, 0, 0, 0}}, 255280304Sjkim {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba}, 256280304Sjkim {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405}, 257280304Sjkim {1, 0, 0, 0}}, 258280304Sjkim {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e}, 259280304Sjkim {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e}, 260280304Sjkim {1, 0, 0, 0}}} 261280304Sjkim}; 262238384Sjkim 263238384Sjkim/* Precomputation for the group generator. */ 264238384Sjkimtypedef struct { 265280304Sjkim felem g_pre_comp[2][16][3]; 266280304Sjkim int references; 267238384Sjkim} NISTP224_PRE_COMP; 268238384Sjkim 269238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void) 270280304Sjkim{ 271280304Sjkim static const EC_METHOD ret = { 272280304Sjkim EC_FLAGS_DEFAULT_OCT, 273280304Sjkim NID_X9_62_prime_field, 274280304Sjkim ec_GFp_nistp224_group_init, 275280304Sjkim ec_GFp_simple_group_finish, 276280304Sjkim ec_GFp_simple_group_clear_finish, 277280304Sjkim ec_GFp_nist_group_copy, 278280304Sjkim ec_GFp_nistp224_group_set_curve, 279280304Sjkim ec_GFp_simple_group_get_curve, 280280304Sjkim ec_GFp_simple_group_get_degree, 281280304Sjkim ec_GFp_simple_group_check_discriminant, 282280304Sjkim ec_GFp_simple_point_init, 283280304Sjkim ec_GFp_simple_point_finish, 284280304Sjkim ec_GFp_simple_point_clear_finish, 285280304Sjkim ec_GFp_simple_point_copy, 286280304Sjkim ec_GFp_simple_point_set_to_infinity, 287280304Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 288280304Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 289280304Sjkim ec_GFp_simple_point_set_affine_coordinates, 290280304Sjkim ec_GFp_nistp224_point_get_affine_coordinates, 291280304Sjkim 0 /* point_set_compressed_coordinates */ , 292280304Sjkim 0 /* point2oct */ , 293280304Sjkim 0 /* oct2point */ , 294280304Sjkim ec_GFp_simple_add, 295280304Sjkim ec_GFp_simple_dbl, 296280304Sjkim ec_GFp_simple_invert, 297280304Sjkim ec_GFp_simple_is_at_infinity, 298280304Sjkim ec_GFp_simple_is_on_curve, 299280304Sjkim ec_GFp_simple_cmp, 300280304Sjkim ec_GFp_simple_make_affine, 301280304Sjkim ec_GFp_simple_points_make_affine, 302280304Sjkim ec_GFp_nistp224_points_mul, 303280304Sjkim ec_GFp_nistp224_precompute_mult, 304280304Sjkim ec_GFp_nistp224_have_precompute_mult, 305280304Sjkim ec_GFp_nist_field_mul, 306280304Sjkim ec_GFp_nist_field_sqr, 307280304Sjkim 0 /* field_div */ , 308280304Sjkim 0 /* field_encode */ , 309280304Sjkim 0 /* field_decode */ , 310280304Sjkim 0 /* field_set_to_one */ 311280304Sjkim }; 312238384Sjkim 313280304Sjkim return &ret; 314280304Sjkim} 315238384Sjkim 316280304Sjkim/* 317280304Sjkim * Helper functions to convert field elements to/from internal representation 318280304Sjkim */ 319238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28]) 320280304Sjkim{ 321280304Sjkim out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 322280304Sjkim out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; 323280304Sjkim out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; 324280304Sjkim out[3] = (*((const uint64_t *)(in+20))) >> 8; 325280304Sjkim} 326238384Sjkim 327238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in) 328280304Sjkim{ 329280304Sjkim unsigned i; 330280304Sjkim for (i = 0; i < 7; ++i) { 331280304Sjkim out[i] = in[0] >> (8 * i); 332280304Sjkim out[i + 7] = in[1] >> (8 * i); 333280304Sjkim out[i + 14] = in[2] >> (8 * i); 334280304Sjkim out[i + 21] = in[3] >> (8 * i); 335280304Sjkim } 336280304Sjkim} 337238384Sjkim 338238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 339238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 340280304Sjkim{ 341280304Sjkim unsigned i; 342280304Sjkim for (i = 0; i < len; ++i) 343280304Sjkim out[i] = in[len - 1 - i]; 344280304Sjkim} 345238384Sjkim 346238384Sjkim/* From OpenSSL BIGNUM to internal representation */ 347238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 348280304Sjkim{ 349280304Sjkim felem_bytearray b_in; 350280304Sjkim felem_bytearray b_out; 351280304Sjkim unsigned num_bytes; 352238384Sjkim 353280304Sjkim /* BN_bn2bin eats leading zeroes */ 354280304Sjkim memset(b_out, 0, sizeof b_out); 355280304Sjkim num_bytes = BN_num_bytes(bn); 356280304Sjkim if (num_bytes > sizeof b_out) { 357280304Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 358280304Sjkim return 0; 359280304Sjkim } 360280304Sjkim if (BN_is_negative(bn)) { 361280304Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 362280304Sjkim return 0; 363280304Sjkim } 364280304Sjkim num_bytes = BN_bn2bin(bn, b_in); 365280304Sjkim flip_endian(b_out, b_in, num_bytes); 366280304Sjkim bin28_to_felem(out, b_out); 367280304Sjkim return 1; 368280304Sjkim} 369238384Sjkim 370238384Sjkim/* From internal representation to OpenSSL BIGNUM */ 371238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 372280304Sjkim{ 373280304Sjkim felem_bytearray b_in, b_out; 374280304Sjkim felem_to_bin28(b_in, in); 375280304Sjkim flip_endian(b_out, b_in, sizeof b_out); 376280304Sjkim return BN_bin2bn(b_out, sizeof b_out, out); 377280304Sjkim} 378238384Sjkim 379238384Sjkim/******************************************************************************/ 380280304Sjkim/*- 381280304Sjkim * FIELD OPERATIONS 382238384Sjkim * 383238384Sjkim * Field operations, using the internal representation of field elements. 384238384Sjkim * NB! These operations are specific to our point multiplication and cannot be 385238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar 386238384Sjkim * will cause an overflow. 387238384Sjkim * 388238384Sjkim */ 389238384Sjkim 390238384Sjkimstatic void felem_one(felem out) 391280304Sjkim{ 392280304Sjkim out[0] = 1; 393280304Sjkim out[1] = 0; 394280304Sjkim out[2] = 0; 395280304Sjkim out[3] = 0; 396280304Sjkim} 397238384Sjkim 398238384Sjkimstatic void felem_assign(felem out, const felem in) 399280304Sjkim{ 400280304Sjkim out[0] = in[0]; 401280304Sjkim out[1] = in[1]; 402280304Sjkim out[2] = in[2]; 403280304Sjkim out[3] = in[3]; 404280304Sjkim} 405238384Sjkim 406238384Sjkim/* Sum two field elements: out += in */ 407238384Sjkimstatic void felem_sum(felem out, const felem in) 408280304Sjkim{ 409280304Sjkim out[0] += in[0]; 410280304Sjkim out[1] += in[1]; 411280304Sjkim out[2] += in[2]; 412280304Sjkim out[3] += in[3]; 413280304Sjkim} 414238384Sjkim 415238384Sjkim/* Get negative value: out = -in */ 416238384Sjkim/* Assumes in[i] < 2^57 */ 417238384Sjkimstatic void felem_neg(felem out, const felem in) 418280304Sjkim{ 419280304Sjkim static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 420280304Sjkim static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 421280304Sjkim static const limb two58m42m2 = (((limb) 1) << 58) - 422280304Sjkim (((limb) 1) << 42) - (((limb) 1) << 2); 423238384Sjkim 424280304Sjkim /* Set to 0 mod 2^224-2^96+1 to ensure out > in */ 425280304Sjkim out[0] = two58p2 - in[0]; 426280304Sjkim out[1] = two58m42m2 - in[1]; 427280304Sjkim out[2] = two58m2 - in[2]; 428280304Sjkim out[3] = two58m2 - in[3]; 429280304Sjkim} 430238384Sjkim 431238384Sjkim/* Subtract field elements: out -= in */ 432238384Sjkim/* Assumes in[i] < 2^57 */ 433238384Sjkimstatic void felem_diff(felem out, const felem in) 434280304Sjkim{ 435280304Sjkim static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 436280304Sjkim static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 437280304Sjkim static const limb two58m42m2 = (((limb) 1) << 58) - 438280304Sjkim (((limb) 1) << 42) - (((limb) 1) << 2); 439238384Sjkim 440280304Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 441280304Sjkim out[0] += two58p2; 442280304Sjkim out[1] += two58m42m2; 443280304Sjkim out[2] += two58m2; 444280304Sjkim out[3] += two58m2; 445238384Sjkim 446280304Sjkim out[0] -= in[0]; 447280304Sjkim out[1] -= in[1]; 448280304Sjkim out[2] -= in[2]; 449280304Sjkim out[3] -= in[3]; 450280304Sjkim} 451238384Sjkim 452238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */ 453238384Sjkim/* Assumes in[i] < 2^119 */ 454238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in) 455280304Sjkim{ 456280304Sjkim static const widelimb two120 = ((widelimb) 1) << 120; 457280304Sjkim static const widelimb two120m64 = (((widelimb) 1) << 120) - 458280304Sjkim (((widelimb) 1) << 64); 459280304Sjkim static const widelimb two120m104m64 = (((widelimb) 1) << 120) - 460280304Sjkim (((widelimb) 1) << 104) - (((widelimb) 1) << 64); 461238384Sjkim 462280304Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 463280304Sjkim out[0] += two120; 464280304Sjkim out[1] += two120m64; 465280304Sjkim out[2] += two120m64; 466280304Sjkim out[3] += two120; 467280304Sjkim out[4] += two120m104m64; 468280304Sjkim out[5] += two120m64; 469280304Sjkim out[6] += two120m64; 470238384Sjkim 471280304Sjkim out[0] -= in[0]; 472280304Sjkim out[1] -= in[1]; 473280304Sjkim out[2] -= in[2]; 474280304Sjkim out[3] -= in[3]; 475280304Sjkim out[4] -= in[4]; 476280304Sjkim out[5] -= in[5]; 477280304Sjkim out[6] -= in[6]; 478280304Sjkim} 479238384Sjkim 480238384Sjkim/* Subtract in mixed mode: out128 -= in64 */ 481238384Sjkim/* in[i] < 2^63 */ 482238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in) 483280304Sjkim{ 484280304Sjkim static const widelimb two64p8 = (((widelimb) 1) << 64) + 485280304Sjkim (((widelimb) 1) << 8); 486280304Sjkim static const widelimb two64m8 = (((widelimb) 1) << 64) - 487280304Sjkim (((widelimb) 1) << 8); 488280304Sjkim static const widelimb two64m48m8 = (((widelimb) 1) << 64) - 489280304Sjkim (((widelimb) 1) << 48) - (((widelimb) 1) << 8); 490238384Sjkim 491280304Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 492280304Sjkim out[0] += two64p8; 493280304Sjkim out[1] += two64m48m8; 494280304Sjkim out[2] += two64m8; 495280304Sjkim out[3] += two64m8; 496238384Sjkim 497280304Sjkim out[0] -= in[0]; 498280304Sjkim out[1] -= in[1]; 499280304Sjkim out[2] -= in[2]; 500280304Sjkim out[3] -= in[3]; 501280304Sjkim} 502238384Sjkim 503280304Sjkim/* 504280304Sjkim * Multiply a field element by a scalar: out = out * scalar The scalars we 505280304Sjkim * actually use are small, so results fit without overflow 506280304Sjkim */ 507238384Sjkimstatic void felem_scalar(felem out, const limb scalar) 508280304Sjkim{ 509280304Sjkim out[0] *= scalar; 510280304Sjkim out[1] *= scalar; 511280304Sjkim out[2] *= scalar; 512280304Sjkim out[3] *= scalar; 513280304Sjkim} 514238384Sjkim 515280304Sjkim/* 516280304Sjkim * Multiply an unreduced field element by a scalar: out = out * scalar The 517280304Sjkim * scalars we actually use are small, so results fit without overflow 518280304Sjkim */ 519238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar) 520280304Sjkim{ 521280304Sjkim out[0] *= scalar; 522280304Sjkim out[1] *= scalar; 523280304Sjkim out[2] *= scalar; 524280304Sjkim out[3] *= scalar; 525280304Sjkim out[4] *= scalar; 526280304Sjkim out[5] *= scalar; 527280304Sjkim out[6] *= scalar; 528280304Sjkim} 529238384Sjkim 530238384Sjkim/* Square a field element: out = in^2 */ 531238384Sjkimstatic void felem_square(widefelem out, const felem in) 532280304Sjkim{ 533280304Sjkim limb tmp0, tmp1, tmp2; 534280304Sjkim tmp0 = 2 * in[0]; 535280304Sjkim tmp1 = 2 * in[1]; 536280304Sjkim tmp2 = 2 * in[2]; 537280304Sjkim out[0] = ((widelimb) in[0]) * in[0]; 538280304Sjkim out[1] = ((widelimb) in[0]) * tmp1; 539280304Sjkim out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1]; 540280304Sjkim out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2; 541280304Sjkim out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2]; 542280304Sjkim out[5] = ((widelimb) in[3]) * tmp2; 543280304Sjkim out[6] = ((widelimb) in[3]) * in[3]; 544280304Sjkim} 545238384Sjkim 546238384Sjkim/* Multiply two field elements: out = in1 * in2 */ 547238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2) 548280304Sjkim{ 549280304Sjkim out[0] = ((widelimb) in1[0]) * in2[0]; 550280304Sjkim out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0]; 551280304Sjkim out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] + 552280304Sjkim ((widelimb) in1[2]) * in2[0]; 553280304Sjkim out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] + 554280304Sjkim ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0]; 555280304Sjkim out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] + 556280304Sjkim ((widelimb) in1[3]) * in2[1]; 557280304Sjkim out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2]; 558280304Sjkim out[6] = ((widelimb) in1[3]) * in2[3]; 559280304Sjkim} 560238384Sjkim 561280304Sjkim/*- 562280304Sjkim * Reduce seven 128-bit coefficients to four 64-bit coefficients. 563238384Sjkim * Requires in[i] < 2^126, 564238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */ 565238384Sjkimstatic void felem_reduce(felem out, const widefelem in) 566280304Sjkim{ 567280304Sjkim static const widelimb two127p15 = (((widelimb) 1) << 127) + 568280304Sjkim (((widelimb) 1) << 15); 569280304Sjkim static const widelimb two127m71 = (((widelimb) 1) << 127) - 570280304Sjkim (((widelimb) 1) << 71); 571280304Sjkim static const widelimb two127m71m55 = (((widelimb) 1) << 127) - 572280304Sjkim (((widelimb) 1) << 71) - (((widelimb) 1) << 55); 573280304Sjkim widelimb output[5]; 574238384Sjkim 575280304Sjkim /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */ 576280304Sjkim output[0] = in[0] + two127p15; 577280304Sjkim output[1] = in[1] + two127m71m55; 578280304Sjkim output[2] = in[2] + two127m71; 579280304Sjkim output[3] = in[3]; 580280304Sjkim output[4] = in[4]; 581238384Sjkim 582280304Sjkim /* Eliminate in[4], in[5], in[6] */ 583280304Sjkim output[4] += in[6] >> 16; 584280304Sjkim output[3] += (in[6] & 0xffff) << 40; 585280304Sjkim output[2] -= in[6]; 586238384Sjkim 587280304Sjkim output[3] += in[5] >> 16; 588280304Sjkim output[2] += (in[5] & 0xffff) << 40; 589280304Sjkim output[1] -= in[5]; 590238384Sjkim 591280304Sjkim output[2] += output[4] >> 16; 592280304Sjkim output[1] += (output[4] & 0xffff) << 40; 593280304Sjkim output[0] -= output[4]; 594238384Sjkim 595280304Sjkim /* Carry 2 -> 3 -> 4 */ 596280304Sjkim output[3] += output[2] >> 56; 597280304Sjkim output[2] &= 0x00ffffffffffffff; 598238384Sjkim 599280304Sjkim output[4] = output[3] >> 56; 600280304Sjkim output[3] &= 0x00ffffffffffffff; 601238384Sjkim 602280304Sjkim /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */ 603238384Sjkim 604280304Sjkim /* Eliminate output[4] */ 605280304Sjkim output[2] += output[4] >> 16; 606280304Sjkim /* output[2] < 2^56 + 2^56 = 2^57 */ 607280304Sjkim output[1] += (output[4] & 0xffff) << 40; 608280304Sjkim output[0] -= output[4]; 609238384Sjkim 610280304Sjkim /* Carry 0 -> 1 -> 2 -> 3 */ 611280304Sjkim output[1] += output[0] >> 56; 612280304Sjkim out[0] = output[0] & 0x00ffffffffffffff; 613238384Sjkim 614280304Sjkim output[2] += output[1] >> 56; 615280304Sjkim /* output[2] < 2^57 + 2^72 */ 616280304Sjkim out[1] = output[1] & 0x00ffffffffffffff; 617280304Sjkim output[3] += output[2] >> 56; 618280304Sjkim /* output[3] <= 2^56 + 2^16 */ 619280304Sjkim out[2] = output[2] & 0x00ffffffffffffff; 620238384Sjkim 621280304Sjkim /*- 622280304Sjkim * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, 623280304Sjkim * out[3] <= 2^56 + 2^16 (due to final carry), 624280304Sjkim * so out < 2*p 625280304Sjkim */ 626280304Sjkim out[3] = output[3]; 627280304Sjkim} 628238384Sjkim 629238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 630280304Sjkim{ 631280304Sjkim widefelem tmp; 632280304Sjkim felem_square(tmp, in); 633280304Sjkim felem_reduce(out, tmp); 634280304Sjkim} 635238384Sjkim 636238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 637280304Sjkim{ 638280304Sjkim widefelem tmp; 639280304Sjkim felem_mul(tmp, in1, in2); 640280304Sjkim felem_reduce(out, tmp); 641280304Sjkim} 642238384Sjkim 643280304Sjkim/* 644280304Sjkim * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always 645280304Sjkim * call felem_reduce first) 646280304Sjkim */ 647238384Sjkimstatic void felem_contract(felem out, const felem in) 648280304Sjkim{ 649280304Sjkim static const int64_t two56 = ((limb) 1) << 56; 650280304Sjkim /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */ 651280304Sjkim /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */ 652280304Sjkim int64_t tmp[4], a; 653280304Sjkim tmp[0] = in[0]; 654280304Sjkim tmp[1] = in[1]; 655280304Sjkim tmp[2] = in[2]; 656280304Sjkim tmp[3] = in[3]; 657280304Sjkim /* Case 1: a = 1 iff in >= 2^224 */ 658280304Sjkim a = (in[3] >> 56); 659280304Sjkim tmp[0] -= a; 660280304Sjkim tmp[1] += a << 40; 661280304Sjkim tmp[3] &= 0x00ffffffffffffff; 662280304Sjkim /* 663280304Sjkim * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1 664280304Sjkim * and the lower part is non-zero 665280304Sjkim */ 666280304Sjkim a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | 667280304Sjkim (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63); 668280304Sjkim a &= 0x00ffffffffffffff; 669280304Sjkim /* turn a into an all-one mask (if a = 0) or an all-zero mask */ 670280304Sjkim a = (a - 1) >> 63; 671280304Sjkim /* subtract 2^224 - 2^96 + 1 if a is all-one */ 672280304Sjkim tmp[3] &= a ^ 0xffffffffffffffff; 673280304Sjkim tmp[2] &= a ^ 0xffffffffffffffff; 674280304Sjkim tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff; 675280304Sjkim tmp[0] -= 1 & a; 676238384Sjkim 677280304Sjkim /* 678280304Sjkim * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be 679280304Sjkim * non-zero, so we only need one step 680280304Sjkim */ 681280304Sjkim a = tmp[0] >> 63; 682280304Sjkim tmp[0] += two56 & a; 683280304Sjkim tmp[1] -= 1 & a; 684238384Sjkim 685280304Sjkim /* carry 1 -> 2 -> 3 */ 686280304Sjkim tmp[2] += tmp[1] >> 56; 687280304Sjkim tmp[1] &= 0x00ffffffffffffff; 688238384Sjkim 689280304Sjkim tmp[3] += tmp[2] >> 56; 690280304Sjkim tmp[2] &= 0x00ffffffffffffff; 691238384Sjkim 692280304Sjkim /* Now 0 <= out < p */ 693280304Sjkim out[0] = tmp[0]; 694280304Sjkim out[1] = tmp[1]; 695280304Sjkim out[2] = tmp[2]; 696280304Sjkim out[3] = tmp[3]; 697280304Sjkim} 698238384Sjkim 699280304Sjkim/* 700280304Sjkim * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field 701280304Sjkim * elements are reduced to in < 2^225, so we only need to check three cases: 702280304Sjkim * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2 703280304Sjkim */ 704238384Sjkimstatic limb felem_is_zero(const felem in) 705280304Sjkim{ 706280304Sjkim limb zero, two224m96p1, two225m97p2; 707238384Sjkim 708280304Sjkim zero = in[0] | in[1] | in[2] | in[3]; 709280304Sjkim zero = (((int64_t) (zero) - 1) >> 63) & 1; 710280304Sjkim two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000) 711280304Sjkim | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff); 712280304Sjkim two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1; 713280304Sjkim two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000) 714280304Sjkim | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff); 715280304Sjkim two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1; 716280304Sjkim return (zero | two224m96p1 | two225m97p2); 717280304Sjkim} 718238384Sjkim 719238384Sjkimstatic limb felem_is_zero_int(const felem in) 720280304Sjkim{ 721280304Sjkim return (int)(felem_is_zero(in) & ((limb) 1)); 722280304Sjkim} 723238384Sjkim 724238384Sjkim/* Invert a field element */ 725238384Sjkim/* Computation chain copied from djb's code */ 726238384Sjkimstatic void felem_inv(felem out, const felem in) 727280304Sjkim{ 728280304Sjkim felem ftmp, ftmp2, ftmp3, ftmp4; 729280304Sjkim widefelem tmp; 730280304Sjkim unsigned i; 731238384Sjkim 732280304Sjkim felem_square(tmp, in); 733280304Sjkim felem_reduce(ftmp, tmp); /* 2 */ 734280304Sjkim felem_mul(tmp, in, ftmp); 735280304Sjkim felem_reduce(ftmp, tmp); /* 2^2 - 1 */ 736280304Sjkim felem_square(tmp, ftmp); 737280304Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 2 */ 738280304Sjkim felem_mul(tmp, in, ftmp); 739280304Sjkim felem_reduce(ftmp, tmp); /* 2^3 - 1 */ 740280304Sjkim felem_square(tmp, ftmp); 741280304Sjkim felem_reduce(ftmp2, tmp); /* 2^4 - 2 */ 742280304Sjkim felem_square(tmp, ftmp2); 743280304Sjkim felem_reduce(ftmp2, tmp); /* 2^5 - 4 */ 744280304Sjkim felem_square(tmp, ftmp2); 745280304Sjkim felem_reduce(ftmp2, tmp); /* 2^6 - 8 */ 746280304Sjkim felem_mul(tmp, ftmp2, ftmp); 747280304Sjkim felem_reduce(ftmp, tmp); /* 2^6 - 1 */ 748280304Sjkim felem_square(tmp, ftmp); 749280304Sjkim felem_reduce(ftmp2, tmp); /* 2^7 - 2 */ 750280304Sjkim for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */ 751280304Sjkim felem_square(tmp, ftmp2); 752280304Sjkim felem_reduce(ftmp2, tmp); 753280304Sjkim } 754280304Sjkim felem_mul(tmp, ftmp2, ftmp); 755280304Sjkim felem_reduce(ftmp2, tmp); /* 2^12 - 1 */ 756280304Sjkim felem_square(tmp, ftmp2); 757280304Sjkim felem_reduce(ftmp3, tmp); /* 2^13 - 2 */ 758280304Sjkim for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */ 759280304Sjkim felem_square(tmp, ftmp3); 760280304Sjkim felem_reduce(ftmp3, tmp); 761280304Sjkim } 762280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 763280304Sjkim felem_reduce(ftmp2, tmp); /* 2^24 - 1 */ 764280304Sjkim felem_square(tmp, ftmp2); 765280304Sjkim felem_reduce(ftmp3, tmp); /* 2^25 - 2 */ 766280304Sjkim for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */ 767280304Sjkim felem_square(tmp, ftmp3); 768280304Sjkim felem_reduce(ftmp3, tmp); 769280304Sjkim } 770280304Sjkim felem_mul(tmp, ftmp3, ftmp2); 771280304Sjkim felem_reduce(ftmp3, tmp); /* 2^48 - 1 */ 772280304Sjkim felem_square(tmp, ftmp3); 773280304Sjkim felem_reduce(ftmp4, tmp); /* 2^49 - 2 */ 774280304Sjkim for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */ 775280304Sjkim felem_square(tmp, ftmp4); 776280304Sjkim felem_reduce(ftmp4, tmp); 777280304Sjkim } 778280304Sjkim felem_mul(tmp, ftmp3, ftmp4); 779280304Sjkim felem_reduce(ftmp3, tmp); /* 2^96 - 1 */ 780280304Sjkim felem_square(tmp, ftmp3); 781280304Sjkim felem_reduce(ftmp4, tmp); /* 2^97 - 2 */ 782280304Sjkim for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */ 783280304Sjkim felem_square(tmp, ftmp4); 784280304Sjkim felem_reduce(ftmp4, tmp); 785280304Sjkim } 786280304Sjkim felem_mul(tmp, ftmp2, ftmp4); 787280304Sjkim felem_reduce(ftmp2, tmp); /* 2^120 - 1 */ 788280304Sjkim for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */ 789280304Sjkim felem_square(tmp, ftmp2); 790280304Sjkim felem_reduce(ftmp2, tmp); 791280304Sjkim } 792280304Sjkim felem_mul(tmp, ftmp2, ftmp); 793280304Sjkim felem_reduce(ftmp, tmp); /* 2^126 - 1 */ 794280304Sjkim felem_square(tmp, ftmp); 795280304Sjkim felem_reduce(ftmp, tmp); /* 2^127 - 2 */ 796280304Sjkim felem_mul(tmp, ftmp, in); 797280304Sjkim felem_reduce(ftmp, tmp); /* 2^127 - 1 */ 798280304Sjkim for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */ 799280304Sjkim felem_square(tmp, ftmp); 800280304Sjkim felem_reduce(ftmp, tmp); 801280304Sjkim } 802280304Sjkim felem_mul(tmp, ftmp, ftmp3); 803280304Sjkim felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */ 804280304Sjkim} 805238384Sjkim 806280304Sjkim/* 807280304Sjkim * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy 808280304Sjkim * out to itself. 809280304Sjkim */ 810280304Sjkimstatic void copy_conditional(felem out, const felem in, limb icopy) 811280304Sjkim{ 812280304Sjkim unsigned i; 813280304Sjkim /* 814280304Sjkim * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one 815280304Sjkim */ 816280304Sjkim const limb copy = -icopy; 817280304Sjkim for (i = 0; i < 4; ++i) { 818280304Sjkim const limb tmp = copy & (in[i] ^ out[i]); 819280304Sjkim out[i] ^= tmp; 820280304Sjkim } 821280304Sjkim} 822238384Sjkim 823238384Sjkim/******************************************************************************/ 824280304Sjkim/*- 825280304Sjkim * ELLIPTIC CURVE POINT OPERATIONS 826238384Sjkim * 827238384Sjkim * Points are represented in Jacobian projective coordinates: 828238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3), 829238384Sjkim * or to the point at infinity if Z == 0. 830238384Sjkim * 831238384Sjkim */ 832238384Sjkim 833280304Sjkim/*- 834280304Sjkim * Double an elliptic curve point: 835238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where 836238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2 837238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2 838238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z 839238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed, 840280304Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). 841280304Sjkim */ 842238384Sjkimstatic void 843238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 844238384Sjkim const felem x_in, const felem y_in, const felem z_in) 845280304Sjkim{ 846280304Sjkim widefelem tmp, tmp2; 847280304Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 848238384Sjkim 849280304Sjkim felem_assign(ftmp, x_in); 850280304Sjkim felem_assign(ftmp2, x_in); 851238384Sjkim 852280304Sjkim /* delta = z^2 */ 853280304Sjkim felem_square(tmp, z_in); 854280304Sjkim felem_reduce(delta, tmp); 855238384Sjkim 856280304Sjkim /* gamma = y^2 */ 857280304Sjkim felem_square(tmp, y_in); 858280304Sjkim felem_reduce(gamma, tmp); 859238384Sjkim 860280304Sjkim /* beta = x*gamma */ 861280304Sjkim felem_mul(tmp, x_in, gamma); 862280304Sjkim felem_reduce(beta, tmp); 863238384Sjkim 864280304Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 865280304Sjkim felem_diff(ftmp, delta); 866280304Sjkim /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */ 867280304Sjkim felem_sum(ftmp2, delta); 868280304Sjkim /* ftmp2[i] < 2^57 + 2^57 = 2^58 */ 869280304Sjkim felem_scalar(ftmp2, 3); 870280304Sjkim /* ftmp2[i] < 3 * 2^58 < 2^60 */ 871280304Sjkim felem_mul(tmp, ftmp, ftmp2); 872280304Sjkim /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */ 873280304Sjkim felem_reduce(alpha, tmp); 874238384Sjkim 875280304Sjkim /* x' = alpha^2 - 8*beta */ 876280304Sjkim felem_square(tmp, alpha); 877280304Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 878280304Sjkim felem_assign(ftmp, beta); 879280304Sjkim felem_scalar(ftmp, 8); 880280304Sjkim /* ftmp[i] < 8 * 2^57 = 2^60 */ 881280304Sjkim felem_diff_128_64(tmp, ftmp); 882280304Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 883280304Sjkim felem_reduce(x_out, tmp); 884238384Sjkim 885280304Sjkim /* z' = (y + z)^2 - gamma - delta */ 886280304Sjkim felem_sum(delta, gamma); 887280304Sjkim /* delta[i] < 2^57 + 2^57 = 2^58 */ 888280304Sjkim felem_assign(ftmp, y_in); 889280304Sjkim felem_sum(ftmp, z_in); 890280304Sjkim /* ftmp[i] < 2^57 + 2^57 = 2^58 */ 891280304Sjkim felem_square(tmp, ftmp); 892280304Sjkim /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */ 893280304Sjkim felem_diff_128_64(tmp, delta); 894280304Sjkim /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */ 895280304Sjkim felem_reduce(z_out, tmp); 896238384Sjkim 897280304Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 898280304Sjkim felem_scalar(beta, 4); 899280304Sjkim /* beta[i] < 4 * 2^57 = 2^59 */ 900280304Sjkim felem_diff(beta, x_out); 901280304Sjkim /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */ 902280304Sjkim felem_mul(tmp, alpha, beta); 903280304Sjkim /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */ 904280304Sjkim felem_square(tmp2, gamma); 905280304Sjkim /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */ 906280304Sjkim widefelem_scalar(tmp2, 8); 907280304Sjkim /* tmp2[i] < 8 * 2^116 = 2^119 */ 908280304Sjkim widefelem_diff(tmp, tmp2); 909280304Sjkim /* tmp[i] < 2^119 + 2^120 < 2^121 */ 910280304Sjkim felem_reduce(y_out, tmp); 911280304Sjkim} 912238384Sjkim 913280304Sjkim/*- 914280304Sjkim * Add two elliptic curve points: 915238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where 916238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 - 917238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 918238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) - 919238384Sjkim * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3 920238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2) 921238384Sjkim * 922238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0. 923238384Sjkim */ 924238384Sjkim 925280304Sjkim/* 926280304Sjkim * This function is not entirely constant-time: it includes a branch for 927280304Sjkim * checking whether the two input points are equal, (while not equal to the 928280304Sjkim * point at infinity). This case never happens during single point 929280304Sjkim * multiplication, so there is no timing leak for ECDH or ECDSA signing. 930280304Sjkim */ 931238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 932280304Sjkim const felem x1, const felem y1, const felem z1, 933280304Sjkim const int mixed, const felem x2, const felem y2, 934280304Sjkim const felem z2) 935280304Sjkim{ 936280304Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out; 937280304Sjkim widefelem tmp, tmp2; 938280304Sjkim limb z1_is_zero, z2_is_zero, x_equal, y_equal; 939238384Sjkim 940280304Sjkim if (!mixed) { 941280304Sjkim /* ftmp2 = z2^2 */ 942280304Sjkim felem_square(tmp, z2); 943280304Sjkim felem_reduce(ftmp2, tmp); 944238384Sjkim 945280304Sjkim /* ftmp4 = z2^3 */ 946280304Sjkim felem_mul(tmp, ftmp2, z2); 947280304Sjkim felem_reduce(ftmp4, tmp); 948238384Sjkim 949280304Sjkim /* ftmp4 = z2^3*y1 */ 950280304Sjkim felem_mul(tmp2, ftmp4, y1); 951280304Sjkim felem_reduce(ftmp4, tmp2); 952238384Sjkim 953280304Sjkim /* ftmp2 = z2^2*x1 */ 954280304Sjkim felem_mul(tmp2, ftmp2, x1); 955280304Sjkim felem_reduce(ftmp2, tmp2); 956280304Sjkim } else { 957280304Sjkim /* 958280304Sjkim * We'll assume z2 = 1 (special case z2 = 0 is handled later) 959280304Sjkim */ 960238384Sjkim 961280304Sjkim /* ftmp4 = z2^3*y1 */ 962280304Sjkim felem_assign(ftmp4, y1); 963238384Sjkim 964280304Sjkim /* ftmp2 = z2^2*x1 */ 965280304Sjkim felem_assign(ftmp2, x1); 966280304Sjkim } 967238384Sjkim 968280304Sjkim /* ftmp = z1^2 */ 969280304Sjkim felem_square(tmp, z1); 970280304Sjkim felem_reduce(ftmp, tmp); 971238384Sjkim 972280304Sjkim /* ftmp3 = z1^3 */ 973280304Sjkim felem_mul(tmp, ftmp, z1); 974280304Sjkim felem_reduce(ftmp3, tmp); 975238384Sjkim 976280304Sjkim /* tmp = z1^3*y2 */ 977280304Sjkim felem_mul(tmp, ftmp3, y2); 978280304Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 979238384Sjkim 980280304Sjkim /* ftmp3 = z1^3*y2 - z2^3*y1 */ 981280304Sjkim felem_diff_128_64(tmp, ftmp4); 982280304Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 983280304Sjkim felem_reduce(ftmp3, tmp); 984238384Sjkim 985280304Sjkim /* tmp = z1^2*x2 */ 986280304Sjkim felem_mul(tmp, ftmp, x2); 987280304Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 988238384Sjkim 989280304Sjkim /* ftmp = z1^2*x2 - z2^2*x1 */ 990280304Sjkim felem_diff_128_64(tmp, ftmp2); 991280304Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 992280304Sjkim felem_reduce(ftmp, tmp); 993238384Sjkim 994280304Sjkim /* 995280304Sjkim * the formulae are incorrect if the points are equal so we check for 996280304Sjkim * this and do doubling if this happens 997280304Sjkim */ 998280304Sjkim x_equal = felem_is_zero(ftmp); 999280304Sjkim y_equal = felem_is_zero(ftmp3); 1000280304Sjkim z1_is_zero = felem_is_zero(z1); 1001280304Sjkim z2_is_zero = felem_is_zero(z2); 1002280304Sjkim /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */ 1003280304Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { 1004280304Sjkim point_double(x3, y3, z3, x1, y1, z1); 1005280304Sjkim return; 1006280304Sjkim } 1007238384Sjkim 1008280304Sjkim /* ftmp5 = z1*z2 */ 1009280304Sjkim if (!mixed) { 1010280304Sjkim felem_mul(tmp, z1, z2); 1011280304Sjkim felem_reduce(ftmp5, tmp); 1012280304Sjkim } else { 1013280304Sjkim /* special case z2 = 0 is handled later */ 1014280304Sjkim felem_assign(ftmp5, z1); 1015280304Sjkim } 1016238384Sjkim 1017280304Sjkim /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */ 1018280304Sjkim felem_mul(tmp, ftmp, ftmp5); 1019280304Sjkim felem_reduce(z_out, tmp); 1020238384Sjkim 1021280304Sjkim /* ftmp = (z1^2*x2 - z2^2*x1)^2 */ 1022280304Sjkim felem_assign(ftmp5, ftmp); 1023280304Sjkim felem_square(tmp, ftmp); 1024280304Sjkim felem_reduce(ftmp, tmp); 1025238384Sjkim 1026280304Sjkim /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */ 1027280304Sjkim felem_mul(tmp, ftmp, ftmp5); 1028280304Sjkim felem_reduce(ftmp5, tmp); 1029238384Sjkim 1030280304Sjkim /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1031280304Sjkim felem_mul(tmp, ftmp2, ftmp); 1032280304Sjkim felem_reduce(ftmp2, tmp); 1033238384Sjkim 1034280304Sjkim /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */ 1035280304Sjkim felem_mul(tmp, ftmp4, ftmp5); 1036280304Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 1037238384Sjkim 1038280304Sjkim /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */ 1039280304Sjkim felem_square(tmp2, ftmp3); 1040280304Sjkim /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */ 1041238384Sjkim 1042280304Sjkim /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */ 1043280304Sjkim felem_diff_128_64(tmp2, ftmp5); 1044280304Sjkim /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */ 1045238384Sjkim 1046280304Sjkim /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1047280304Sjkim felem_assign(ftmp5, ftmp2); 1048280304Sjkim felem_scalar(ftmp5, 2); 1049280304Sjkim /* ftmp5[i] < 2 * 2^57 = 2^58 */ 1050238384Sjkim 1051280304Sjkim /*- 1052280304Sjkim * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 - 1053280304Sjkim * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 1054280304Sjkim */ 1055280304Sjkim felem_diff_128_64(tmp2, ftmp5); 1056280304Sjkim /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */ 1057280304Sjkim felem_reduce(x_out, tmp2); 1058238384Sjkim 1059280304Sjkim /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */ 1060280304Sjkim felem_diff(ftmp2, x_out); 1061280304Sjkim /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */ 1062238384Sjkim 1063280304Sjkim /* 1064280304Sjkim * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) 1065280304Sjkim */ 1066280304Sjkim felem_mul(tmp2, ftmp3, ftmp2); 1067280304Sjkim /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */ 1068238384Sjkim 1069280304Sjkim /*- 1070280304Sjkim * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) - 1071280304Sjkim * z2^3*y1*(z1^2*x2 - z2^2*x1)^3 1072280304Sjkim */ 1073280304Sjkim widefelem_diff(tmp2, tmp); 1074280304Sjkim /* tmp2[i] < 2^118 + 2^120 < 2^121 */ 1075280304Sjkim felem_reduce(y_out, tmp2); 1076238384Sjkim 1077280304Sjkim /* 1078280304Sjkim * the result (x_out, y_out, z_out) is incorrect if one of the inputs is 1079280304Sjkim * the point at infinity, so we need to check for this separately 1080280304Sjkim */ 1081238384Sjkim 1082280304Sjkim /* 1083280304Sjkim * if point 1 is at infinity, copy point 2 to output, and vice versa 1084280304Sjkim */ 1085280304Sjkim copy_conditional(x_out, x2, z1_is_zero); 1086280304Sjkim copy_conditional(x_out, x1, z2_is_zero); 1087280304Sjkim copy_conditional(y_out, y2, z1_is_zero); 1088280304Sjkim copy_conditional(y_out, y1, z2_is_zero); 1089280304Sjkim copy_conditional(z_out, z2, z1_is_zero); 1090280304Sjkim copy_conditional(z_out, z1, z2_is_zero); 1091280304Sjkim felem_assign(x3, x_out); 1092280304Sjkim felem_assign(y3, y_out); 1093280304Sjkim felem_assign(z3, z_out); 1094280304Sjkim} 1095238384Sjkim 1096280304Sjkim/* 1097280304Sjkim * select_point selects the |idx|th point from a precomputation table and 1098280304Sjkim * copies it to out. 1099280304Sjkim * The pre_comp array argument should be size of |size| argument 1100280304Sjkim */ 1101280304Sjkimstatic void select_point(const u64 idx, unsigned int size, 1102280304Sjkim const felem pre_comp[][3], felem out[3]) 1103280304Sjkim{ 1104280304Sjkim unsigned i, j; 1105280304Sjkim limb *outlimbs = &out[0][0]; 1106280304Sjkim memset(outlimbs, 0, 3 * sizeof(felem)); 1107238384Sjkim 1108280304Sjkim for (i = 0; i < size; i++) { 1109280304Sjkim const limb *inlimbs = &pre_comp[i][0][0]; 1110280304Sjkim u64 mask = i ^ idx; 1111280304Sjkim mask |= mask >> 4; 1112280304Sjkim mask |= mask >> 2; 1113280304Sjkim mask |= mask >> 1; 1114280304Sjkim mask &= 1; 1115280304Sjkim mask--; 1116280304Sjkim for (j = 0; j < 4 * 3; j++) 1117280304Sjkim outlimbs[j] |= inlimbs[j] & mask; 1118280304Sjkim } 1119280304Sjkim} 1120238384Sjkim 1121238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1122238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i) 1123280304Sjkim{ 1124280304Sjkim if (i >= 224) 1125280304Sjkim return 0; 1126280304Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1127280304Sjkim} 1128238384Sjkim 1129280304Sjkim/* 1130280304Sjkim * Interleaved point multiplication using precomputed point multiples: The 1131280304Sjkim * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1132280304Sjkim * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1133280304Sjkim * generator, using certain (large) precomputed multiples in g_pre_comp. 1134280304Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1135280304Sjkim */ 1136238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1137280304Sjkim const felem_bytearray scalars[], 1138280304Sjkim const unsigned num_points, const u8 *g_scalar, 1139280304Sjkim const int mixed, const felem pre_comp[][17][3], 1140280304Sjkim const felem g_pre_comp[2][16][3]) 1141280304Sjkim{ 1142280304Sjkim int i, skip; 1143280304Sjkim unsigned num; 1144280304Sjkim unsigned gen_mul = (g_scalar != NULL); 1145280304Sjkim felem nq[3], tmp[4]; 1146280304Sjkim u64 bits; 1147280304Sjkim u8 sign, digit; 1148238384Sjkim 1149280304Sjkim /* set nq to the point at infinity */ 1150280304Sjkim memset(nq, 0, 3 * sizeof(felem)); 1151238384Sjkim 1152280304Sjkim /* 1153280304Sjkim * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1154280304Sjkim * of the generator (two in each of the last 28 rounds) and additions of 1155280304Sjkim * other points multiples (every 5th round). 1156280304Sjkim */ 1157280304Sjkim skip = 1; /* save two point operations in the first 1158280304Sjkim * round */ 1159280304Sjkim for (i = (num_points ? 220 : 27); i >= 0; --i) { 1160280304Sjkim /* double */ 1161280304Sjkim if (!skip) 1162280304Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1163238384Sjkim 1164280304Sjkim /* add multiples of the generator */ 1165280304Sjkim if (gen_mul && (i <= 27)) { 1166280304Sjkim /* first, look 28 bits upwards */ 1167280304Sjkim bits = get_bit(g_scalar, i + 196) << 3; 1168280304Sjkim bits |= get_bit(g_scalar, i + 140) << 2; 1169280304Sjkim bits |= get_bit(g_scalar, i + 84) << 1; 1170280304Sjkim bits |= get_bit(g_scalar, i + 28); 1171280304Sjkim /* select the point to add, in constant time */ 1172280304Sjkim select_point(bits, 16, g_pre_comp[1], tmp); 1173238384Sjkim 1174280304Sjkim if (!skip) { 1175280304Sjkim /* value 1 below is argument for "mixed" */ 1176280304Sjkim point_add(nq[0], nq[1], nq[2], 1177280304Sjkim nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1178280304Sjkim } else { 1179280304Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1180280304Sjkim skip = 0; 1181280304Sjkim } 1182238384Sjkim 1183280304Sjkim /* second, look at the current position */ 1184280304Sjkim bits = get_bit(g_scalar, i + 168) << 3; 1185280304Sjkim bits |= get_bit(g_scalar, i + 112) << 2; 1186280304Sjkim bits |= get_bit(g_scalar, i + 56) << 1; 1187280304Sjkim bits |= get_bit(g_scalar, i); 1188280304Sjkim /* select the point to add, in constant time */ 1189280304Sjkim select_point(bits, 16, g_pre_comp[0], tmp); 1190280304Sjkim point_add(nq[0], nq[1], nq[2], 1191280304Sjkim nq[0], nq[1], nq[2], 1192280304Sjkim 1 /* mixed */ , tmp[0], tmp[1], tmp[2]); 1193280304Sjkim } 1194238384Sjkim 1195280304Sjkim /* do other additions every 5 doublings */ 1196280304Sjkim if (num_points && (i % 5 == 0)) { 1197280304Sjkim /* loop over all scalars */ 1198280304Sjkim for (num = 0; num < num_points; ++num) { 1199280304Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1200280304Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1201280304Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1202280304Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1203280304Sjkim bits |= get_bit(scalars[num], i) << 1; 1204280304Sjkim bits |= get_bit(scalars[num], i - 1); 1205280304Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1206238384Sjkim 1207280304Sjkim /* select the point to add or subtract */ 1208280304Sjkim select_point(digit, 17, pre_comp[num], tmp); 1209280304Sjkim felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1210280304Sjkim * point */ 1211280304Sjkim copy_conditional(tmp[1], tmp[3], sign); 1212238384Sjkim 1213280304Sjkim if (!skip) { 1214280304Sjkim point_add(nq[0], nq[1], nq[2], 1215280304Sjkim nq[0], nq[1], nq[2], 1216280304Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1217280304Sjkim } else { 1218280304Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1219280304Sjkim skip = 0; 1220280304Sjkim } 1221280304Sjkim } 1222280304Sjkim } 1223280304Sjkim } 1224280304Sjkim felem_assign(x_out, nq[0]); 1225280304Sjkim felem_assign(y_out, nq[1]); 1226280304Sjkim felem_assign(z_out, nq[2]); 1227280304Sjkim} 1228238384Sjkim 1229238384Sjkim/******************************************************************************/ 1230280304Sjkim/* 1231280304Sjkim * FUNCTIONS TO MANAGE PRECOMPUTATION 1232238384Sjkim */ 1233238384Sjkim 1234238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new() 1235280304Sjkim{ 1236280304Sjkim NISTP224_PRE_COMP *ret = NULL; 1237280304Sjkim ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret); 1238280304Sjkim if (!ret) { 1239280304Sjkim ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1240280304Sjkim return ret; 1241280304Sjkim } 1242280304Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1243280304Sjkim ret->references = 1; 1244280304Sjkim return ret; 1245280304Sjkim} 1246238384Sjkim 1247238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_) 1248280304Sjkim{ 1249280304Sjkim NISTP224_PRE_COMP *src = src_; 1250238384Sjkim 1251280304Sjkim /* no need to actually copy, these objects never change! */ 1252280304Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1253238384Sjkim 1254280304Sjkim return src_; 1255280304Sjkim} 1256238384Sjkim 1257238384Sjkimstatic void nistp224_pre_comp_free(void *pre_) 1258280304Sjkim{ 1259280304Sjkim int i; 1260280304Sjkim NISTP224_PRE_COMP *pre = pre_; 1261238384Sjkim 1262280304Sjkim if (!pre) 1263280304Sjkim return; 1264238384Sjkim 1265280304Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1266280304Sjkim if (i > 0) 1267280304Sjkim return; 1268238384Sjkim 1269280304Sjkim OPENSSL_free(pre); 1270280304Sjkim} 1271238384Sjkim 1272238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_) 1273280304Sjkim{ 1274280304Sjkim int i; 1275280304Sjkim NISTP224_PRE_COMP *pre = pre_; 1276238384Sjkim 1277280304Sjkim if (!pre) 1278280304Sjkim return; 1279238384Sjkim 1280280304Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1281280304Sjkim if (i > 0) 1282280304Sjkim return; 1283238384Sjkim 1284280304Sjkim OPENSSL_cleanse(pre, sizeof *pre); 1285280304Sjkim OPENSSL_free(pre); 1286280304Sjkim} 1287238384Sjkim 1288238384Sjkim/******************************************************************************/ 1289280304Sjkim/* 1290280304Sjkim * OPENSSL EC_METHOD FUNCTIONS 1291238384Sjkim */ 1292238384Sjkim 1293238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group) 1294280304Sjkim{ 1295280304Sjkim int ret; 1296280304Sjkim ret = ec_GFp_simple_group_init(group); 1297280304Sjkim group->a_is_minus3 = 1; 1298280304Sjkim return ret; 1299280304Sjkim} 1300238384Sjkim 1301238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1302280304Sjkim const BIGNUM *a, const BIGNUM *b, 1303280304Sjkim BN_CTX *ctx) 1304280304Sjkim{ 1305280304Sjkim int ret = 0; 1306280304Sjkim BN_CTX *new_ctx = NULL; 1307280304Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1308238384Sjkim 1309280304Sjkim if (ctx == NULL) 1310280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1311280304Sjkim return 0; 1312280304Sjkim BN_CTX_start(ctx); 1313280304Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1314280304Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1315280304Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) 1316280304Sjkim goto err; 1317280304Sjkim BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1318280304Sjkim BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1319280304Sjkim BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1320280304Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1321280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1322280304Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1323280304Sjkim goto err; 1324280304Sjkim } 1325280304Sjkim group->field_mod_func = BN_nist_mod_224; 1326280304Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1327280304Sjkim err: 1328280304Sjkim BN_CTX_end(ctx); 1329280304Sjkim if (new_ctx != NULL) 1330280304Sjkim BN_CTX_free(new_ctx); 1331280304Sjkim return ret; 1332280304Sjkim} 1333238384Sjkim 1334280304Sjkim/* 1335280304Sjkim * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1336280304Sjkim * (X/Z^2, Y/Z^3) 1337280304Sjkim */ 1338238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1339280304Sjkim const EC_POINT *point, 1340280304Sjkim BIGNUM *x, BIGNUM *y, 1341280304Sjkim BN_CTX *ctx) 1342280304Sjkim{ 1343280304Sjkim felem z1, z2, x_in, y_in, x_out, y_out; 1344280304Sjkim widefelem tmp; 1345238384Sjkim 1346280304Sjkim if (EC_POINT_is_at_infinity(group, point)) { 1347280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1348280304Sjkim EC_R_POINT_AT_INFINITY); 1349280304Sjkim return 0; 1350280304Sjkim } 1351280304Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1352280304Sjkim (!BN_to_felem(z1, &point->Z))) 1353280304Sjkim return 0; 1354280304Sjkim felem_inv(z2, z1); 1355280304Sjkim felem_square(tmp, z2); 1356280304Sjkim felem_reduce(z1, tmp); 1357280304Sjkim felem_mul(tmp, x_in, z1); 1358280304Sjkim felem_reduce(x_in, tmp); 1359280304Sjkim felem_contract(x_out, x_in); 1360280304Sjkim if (x != NULL) { 1361280304Sjkim if (!felem_to_BN(x, x_out)) { 1362280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1363280304Sjkim ERR_R_BN_LIB); 1364280304Sjkim return 0; 1365280304Sjkim } 1366280304Sjkim } 1367280304Sjkim felem_mul(tmp, z1, z2); 1368280304Sjkim felem_reduce(z1, tmp); 1369280304Sjkim felem_mul(tmp, y_in, z1); 1370280304Sjkim felem_reduce(y_in, tmp); 1371280304Sjkim felem_contract(y_out, y_in); 1372280304Sjkim if (y != NULL) { 1373280304Sjkim if (!felem_to_BN(y, y_out)) { 1374280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1375280304Sjkim ERR_R_BN_LIB); 1376280304Sjkim return 0; 1377280304Sjkim } 1378280304Sjkim } 1379280304Sjkim return 1; 1380280304Sjkim} 1381238384Sjkim 1382280304Sjkimstatic void make_points_affine(size_t num, felem points[ /* num */ ][3], 1383280304Sjkim felem tmp_felems[ /* num+1 */ ]) 1384280304Sjkim{ 1385280304Sjkim /* 1386280304Sjkim * Runs in constant time, unless an input is the point at infinity (which 1387280304Sjkim * normally shouldn't happen). 1388280304Sjkim */ 1389280304Sjkim ec_GFp_nistp_points_make_affine_internal(num, 1390280304Sjkim points, 1391280304Sjkim sizeof(felem), 1392280304Sjkim tmp_felems, 1393280304Sjkim (void (*)(void *))felem_one, 1394280304Sjkim (int (*)(const void *)) 1395280304Sjkim felem_is_zero_int, 1396280304Sjkim (void (*)(void *, const void *)) 1397280304Sjkim felem_assign, 1398280304Sjkim (void (*)(void *, const void *)) 1399280304Sjkim felem_square_reduce, (void (*) 1400280304Sjkim (void *, 1401280304Sjkim const void 1402280304Sjkim *, 1403280304Sjkim const void 1404280304Sjkim *)) 1405280304Sjkim felem_mul_reduce, 1406280304Sjkim (void (*)(void *, const void *)) 1407280304Sjkim felem_inv, 1408280304Sjkim (void (*)(void *, const void *)) 1409280304Sjkim felem_contract); 1410280304Sjkim} 1411238384Sjkim 1412280304Sjkim/* 1413280304Sjkim * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1414280304Sjkim * values Result is stored in r (r can equal one of the inputs). 1415280304Sjkim */ 1416238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, 1417280304Sjkim const BIGNUM *scalar, size_t num, 1418280304Sjkim const EC_POINT *points[], 1419280304Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1420280304Sjkim{ 1421280304Sjkim int ret = 0; 1422280304Sjkim int j; 1423280304Sjkim unsigned i; 1424280304Sjkim int mixed = 0; 1425280304Sjkim BN_CTX *new_ctx = NULL; 1426280304Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1427280304Sjkim felem_bytearray g_secret; 1428280304Sjkim felem_bytearray *secrets = NULL; 1429280304Sjkim felem(*pre_comp)[17][3] = NULL; 1430280304Sjkim felem *tmp_felems = NULL; 1431280304Sjkim felem_bytearray tmp; 1432280304Sjkim unsigned num_bytes; 1433280304Sjkim int have_pre_comp = 0; 1434280304Sjkim size_t num_points = num; 1435280304Sjkim felem x_in, y_in, z_in, x_out, y_out, z_out; 1436280304Sjkim NISTP224_PRE_COMP *pre = NULL; 1437280304Sjkim const felem(*g_pre_comp)[16][3] = NULL; 1438280304Sjkim EC_POINT *generator = NULL; 1439280304Sjkim const EC_POINT *p = NULL; 1440280304Sjkim const BIGNUM *p_scalar = NULL; 1441238384Sjkim 1442280304Sjkim if (ctx == NULL) 1443280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1444280304Sjkim return 0; 1445280304Sjkim BN_CTX_start(ctx); 1446280304Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1447280304Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1448280304Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1449280304Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1450280304Sjkim goto err; 1451238384Sjkim 1452280304Sjkim if (scalar != NULL) { 1453280304Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1454280304Sjkim nistp224_pre_comp_dup, 1455280304Sjkim nistp224_pre_comp_free, 1456280304Sjkim nistp224_pre_comp_clear_free); 1457280304Sjkim if (pre) 1458280304Sjkim /* we have precomputation, try to use it */ 1459280304Sjkim g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp; 1460280304Sjkim else 1461280304Sjkim /* try to use the standard precomputation */ 1462280304Sjkim g_pre_comp = &gmul[0]; 1463280304Sjkim generator = EC_POINT_new(group); 1464280304Sjkim if (generator == NULL) 1465280304Sjkim goto err; 1466280304Sjkim /* get the generator from precomputation */ 1467280304Sjkim if (!felem_to_BN(x, g_pre_comp[0][1][0]) || 1468280304Sjkim !felem_to_BN(y, g_pre_comp[0][1][1]) || 1469280304Sjkim !felem_to_BN(z, g_pre_comp[0][1][2])) { 1470280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1471280304Sjkim goto err; 1472280304Sjkim } 1473280304Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1474280304Sjkim generator, x, y, z, 1475280304Sjkim ctx)) 1476280304Sjkim goto err; 1477280304Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1478280304Sjkim /* precomputation matches generator */ 1479280304Sjkim have_pre_comp = 1; 1480280304Sjkim else 1481280304Sjkim /* 1482280304Sjkim * we don't have valid precomputation: treat the generator as a 1483280304Sjkim * random point 1484280304Sjkim */ 1485280304Sjkim num_points = num_points + 1; 1486280304Sjkim } 1487238384Sjkim 1488280304Sjkim if (num_points > 0) { 1489280304Sjkim if (num_points >= 3) { 1490280304Sjkim /* 1491280304Sjkim * unless we precompute multiples for just one or two points, 1492280304Sjkim * converting those into affine form is time well spent 1493280304Sjkim */ 1494280304Sjkim mixed = 1; 1495280304Sjkim } 1496280304Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1497280304Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1498280304Sjkim if (mixed) 1499280304Sjkim tmp_felems = 1500280304Sjkim OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1501280304Sjkim if ((secrets == NULL) || (pre_comp == NULL) 1502280304Sjkim || (mixed && (tmp_felems == NULL))) { 1503280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1504280304Sjkim goto err; 1505280304Sjkim } 1506238384Sjkim 1507280304Sjkim /* 1508280304Sjkim * we treat NULL scalars as 0, and NULL points as points at infinity, 1509280304Sjkim * i.e., they contribute nothing to the linear combination 1510280304Sjkim */ 1511280304Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1512280304Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1513280304Sjkim for (i = 0; i < num_points; ++i) { 1514280304Sjkim if (i == num) 1515280304Sjkim /* the generator */ 1516280304Sjkim { 1517280304Sjkim p = EC_GROUP_get0_generator(group); 1518280304Sjkim p_scalar = scalar; 1519280304Sjkim } else 1520280304Sjkim /* the i^th point */ 1521280304Sjkim { 1522280304Sjkim p = points[i]; 1523280304Sjkim p_scalar = scalars[i]; 1524280304Sjkim } 1525280304Sjkim if ((p_scalar != NULL) && (p != NULL)) { 1526280304Sjkim /* reduce scalar to 0 <= scalar < 2^224 */ 1527280304Sjkim if ((BN_num_bits(p_scalar) > 224) 1528280304Sjkim || (BN_is_negative(p_scalar))) { 1529280304Sjkim /* 1530280304Sjkim * this is an unusual input, and we don't guarantee 1531280304Sjkim * constant-timeness 1532280304Sjkim */ 1533280304Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1534280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1535280304Sjkim goto err; 1536280304Sjkim } 1537280304Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1538280304Sjkim } else 1539280304Sjkim num_bytes = BN_bn2bin(p_scalar, tmp); 1540280304Sjkim flip_endian(secrets[i], tmp, num_bytes); 1541280304Sjkim /* precompute multiples */ 1542280304Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1543280304Sjkim (!BN_to_felem(y_out, &p->Y)) || 1544280304Sjkim (!BN_to_felem(z_out, &p->Z))) 1545280304Sjkim goto err; 1546280304Sjkim felem_assign(pre_comp[i][1][0], x_out); 1547280304Sjkim felem_assign(pre_comp[i][1][1], y_out); 1548280304Sjkim felem_assign(pre_comp[i][1][2], z_out); 1549280304Sjkim for (j = 2; j <= 16; ++j) { 1550280304Sjkim if (j & 1) { 1551280304Sjkim point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1552280304Sjkim pre_comp[i][j][2], pre_comp[i][1][0], 1553280304Sjkim pre_comp[i][1][1], pre_comp[i][1][2], 0, 1554280304Sjkim pre_comp[i][j - 1][0], 1555280304Sjkim pre_comp[i][j - 1][1], 1556280304Sjkim pre_comp[i][j - 1][2]); 1557280304Sjkim } else { 1558280304Sjkim point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1559280304Sjkim pre_comp[i][j][2], pre_comp[i][j / 2][0], 1560280304Sjkim pre_comp[i][j / 2][1], 1561280304Sjkim pre_comp[i][j / 2][2]); 1562280304Sjkim } 1563280304Sjkim } 1564280304Sjkim } 1565280304Sjkim } 1566280304Sjkim if (mixed) 1567280304Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1568280304Sjkim } 1569238384Sjkim 1570280304Sjkim /* the scalar for the generator */ 1571280304Sjkim if ((scalar != NULL) && (have_pre_comp)) { 1572280304Sjkim memset(g_secret, 0, sizeof g_secret); 1573280304Sjkim /* reduce scalar to 0 <= scalar < 2^224 */ 1574280304Sjkim if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) { 1575280304Sjkim /* 1576280304Sjkim * this is an unusual input, and we don't guarantee 1577280304Sjkim * constant-timeness 1578280304Sjkim */ 1579280304Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1580280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1581280304Sjkim goto err; 1582280304Sjkim } 1583280304Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1584280304Sjkim } else 1585280304Sjkim num_bytes = BN_bn2bin(scalar, tmp); 1586280304Sjkim flip_endian(g_secret, tmp, num_bytes); 1587280304Sjkim /* do the multiplication with generator precomputation */ 1588280304Sjkim batch_mul(x_out, y_out, z_out, 1589280304Sjkim (const felem_bytearray(*))secrets, num_points, 1590280304Sjkim g_secret, 1591280304Sjkim mixed, (const felem(*)[17][3])pre_comp, g_pre_comp); 1592280304Sjkim } else 1593280304Sjkim /* do the multiplication without generator precomputation */ 1594280304Sjkim batch_mul(x_out, y_out, z_out, 1595280304Sjkim (const felem_bytearray(*))secrets, num_points, 1596280304Sjkim NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1597280304Sjkim /* reduce the output to its unique minimal representation */ 1598280304Sjkim felem_contract(x_in, x_out); 1599280304Sjkim felem_contract(y_in, y_out); 1600280304Sjkim felem_contract(z_in, z_out); 1601280304Sjkim if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1602280304Sjkim (!felem_to_BN(z, z_in))) { 1603280304Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1604280304Sjkim goto err; 1605280304Sjkim } 1606280304Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1607238384Sjkim 1608280304Sjkim err: 1609280304Sjkim BN_CTX_end(ctx); 1610280304Sjkim if (generator != NULL) 1611280304Sjkim EC_POINT_free(generator); 1612280304Sjkim if (new_ctx != NULL) 1613280304Sjkim BN_CTX_free(new_ctx); 1614280304Sjkim if (secrets != NULL) 1615280304Sjkim OPENSSL_free(secrets); 1616280304Sjkim if (pre_comp != NULL) 1617280304Sjkim OPENSSL_free(pre_comp); 1618280304Sjkim if (tmp_felems != NULL) 1619280304Sjkim OPENSSL_free(tmp_felems); 1620280304Sjkim return ret; 1621280304Sjkim} 1622238384Sjkim 1623238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1624280304Sjkim{ 1625280304Sjkim int ret = 0; 1626280304Sjkim NISTP224_PRE_COMP *pre = NULL; 1627280304Sjkim int i, j; 1628280304Sjkim BN_CTX *new_ctx = NULL; 1629280304Sjkim BIGNUM *x, *y; 1630280304Sjkim EC_POINT *generator = NULL; 1631280304Sjkim felem tmp_felems[32]; 1632238384Sjkim 1633280304Sjkim /* throw away old precomputation */ 1634280304Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup, 1635280304Sjkim nistp224_pre_comp_free, 1636280304Sjkim nistp224_pre_comp_clear_free); 1637280304Sjkim if (ctx == NULL) 1638280304Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1639280304Sjkim return 0; 1640280304Sjkim BN_CTX_start(ctx); 1641280304Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 1642280304Sjkim goto err; 1643280304Sjkim /* get the generator */ 1644280304Sjkim if (group->generator == NULL) 1645280304Sjkim goto err; 1646280304Sjkim generator = EC_POINT_new(group); 1647280304Sjkim if (generator == NULL) 1648280304Sjkim goto err; 1649280304Sjkim BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x); 1650280304Sjkim BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y); 1651280304Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1652280304Sjkim goto err; 1653280304Sjkim if ((pre = nistp224_pre_comp_new()) == NULL) 1654280304Sjkim goto err; 1655280304Sjkim /* 1656280304Sjkim * if the generator is the standard one, use built-in precomputation 1657280304Sjkim */ 1658280304Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 1659280304Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1660280304Sjkim ret = 1; 1661280304Sjkim goto err; 1662280304Sjkim } 1663280304Sjkim if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || 1664280304Sjkim (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || 1665280304Sjkim (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z))) 1666280304Sjkim goto err; 1667280304Sjkim /* 1668280304Sjkim * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G, 1669280304Sjkim * 2^140*G, 2^196*G for the second one 1670280304Sjkim */ 1671280304Sjkim for (i = 1; i <= 8; i <<= 1) { 1672280304Sjkim point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1673280304Sjkim pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0], 1674280304Sjkim pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 1675280304Sjkim for (j = 0; j < 27; ++j) { 1676280304Sjkim point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1677280304Sjkim pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0], 1678280304Sjkim pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1679280304Sjkim } 1680280304Sjkim if (i == 8) 1681280304Sjkim break; 1682280304Sjkim point_double(pre->g_pre_comp[0][2 * i][0], 1683280304Sjkim pre->g_pre_comp[0][2 * i][1], 1684280304Sjkim pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0], 1685280304Sjkim pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1686280304Sjkim for (j = 0; j < 27; ++j) { 1687280304Sjkim point_double(pre->g_pre_comp[0][2 * i][0], 1688280304Sjkim pre->g_pre_comp[0][2 * i][1], 1689280304Sjkim pre->g_pre_comp[0][2 * i][2], 1690280304Sjkim pre->g_pre_comp[0][2 * i][0], 1691280304Sjkim pre->g_pre_comp[0][2 * i][1], 1692280304Sjkim pre->g_pre_comp[0][2 * i][2]); 1693280304Sjkim } 1694280304Sjkim } 1695280304Sjkim for (i = 0; i < 2; i++) { 1696280304Sjkim /* g_pre_comp[i][0] is the point at infinity */ 1697280304Sjkim memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 1698280304Sjkim /* the remaining multiples */ 1699280304Sjkim /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */ 1700280304Sjkim point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], 1701280304Sjkim pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0], 1702280304Sjkim pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 1703280304Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1704280304Sjkim pre->g_pre_comp[i][2][2]); 1705280304Sjkim /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */ 1706280304Sjkim point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], 1707280304Sjkim pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0], 1708280304Sjkim pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1709280304Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1710280304Sjkim pre->g_pre_comp[i][2][2]); 1711280304Sjkim /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */ 1712280304Sjkim point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], 1713280304Sjkim pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0], 1714280304Sjkim pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1715280304Sjkim 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], 1716280304Sjkim pre->g_pre_comp[i][4][2]); 1717280304Sjkim /* 1718280304Sjkim * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G 1719280304Sjkim */ 1720280304Sjkim point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], 1721280304Sjkim pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0], 1722280304Sjkim pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 1723280304Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1724280304Sjkim pre->g_pre_comp[i][2][2]); 1725280304Sjkim for (j = 1; j < 8; ++j) { 1726280304Sjkim /* odd multiples: add G resp. 2^28*G */ 1727280304Sjkim point_add(pre->g_pre_comp[i][2 * j + 1][0], 1728280304Sjkim pre->g_pre_comp[i][2 * j + 1][1], 1729280304Sjkim pre->g_pre_comp[i][2 * j + 1][2], 1730280304Sjkim pre->g_pre_comp[i][2 * j][0], 1731280304Sjkim pre->g_pre_comp[i][2 * j][1], 1732280304Sjkim pre->g_pre_comp[i][2 * j][2], 0, 1733280304Sjkim pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], 1734280304Sjkim pre->g_pre_comp[i][1][2]); 1735280304Sjkim } 1736280304Sjkim } 1737280304Sjkim make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); 1738238384Sjkim 1739280304Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, 1740280304Sjkim nistp224_pre_comp_free, 1741280304Sjkim nistp224_pre_comp_clear_free)) 1742280304Sjkim goto err; 1743280304Sjkim ret = 1; 1744280304Sjkim pre = NULL; 1745238384Sjkim err: 1746280304Sjkim BN_CTX_end(ctx); 1747280304Sjkim if (generator != NULL) 1748280304Sjkim EC_POINT_free(generator); 1749280304Sjkim if (new_ctx != NULL) 1750280304Sjkim BN_CTX_free(new_ctx); 1751280304Sjkim if (pre) 1752280304Sjkim nistp224_pre_comp_free(pre); 1753280304Sjkim return ret; 1754280304Sjkim} 1755238384Sjkim 1756238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group) 1757280304Sjkim{ 1758280304Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup, 1759280304Sjkim nistp224_pre_comp_free, 1760280304Sjkim nistp224_pre_comp_clear_free) 1761280304Sjkim != NULL) 1762280304Sjkim return 1; 1763280304Sjkim else 1764280304Sjkim return 0; 1765280304Sjkim} 1766238384Sjkim 1767238384Sjkim#else 1768280304Sjkimstatic void *dummy = &dummy; 1769238384Sjkim#endif 1770