1238384Sjkim/* crypto/ec/ecp_nistp224.c */ 2238384Sjkim/* 3238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project. 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation 25238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519 26238384Sjkim */ 27238384Sjkim 28238384Sjkim#include <openssl/opensslconf.h> 29238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 30238384Sjkim 31296341Sdelphij# ifndef OPENSSL_SYS_VMS 32296341Sdelphij# include <stdint.h> 33296341Sdelphij# else 34296341Sdelphij# include <inttypes.h> 35296341Sdelphij# endif 36238384Sjkim 37296341Sdelphij# include <string.h> 38296341Sdelphij# include <openssl/err.h> 39296341Sdelphij# include "ec_lcl.h" 40238384Sjkim 41296341Sdelphij# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 42238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 43296341Sdelphijtypedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 44296341Sdelphij * platforms */ 45296341Sdelphij# else 46296341Sdelphij# error "Need GCC 3.1 or later to define type uint128_t" 47296341Sdelphij# endif 48238384Sjkim 49238384Sjkimtypedef uint8_t u8; 50238384Sjkimtypedef uint64_t u64; 51238384Sjkimtypedef int64_t s64; 52238384Sjkim 53238384Sjkim/******************************************************************************/ 54296341Sdelphij/*- 55296341Sdelphij * INTERNAL REPRESENTATION OF FIELD ELEMENTS 56238384Sjkim * 57238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 58238384Sjkim * using 64-bit coefficients called 'limbs', 59238384Sjkim * and sometimes (for multiplication results) as 60238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6 61238384Sjkim * using 128-bit coefficients called 'widelimbs'. 62238384Sjkim * A 4-limb representation is an 'felem'; 63238384Sjkim * a 7-widelimb representation is a 'widefelem'. 64238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always 65238384Sjkim * reduce the representations: we ensure that inputs to each felem 66238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60, 67238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then 68238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57. 69238384Sjkim * We only reduce to the unique minimal representation at the end of the 70238384Sjkim * computation. 71238384Sjkim */ 72238384Sjkim 73238384Sjkimtypedef uint64_t limb; 74238384Sjkimtypedef uint128_t widelimb; 75238384Sjkim 76238384Sjkimtypedef limb felem[4]; 77238384Sjkimtypedef widelimb widefelem[7]; 78238384Sjkim 79296341Sdelphij/* 80296341Sdelphij * Field element represented as a byte arrary. 28*8 = 224 bits is also the 81296341Sdelphij * group order size for the elliptic curve, and we also use this type for 82296341Sdelphij * scalars for point multiplication. 83296341Sdelphij */ 84238384Sjkimtypedef u8 felem_bytearray[28]; 85238384Sjkim 86238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = { 87296341Sdelphij {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */ 88296341Sdelphij 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 89296341Sdelphij 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, 90296341Sdelphij {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */ 91296341Sdelphij 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 92296341Sdelphij 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}, 93296341Sdelphij {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */ 94296341Sdelphij 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA, 95296341Sdelphij 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4}, 96296341Sdelphij {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */ 97296341Sdelphij 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22, 98296341Sdelphij 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21}, 99296341Sdelphij {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */ 100296341Sdelphij 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64, 101296341Sdelphij 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34} 102238384Sjkim}; 103238384Sjkim 104296341Sdelphij/*- 105296341Sdelphij * Precomputed multiples of the standard generator 106238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1 107238384Sjkim * (0 for the point at infinity). 108238384Sjkim * For each field element, slice a_0 is word 0, etc. 109238384Sjkim * 110238384Sjkim * The table has 2 * 16 elements, starting with the following: 111238384Sjkim * index | bits | point 112238384Sjkim * ------+---------+------------------------------ 113238384Sjkim * 0 | 0 0 0 0 | 0G 114238384Sjkim * 1 | 0 0 0 1 | 1G 115238384Sjkim * 2 | 0 0 1 0 | 2^56G 116238384Sjkim * 3 | 0 0 1 1 | (2^56 + 1)G 117238384Sjkim * 4 | 0 1 0 0 | 2^112G 118238384Sjkim * 5 | 0 1 0 1 | (2^112 + 1)G 119238384Sjkim * 6 | 0 1 1 0 | (2^112 + 2^56)G 120238384Sjkim * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G 121238384Sjkim * 8 | 1 0 0 0 | 2^168G 122238384Sjkim * 9 | 1 0 0 1 | (2^168 + 1)G 123238384Sjkim * 10 | 1 0 1 0 | (2^168 + 2^56)G 124238384Sjkim * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G 125238384Sjkim * 12 | 1 1 0 0 | (2^168 + 2^112)G 126238384Sjkim * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G 127238384Sjkim * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G 128238384Sjkim * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G 129238384Sjkim * followed by a copy of this with each element multiplied by 2^28. 130238384Sjkim * 131238384Sjkim * The reason for this is so that we can clock bits into four different 132238384Sjkim * locations when doing simple scalar multiplies against the base point, 133238384Sjkim * and then another four locations using the second 16 elements. 134238384Sjkim */ 135296341Sdelphijstatic const felem gmul[2][16][3] = { {{{0, 0, 0, 0}, 136296341Sdelphij {0, 0, 0, 0}, 137296341Sdelphij {0, 0, 0, 0}}, 138296341Sdelphij {{0x3280d6115c1d21, 0xc1d356c2112234, 139296341Sdelphij 0x7f321390b94a03, 0xb70e0cbd6bb4bf}, 140296341Sdelphij {0xd5819985007e34, 0x75a05a07476444, 141296341Sdelphij 0xfb4c22dfe6cd43, 0xbd376388b5f723}, 142296341Sdelphij {1, 0, 0, 0}}, 143296341Sdelphij {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 144296341Sdelphij 0x2242df8d8a2a43, 0x1f49bbb0f99bc5}, 145296341Sdelphij {0x29e0b892dc9c43, 0xece8608436e662, 146296341Sdelphij 0xdc858f185310d0, 0x9812dd4eb8d321}, 147296341Sdelphij {1, 0, 0, 0}}, 148296341Sdelphij {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 149296341Sdelphij 0x16e9a3bbce8a3f, 0xeedcccd8c2a748}, 150296341Sdelphij {0xf19f90ed50266d, 0xabf2b4bf65f9df, 151296341Sdelphij 0x313865468fafec, 0x5cb379ba910a17}, 152296341Sdelphij {1, 0, 0, 0}}, 153296341Sdelphij {{0x0641966cab26e3, 0x91fb2991fab0a0, 154296341Sdelphij 0xefec27a4e13a0b, 0x0499aa8a5f8ebe}, 155296341Sdelphij {0x7510407766af5d, 0x84d929610d5450, 156296341Sdelphij 0x81d77aae82f706, 0x6916f6d4338c5b}, 157296341Sdelphij {1, 0, 0, 0}}, 158296341Sdelphij {{0xea95ac3b1f15c6, 0x086000905e82d4, 159296341Sdelphij 0xdd323ae4d1c8b1, 0x932b56be7685a3}, 160296341Sdelphij {0x9ef93dea25dbbf, 0x41665960f390f0, 161296341Sdelphij 0xfdec76dbe2a8a7, 0x523e80f019062a}, 162296341Sdelphij {1, 0, 0, 0}}, 163296341Sdelphij {{0x822fdd26732c73, 0xa01c83531b5d0f, 164296341Sdelphij 0x363f37347c1ba4, 0xc391b45c84725c}, 165296341Sdelphij {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 166296341Sdelphij 0xc393da7e222a7f, 0x1efb7890ede244}, 167296341Sdelphij {1, 0, 0, 0}}, 168296341Sdelphij {{0x4c9e90ca217da1, 0xd11beca79159bb, 169296341Sdelphij 0xff8d33c2c98b7c, 0x2610b39409f849}, 170296341Sdelphij {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 171296341Sdelphij 0x966c079b753c89, 0xfe67e4e820b112}, 172296341Sdelphij {1, 0, 0, 0}}, 173296341Sdelphij {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 174296341Sdelphij 0x79b7619a3e7c4c, 0x05c73240899b47}, 175296341Sdelphij {0x9f7f6382c73e3a, 0x18615165c56bda, 176296341Sdelphij 0x641fab2116fd56, 0x72855882b08394}, 177296341Sdelphij {1, 0, 0, 0}}, 178296341Sdelphij {{0x0469182f161c09, 0x74a98ca8d00fb5, 179296341Sdelphij 0xb89da93489a3e0, 0x41c98768fb0c1d}, 180296341Sdelphij {0xe5ea05fb32da81, 0x3dce9ffbca6855, 181296341Sdelphij 0x1cfe2d3fbf59e6, 0x0e5e03408738a7}, 182296341Sdelphij {1, 0, 0, 0}}, 183296341Sdelphij {{0xdab22b2333e87f, 0x4430137a5dd2f6, 184296341Sdelphij 0xe03ab9f738beb8, 0xcb0c5d0dc34f24}, 185296341Sdelphij {0x764a7df0c8fda5, 0x185ba5c3fa2044, 186296341Sdelphij 0x9281d688bcbe50, 0xc40331df893881}, 187296341Sdelphij {1, 0, 0, 0}}, 188296341Sdelphij {{0xb89530796f0f60, 0xade92bd26909a3, 189296341Sdelphij 0x1a0c83fb4884da, 0x1765bf22a5a984}, 190296341Sdelphij {0x772a9ee75db09e, 0x23bc6c67cec16f, 191296341Sdelphij 0x4c1edba8b14e2f, 0xe2a215d9611369}, 192296341Sdelphij {1, 0, 0, 0}}, 193296341Sdelphij {{0x571e509fb5efb3, 0xade88696410552, 194296341Sdelphij 0xc8ae85fada74fe, 0x6c7e4be83bbde3}, 195296341Sdelphij {0xff9f51160f4652, 0xb47ce2495a6539, 196296341Sdelphij 0xa2946c53b582f4, 0x286d2db3ee9a60}, 197296341Sdelphij {1, 0, 0, 0}}, 198296341Sdelphij {{0x40bbd5081a44af, 0x0995183b13926c, 199296341Sdelphij 0xbcefba6f47f6d0, 0x215619e9cc0057}, 200296341Sdelphij {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 201296341Sdelphij 0x8631b93cdfe8b5, 0xe7e3f4b0982db9}, 202296341Sdelphij {1, 0, 0, 0}}, 203296341Sdelphij {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 204296341Sdelphij 0x1c29819435d2c6, 0xc813132f4c07e9}, 205296341Sdelphij {0x2891425503b11f, 0x08781030579fea, 206296341Sdelphij 0xf5426ba5cc9674, 0x1e28ebf18562bc}, 207296341Sdelphij {1, 0, 0, 0}}, 208296341Sdelphij {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 209296341Sdelphij 0xff17036691a973, 0xf1aef351497c58}, 210296341Sdelphij {0xdd1f2d600564ff, 0xdead073b1402db, 211296341Sdelphij 0x74a684435bd693, 0xeea7471f962558}, 212296341Sdelphij {1, 0, 0, 0}}}, 213296341Sdelphij{{{0, 0, 0, 0}, 214296341Sdelphij {0, 0, 0, 0}, 215296341Sdelphij {0, 0, 0, 0}}, 216296341Sdelphij {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31}, 217296341Sdelphij {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d}, 218296341Sdelphij {1, 0, 0, 0}}, 219296341Sdelphij {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3}, 220296341Sdelphij {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a}, 221296341Sdelphij {1, 0, 0, 0}}, 222296341Sdelphij {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33}, 223296341Sdelphij {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100}, 224296341Sdelphij {1, 0, 0, 0}}, 225296341Sdelphij {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5}, 226296341Sdelphij {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea}, 227296341Sdelphij {1, 0, 0, 0}}, 228296341Sdelphij {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be}, 229296341Sdelphij {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51}, 230296341Sdelphij {1, 0, 0, 0}}, 231296341Sdelphij {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1}, 232296341Sdelphij {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb}, 233296341Sdelphij {1, 0, 0, 0}}, 234296341Sdelphij {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233}, 235296341Sdelphij {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def}, 236296341Sdelphij {1, 0, 0, 0}}, 237296341Sdelphij {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae}, 238296341Sdelphij {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45}, 239296341Sdelphij {1, 0, 0, 0}}, 240296341Sdelphij {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e}, 241296341Sdelphij {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb}, 242296341Sdelphij {1, 0, 0, 0}}, 243296341Sdelphij {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de}, 244296341Sdelphij {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3}, 245296341Sdelphij {1, 0, 0, 0}}, 246296341Sdelphij {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05}, 247296341Sdelphij {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58}, 248296341Sdelphij {1, 0, 0, 0}}, 249296341Sdelphij {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb}, 250296341Sdelphij {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0}, 251296341Sdelphij {1, 0, 0, 0}}, 252296341Sdelphij {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9}, 253296341Sdelphij {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea}, 254296341Sdelphij {1, 0, 0, 0}}, 255296341Sdelphij {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba}, 256296341Sdelphij {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405}, 257296341Sdelphij {1, 0, 0, 0}}, 258296341Sdelphij {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e}, 259296341Sdelphij {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e}, 260296341Sdelphij {1, 0, 0, 0}}} 261296341Sdelphij}; 262238384Sjkim 263238384Sjkim/* Precomputation for the group generator. */ 264238384Sjkimtypedef struct { 265296341Sdelphij felem g_pre_comp[2][16][3]; 266296341Sdelphij int references; 267238384Sjkim} NISTP224_PRE_COMP; 268238384Sjkim 269238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void) 270296341Sdelphij{ 271296341Sdelphij static const EC_METHOD ret = { 272296341Sdelphij EC_FLAGS_DEFAULT_OCT, 273296341Sdelphij NID_X9_62_prime_field, 274296341Sdelphij ec_GFp_nistp224_group_init, 275296341Sdelphij ec_GFp_simple_group_finish, 276296341Sdelphij ec_GFp_simple_group_clear_finish, 277296341Sdelphij ec_GFp_nist_group_copy, 278296341Sdelphij ec_GFp_nistp224_group_set_curve, 279296341Sdelphij ec_GFp_simple_group_get_curve, 280296341Sdelphij ec_GFp_simple_group_get_degree, 281296341Sdelphij ec_GFp_simple_group_check_discriminant, 282296341Sdelphij ec_GFp_simple_point_init, 283296341Sdelphij ec_GFp_simple_point_finish, 284296341Sdelphij ec_GFp_simple_point_clear_finish, 285296341Sdelphij ec_GFp_simple_point_copy, 286296341Sdelphij ec_GFp_simple_point_set_to_infinity, 287296341Sdelphij ec_GFp_simple_set_Jprojective_coordinates_GFp, 288296341Sdelphij ec_GFp_simple_get_Jprojective_coordinates_GFp, 289296341Sdelphij ec_GFp_simple_point_set_affine_coordinates, 290296341Sdelphij ec_GFp_nistp224_point_get_affine_coordinates, 291296341Sdelphij 0 /* point_set_compressed_coordinates */ , 292296341Sdelphij 0 /* point2oct */ , 293296341Sdelphij 0 /* oct2point */ , 294296341Sdelphij ec_GFp_simple_add, 295296341Sdelphij ec_GFp_simple_dbl, 296296341Sdelphij ec_GFp_simple_invert, 297296341Sdelphij ec_GFp_simple_is_at_infinity, 298296341Sdelphij ec_GFp_simple_is_on_curve, 299296341Sdelphij ec_GFp_simple_cmp, 300296341Sdelphij ec_GFp_simple_make_affine, 301296341Sdelphij ec_GFp_simple_points_make_affine, 302296341Sdelphij ec_GFp_nistp224_points_mul, 303296341Sdelphij ec_GFp_nistp224_precompute_mult, 304296341Sdelphij ec_GFp_nistp224_have_precompute_mult, 305296341Sdelphij ec_GFp_nist_field_mul, 306296341Sdelphij ec_GFp_nist_field_sqr, 307296341Sdelphij 0 /* field_div */ , 308296341Sdelphij 0 /* field_encode */ , 309296341Sdelphij 0 /* field_decode */ , 310296341Sdelphij 0 /* field_set_to_one */ 311296341Sdelphij }; 312238384Sjkim 313296341Sdelphij return &ret; 314296341Sdelphij} 315238384Sjkim 316296341Sdelphij/* 317296341Sdelphij * Helper functions to convert field elements to/from internal representation 318296341Sdelphij */ 319238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28]) 320296341Sdelphij{ 321296341Sdelphij out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 322296341Sdelphij out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; 323296341Sdelphij out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; 324296341Sdelphij out[3] = (*((const uint64_t *)(in+20))) >> 8; 325296341Sdelphij} 326238384Sjkim 327238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in) 328296341Sdelphij{ 329296341Sdelphij unsigned i; 330296341Sdelphij for (i = 0; i < 7; ++i) { 331296341Sdelphij out[i] = in[0] >> (8 * i); 332296341Sdelphij out[i + 7] = in[1] >> (8 * i); 333296341Sdelphij out[i + 14] = in[2] >> (8 * i); 334296341Sdelphij out[i + 21] = in[3] >> (8 * i); 335296341Sdelphij } 336296341Sdelphij} 337238384Sjkim 338238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 339238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 340296341Sdelphij{ 341296341Sdelphij unsigned i; 342296341Sdelphij for (i = 0; i < len; ++i) 343296341Sdelphij out[i] = in[len - 1 - i]; 344296341Sdelphij} 345238384Sjkim 346238384Sjkim/* From OpenSSL BIGNUM to internal representation */ 347238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 348296341Sdelphij{ 349296341Sdelphij felem_bytearray b_in; 350296341Sdelphij felem_bytearray b_out; 351296341Sdelphij unsigned num_bytes; 352238384Sjkim 353296341Sdelphij /* BN_bn2bin eats leading zeroes */ 354296341Sdelphij memset(b_out, 0, sizeof b_out); 355296341Sdelphij num_bytes = BN_num_bytes(bn); 356296341Sdelphij if (num_bytes > sizeof b_out) { 357296341Sdelphij ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 358296341Sdelphij return 0; 359296341Sdelphij } 360296341Sdelphij if (BN_is_negative(bn)) { 361296341Sdelphij ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 362296341Sdelphij return 0; 363296341Sdelphij } 364296341Sdelphij num_bytes = BN_bn2bin(bn, b_in); 365296341Sdelphij flip_endian(b_out, b_in, num_bytes); 366296341Sdelphij bin28_to_felem(out, b_out); 367296341Sdelphij return 1; 368296341Sdelphij} 369238384Sjkim 370238384Sjkim/* From internal representation to OpenSSL BIGNUM */ 371238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 372296341Sdelphij{ 373296341Sdelphij felem_bytearray b_in, b_out; 374296341Sdelphij felem_to_bin28(b_in, in); 375296341Sdelphij flip_endian(b_out, b_in, sizeof b_out); 376296341Sdelphij return BN_bin2bn(b_out, sizeof b_out, out); 377296341Sdelphij} 378238384Sjkim 379238384Sjkim/******************************************************************************/ 380296341Sdelphij/*- 381296341Sdelphij * FIELD OPERATIONS 382238384Sjkim * 383238384Sjkim * Field operations, using the internal representation of field elements. 384238384Sjkim * NB! These operations are specific to our point multiplication and cannot be 385238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar 386238384Sjkim * will cause an overflow. 387238384Sjkim * 388238384Sjkim */ 389238384Sjkim 390238384Sjkimstatic void felem_one(felem out) 391296341Sdelphij{ 392296341Sdelphij out[0] = 1; 393296341Sdelphij out[1] = 0; 394296341Sdelphij out[2] = 0; 395296341Sdelphij out[3] = 0; 396296341Sdelphij} 397238384Sjkim 398238384Sjkimstatic void felem_assign(felem out, const felem in) 399296341Sdelphij{ 400296341Sdelphij out[0] = in[0]; 401296341Sdelphij out[1] = in[1]; 402296341Sdelphij out[2] = in[2]; 403296341Sdelphij out[3] = in[3]; 404296341Sdelphij} 405238384Sjkim 406238384Sjkim/* Sum two field elements: out += in */ 407238384Sjkimstatic void felem_sum(felem out, const felem in) 408296341Sdelphij{ 409296341Sdelphij out[0] += in[0]; 410296341Sdelphij out[1] += in[1]; 411296341Sdelphij out[2] += in[2]; 412296341Sdelphij out[3] += in[3]; 413296341Sdelphij} 414238384Sjkim 415238384Sjkim/* Get negative value: out = -in */ 416238384Sjkim/* Assumes in[i] < 2^57 */ 417238384Sjkimstatic void felem_neg(felem out, const felem in) 418296341Sdelphij{ 419296341Sdelphij static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 420296341Sdelphij static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 421296341Sdelphij static const limb two58m42m2 = (((limb) 1) << 58) - 422296341Sdelphij (((limb) 1) << 42) - (((limb) 1) << 2); 423238384Sjkim 424296341Sdelphij /* Set to 0 mod 2^224-2^96+1 to ensure out > in */ 425296341Sdelphij out[0] = two58p2 - in[0]; 426296341Sdelphij out[1] = two58m42m2 - in[1]; 427296341Sdelphij out[2] = two58m2 - in[2]; 428296341Sdelphij out[3] = two58m2 - in[3]; 429296341Sdelphij} 430238384Sjkim 431238384Sjkim/* Subtract field elements: out -= in */ 432238384Sjkim/* Assumes in[i] < 2^57 */ 433238384Sjkimstatic void felem_diff(felem out, const felem in) 434296341Sdelphij{ 435296341Sdelphij static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 436296341Sdelphij static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 437296341Sdelphij static const limb two58m42m2 = (((limb) 1) << 58) - 438296341Sdelphij (((limb) 1) << 42) - (((limb) 1) << 2); 439238384Sjkim 440296341Sdelphij /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 441296341Sdelphij out[0] += two58p2; 442296341Sdelphij out[1] += two58m42m2; 443296341Sdelphij out[2] += two58m2; 444296341Sdelphij out[3] += two58m2; 445238384Sjkim 446296341Sdelphij out[0] -= in[0]; 447296341Sdelphij out[1] -= in[1]; 448296341Sdelphij out[2] -= in[2]; 449296341Sdelphij out[3] -= in[3]; 450296341Sdelphij} 451238384Sjkim 452238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */ 453238384Sjkim/* Assumes in[i] < 2^119 */ 454238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in) 455296341Sdelphij{ 456296341Sdelphij static const widelimb two120 = ((widelimb) 1) << 120; 457296341Sdelphij static const widelimb two120m64 = (((widelimb) 1) << 120) - 458296341Sdelphij (((widelimb) 1) << 64); 459296341Sdelphij static const widelimb two120m104m64 = (((widelimb) 1) << 120) - 460296341Sdelphij (((widelimb) 1) << 104) - (((widelimb) 1) << 64); 461238384Sjkim 462296341Sdelphij /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 463296341Sdelphij out[0] += two120; 464296341Sdelphij out[1] += two120m64; 465296341Sdelphij out[2] += two120m64; 466296341Sdelphij out[3] += two120; 467296341Sdelphij out[4] += two120m104m64; 468296341Sdelphij out[5] += two120m64; 469296341Sdelphij out[6] += two120m64; 470238384Sjkim 471296341Sdelphij out[0] -= in[0]; 472296341Sdelphij out[1] -= in[1]; 473296341Sdelphij out[2] -= in[2]; 474296341Sdelphij out[3] -= in[3]; 475296341Sdelphij out[4] -= in[4]; 476296341Sdelphij out[5] -= in[5]; 477296341Sdelphij out[6] -= in[6]; 478296341Sdelphij} 479238384Sjkim 480238384Sjkim/* Subtract in mixed mode: out128 -= in64 */ 481238384Sjkim/* in[i] < 2^63 */ 482238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in) 483296341Sdelphij{ 484296341Sdelphij static const widelimb two64p8 = (((widelimb) 1) << 64) + 485296341Sdelphij (((widelimb) 1) << 8); 486296341Sdelphij static const widelimb two64m8 = (((widelimb) 1) << 64) - 487296341Sdelphij (((widelimb) 1) << 8); 488296341Sdelphij static const widelimb two64m48m8 = (((widelimb) 1) << 64) - 489296341Sdelphij (((widelimb) 1) << 48) - (((widelimb) 1) << 8); 490238384Sjkim 491296341Sdelphij /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 492296341Sdelphij out[0] += two64p8; 493296341Sdelphij out[1] += two64m48m8; 494296341Sdelphij out[2] += two64m8; 495296341Sdelphij out[3] += two64m8; 496238384Sjkim 497296341Sdelphij out[0] -= in[0]; 498296341Sdelphij out[1] -= in[1]; 499296341Sdelphij out[2] -= in[2]; 500296341Sdelphij out[3] -= in[3]; 501296341Sdelphij} 502238384Sjkim 503296341Sdelphij/* 504296341Sdelphij * Multiply a field element by a scalar: out = out * scalar The scalars we 505296341Sdelphij * actually use are small, so results fit without overflow 506296341Sdelphij */ 507238384Sjkimstatic void felem_scalar(felem out, const limb scalar) 508296341Sdelphij{ 509296341Sdelphij out[0] *= scalar; 510296341Sdelphij out[1] *= scalar; 511296341Sdelphij out[2] *= scalar; 512296341Sdelphij out[3] *= scalar; 513296341Sdelphij} 514238384Sjkim 515296341Sdelphij/* 516296341Sdelphij * Multiply an unreduced field element by a scalar: out = out * scalar The 517296341Sdelphij * scalars we actually use are small, so results fit without overflow 518296341Sdelphij */ 519238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar) 520296341Sdelphij{ 521296341Sdelphij out[0] *= scalar; 522296341Sdelphij out[1] *= scalar; 523296341Sdelphij out[2] *= scalar; 524296341Sdelphij out[3] *= scalar; 525296341Sdelphij out[4] *= scalar; 526296341Sdelphij out[5] *= scalar; 527296341Sdelphij out[6] *= scalar; 528296341Sdelphij} 529238384Sjkim 530238384Sjkim/* Square a field element: out = in^2 */ 531238384Sjkimstatic void felem_square(widefelem out, const felem in) 532296341Sdelphij{ 533296341Sdelphij limb tmp0, tmp1, tmp2; 534296341Sdelphij tmp0 = 2 * in[0]; 535296341Sdelphij tmp1 = 2 * in[1]; 536296341Sdelphij tmp2 = 2 * in[2]; 537296341Sdelphij out[0] = ((widelimb) in[0]) * in[0]; 538296341Sdelphij out[1] = ((widelimb) in[0]) * tmp1; 539296341Sdelphij out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1]; 540296341Sdelphij out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2; 541296341Sdelphij out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2]; 542296341Sdelphij out[5] = ((widelimb) in[3]) * tmp2; 543296341Sdelphij out[6] = ((widelimb) in[3]) * in[3]; 544296341Sdelphij} 545238384Sjkim 546238384Sjkim/* Multiply two field elements: out = in1 * in2 */ 547238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2) 548296341Sdelphij{ 549296341Sdelphij out[0] = ((widelimb) in1[0]) * in2[0]; 550296341Sdelphij out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0]; 551296341Sdelphij out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] + 552296341Sdelphij ((widelimb) in1[2]) * in2[0]; 553296341Sdelphij out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] + 554296341Sdelphij ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0]; 555296341Sdelphij out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] + 556296341Sdelphij ((widelimb) in1[3]) * in2[1]; 557296341Sdelphij out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2]; 558296341Sdelphij out[6] = ((widelimb) in1[3]) * in2[3]; 559296341Sdelphij} 560238384Sjkim 561296341Sdelphij/*- 562296341Sdelphij * Reduce seven 128-bit coefficients to four 64-bit coefficients. 563238384Sjkim * Requires in[i] < 2^126, 564238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */ 565238384Sjkimstatic void felem_reduce(felem out, const widefelem in) 566296341Sdelphij{ 567296341Sdelphij static const widelimb two127p15 = (((widelimb) 1) << 127) + 568296341Sdelphij (((widelimb) 1) << 15); 569296341Sdelphij static const widelimb two127m71 = (((widelimb) 1) << 127) - 570296341Sdelphij (((widelimb) 1) << 71); 571296341Sdelphij static const widelimb two127m71m55 = (((widelimb) 1) << 127) - 572296341Sdelphij (((widelimb) 1) << 71) - (((widelimb) 1) << 55); 573296341Sdelphij widelimb output[5]; 574238384Sjkim 575296341Sdelphij /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */ 576296341Sdelphij output[0] = in[0] + two127p15; 577296341Sdelphij output[1] = in[1] + two127m71m55; 578296341Sdelphij output[2] = in[2] + two127m71; 579296341Sdelphij output[3] = in[3]; 580296341Sdelphij output[4] = in[4]; 581238384Sjkim 582296341Sdelphij /* Eliminate in[4], in[5], in[6] */ 583296341Sdelphij output[4] += in[6] >> 16; 584296341Sdelphij output[3] += (in[6] & 0xffff) << 40; 585296341Sdelphij output[2] -= in[6]; 586238384Sjkim 587296341Sdelphij output[3] += in[5] >> 16; 588296341Sdelphij output[2] += (in[5] & 0xffff) << 40; 589296341Sdelphij output[1] -= in[5]; 590238384Sjkim 591296341Sdelphij output[2] += output[4] >> 16; 592296341Sdelphij output[1] += (output[4] & 0xffff) << 40; 593296341Sdelphij output[0] -= output[4]; 594238384Sjkim 595296341Sdelphij /* Carry 2 -> 3 -> 4 */ 596296341Sdelphij output[3] += output[2] >> 56; 597296341Sdelphij output[2] &= 0x00ffffffffffffff; 598238384Sjkim 599296341Sdelphij output[4] = output[3] >> 56; 600296341Sdelphij output[3] &= 0x00ffffffffffffff; 601238384Sjkim 602296341Sdelphij /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */ 603238384Sjkim 604296341Sdelphij /* Eliminate output[4] */ 605296341Sdelphij output[2] += output[4] >> 16; 606296341Sdelphij /* output[2] < 2^56 + 2^56 = 2^57 */ 607296341Sdelphij output[1] += (output[4] & 0xffff) << 40; 608296341Sdelphij output[0] -= output[4]; 609238384Sjkim 610296341Sdelphij /* Carry 0 -> 1 -> 2 -> 3 */ 611296341Sdelphij output[1] += output[0] >> 56; 612296341Sdelphij out[0] = output[0] & 0x00ffffffffffffff; 613238384Sjkim 614296341Sdelphij output[2] += output[1] >> 56; 615296341Sdelphij /* output[2] < 2^57 + 2^72 */ 616296341Sdelphij out[1] = output[1] & 0x00ffffffffffffff; 617296341Sdelphij output[3] += output[2] >> 56; 618296341Sdelphij /* output[3] <= 2^56 + 2^16 */ 619296341Sdelphij out[2] = output[2] & 0x00ffffffffffffff; 620238384Sjkim 621296341Sdelphij /*- 622296341Sdelphij * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, 623296341Sdelphij * out[3] <= 2^56 + 2^16 (due to final carry), 624296341Sdelphij * so out < 2*p 625296341Sdelphij */ 626296341Sdelphij out[3] = output[3]; 627296341Sdelphij} 628238384Sjkim 629238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 630296341Sdelphij{ 631296341Sdelphij widefelem tmp; 632296341Sdelphij felem_square(tmp, in); 633296341Sdelphij felem_reduce(out, tmp); 634296341Sdelphij} 635238384Sjkim 636238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 637296341Sdelphij{ 638296341Sdelphij widefelem tmp; 639296341Sdelphij felem_mul(tmp, in1, in2); 640296341Sdelphij felem_reduce(out, tmp); 641296341Sdelphij} 642238384Sjkim 643296341Sdelphij/* 644296341Sdelphij * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always 645296341Sdelphij * call felem_reduce first) 646296341Sdelphij */ 647238384Sjkimstatic void felem_contract(felem out, const felem in) 648296341Sdelphij{ 649296341Sdelphij static const int64_t two56 = ((limb) 1) << 56; 650296341Sdelphij /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */ 651296341Sdelphij /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */ 652296341Sdelphij int64_t tmp[4], a; 653296341Sdelphij tmp[0] = in[0]; 654296341Sdelphij tmp[1] = in[1]; 655296341Sdelphij tmp[2] = in[2]; 656296341Sdelphij tmp[3] = in[3]; 657296341Sdelphij /* Case 1: a = 1 iff in >= 2^224 */ 658296341Sdelphij a = (in[3] >> 56); 659296341Sdelphij tmp[0] -= a; 660296341Sdelphij tmp[1] += a << 40; 661296341Sdelphij tmp[3] &= 0x00ffffffffffffff; 662296341Sdelphij /* 663296341Sdelphij * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1 664296341Sdelphij * and the lower part is non-zero 665296341Sdelphij */ 666296341Sdelphij a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | 667296341Sdelphij (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63); 668296341Sdelphij a &= 0x00ffffffffffffff; 669296341Sdelphij /* turn a into an all-one mask (if a = 0) or an all-zero mask */ 670296341Sdelphij a = (a - 1) >> 63; 671296341Sdelphij /* subtract 2^224 - 2^96 + 1 if a is all-one */ 672296341Sdelphij tmp[3] &= a ^ 0xffffffffffffffff; 673296341Sdelphij tmp[2] &= a ^ 0xffffffffffffffff; 674296341Sdelphij tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff; 675296341Sdelphij tmp[0] -= 1 & a; 676238384Sjkim 677296341Sdelphij /* 678296341Sdelphij * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be 679296341Sdelphij * non-zero, so we only need one step 680296341Sdelphij */ 681296341Sdelphij a = tmp[0] >> 63; 682296341Sdelphij tmp[0] += two56 & a; 683296341Sdelphij tmp[1] -= 1 & a; 684238384Sjkim 685296341Sdelphij /* carry 1 -> 2 -> 3 */ 686296341Sdelphij tmp[2] += tmp[1] >> 56; 687296341Sdelphij tmp[1] &= 0x00ffffffffffffff; 688238384Sjkim 689296341Sdelphij tmp[3] += tmp[2] >> 56; 690296341Sdelphij tmp[2] &= 0x00ffffffffffffff; 691238384Sjkim 692296341Sdelphij /* Now 0 <= out < p */ 693296341Sdelphij out[0] = tmp[0]; 694296341Sdelphij out[1] = tmp[1]; 695296341Sdelphij out[2] = tmp[2]; 696296341Sdelphij out[3] = tmp[3]; 697296341Sdelphij} 698238384Sjkim 699296341Sdelphij/* 700296341Sdelphij * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field 701296341Sdelphij * elements are reduced to in < 2^225, so we only need to check three cases: 702296341Sdelphij * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2 703296341Sdelphij */ 704238384Sjkimstatic limb felem_is_zero(const felem in) 705296341Sdelphij{ 706296341Sdelphij limb zero, two224m96p1, two225m97p2; 707238384Sjkim 708296341Sdelphij zero = in[0] | in[1] | in[2] | in[3]; 709296341Sdelphij zero = (((int64_t) (zero) - 1) >> 63) & 1; 710296341Sdelphij two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000) 711296341Sdelphij | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff); 712296341Sdelphij two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1; 713296341Sdelphij two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000) 714296341Sdelphij | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff); 715296341Sdelphij two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1; 716296341Sdelphij return (zero | two224m96p1 | two225m97p2); 717296341Sdelphij} 718238384Sjkim 719238384Sjkimstatic limb felem_is_zero_int(const felem in) 720296341Sdelphij{ 721296341Sdelphij return (int)(felem_is_zero(in) & ((limb) 1)); 722296341Sdelphij} 723238384Sjkim 724238384Sjkim/* Invert a field element */ 725238384Sjkim/* Computation chain copied from djb's code */ 726238384Sjkimstatic void felem_inv(felem out, const felem in) 727296341Sdelphij{ 728296341Sdelphij felem ftmp, ftmp2, ftmp3, ftmp4; 729296341Sdelphij widefelem tmp; 730296341Sdelphij unsigned i; 731238384Sjkim 732296341Sdelphij felem_square(tmp, in); 733296341Sdelphij felem_reduce(ftmp, tmp); /* 2 */ 734296341Sdelphij felem_mul(tmp, in, ftmp); 735296341Sdelphij felem_reduce(ftmp, tmp); /* 2^2 - 1 */ 736296341Sdelphij felem_square(tmp, ftmp); 737296341Sdelphij felem_reduce(ftmp, tmp); /* 2^3 - 2 */ 738296341Sdelphij felem_mul(tmp, in, ftmp); 739296341Sdelphij felem_reduce(ftmp, tmp); /* 2^3 - 1 */ 740296341Sdelphij felem_square(tmp, ftmp); 741296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^4 - 2 */ 742296341Sdelphij felem_square(tmp, ftmp2); 743296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^5 - 4 */ 744296341Sdelphij felem_square(tmp, ftmp2); 745296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^6 - 8 */ 746296341Sdelphij felem_mul(tmp, ftmp2, ftmp); 747296341Sdelphij felem_reduce(ftmp, tmp); /* 2^6 - 1 */ 748296341Sdelphij felem_square(tmp, ftmp); 749296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^7 - 2 */ 750296341Sdelphij for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */ 751296341Sdelphij felem_square(tmp, ftmp2); 752296341Sdelphij felem_reduce(ftmp2, tmp); 753296341Sdelphij } 754296341Sdelphij felem_mul(tmp, ftmp2, ftmp); 755296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^12 - 1 */ 756296341Sdelphij felem_square(tmp, ftmp2); 757296341Sdelphij felem_reduce(ftmp3, tmp); /* 2^13 - 2 */ 758296341Sdelphij for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */ 759296341Sdelphij felem_square(tmp, ftmp3); 760296341Sdelphij felem_reduce(ftmp3, tmp); 761296341Sdelphij } 762296341Sdelphij felem_mul(tmp, ftmp3, ftmp2); 763296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^24 - 1 */ 764296341Sdelphij felem_square(tmp, ftmp2); 765296341Sdelphij felem_reduce(ftmp3, tmp); /* 2^25 - 2 */ 766296341Sdelphij for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */ 767296341Sdelphij felem_square(tmp, ftmp3); 768296341Sdelphij felem_reduce(ftmp3, tmp); 769296341Sdelphij } 770296341Sdelphij felem_mul(tmp, ftmp3, ftmp2); 771296341Sdelphij felem_reduce(ftmp3, tmp); /* 2^48 - 1 */ 772296341Sdelphij felem_square(tmp, ftmp3); 773296341Sdelphij felem_reduce(ftmp4, tmp); /* 2^49 - 2 */ 774296341Sdelphij for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */ 775296341Sdelphij felem_square(tmp, ftmp4); 776296341Sdelphij felem_reduce(ftmp4, tmp); 777296341Sdelphij } 778296341Sdelphij felem_mul(tmp, ftmp3, ftmp4); 779296341Sdelphij felem_reduce(ftmp3, tmp); /* 2^96 - 1 */ 780296341Sdelphij felem_square(tmp, ftmp3); 781296341Sdelphij felem_reduce(ftmp4, tmp); /* 2^97 - 2 */ 782296341Sdelphij for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */ 783296341Sdelphij felem_square(tmp, ftmp4); 784296341Sdelphij felem_reduce(ftmp4, tmp); 785296341Sdelphij } 786296341Sdelphij felem_mul(tmp, ftmp2, ftmp4); 787296341Sdelphij felem_reduce(ftmp2, tmp); /* 2^120 - 1 */ 788296341Sdelphij for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */ 789296341Sdelphij felem_square(tmp, ftmp2); 790296341Sdelphij felem_reduce(ftmp2, tmp); 791296341Sdelphij } 792296341Sdelphij felem_mul(tmp, ftmp2, ftmp); 793296341Sdelphij felem_reduce(ftmp, tmp); /* 2^126 - 1 */ 794296341Sdelphij felem_square(tmp, ftmp); 795296341Sdelphij felem_reduce(ftmp, tmp); /* 2^127 - 2 */ 796296341Sdelphij felem_mul(tmp, ftmp, in); 797296341Sdelphij felem_reduce(ftmp, tmp); /* 2^127 - 1 */ 798296341Sdelphij for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */ 799296341Sdelphij felem_square(tmp, ftmp); 800296341Sdelphij felem_reduce(ftmp, tmp); 801296341Sdelphij } 802296341Sdelphij felem_mul(tmp, ftmp, ftmp3); 803296341Sdelphij felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */ 804296341Sdelphij} 805238384Sjkim 806296341Sdelphij/* 807296341Sdelphij * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy 808296341Sdelphij * out to itself. 809296341Sdelphij */ 810296341Sdelphijstatic void copy_conditional(felem out, const felem in, limb icopy) 811296341Sdelphij{ 812296341Sdelphij unsigned i; 813296341Sdelphij /* 814296341Sdelphij * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one 815296341Sdelphij */ 816296341Sdelphij const limb copy = -icopy; 817296341Sdelphij for (i = 0; i < 4; ++i) { 818296341Sdelphij const limb tmp = copy & (in[i] ^ out[i]); 819296341Sdelphij out[i] ^= tmp; 820296341Sdelphij } 821296341Sdelphij} 822238384Sjkim 823238384Sjkim/******************************************************************************/ 824296341Sdelphij/*- 825296341Sdelphij * ELLIPTIC CURVE POINT OPERATIONS 826238384Sjkim * 827238384Sjkim * Points are represented in Jacobian projective coordinates: 828238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3), 829238384Sjkim * or to the point at infinity if Z == 0. 830238384Sjkim * 831238384Sjkim */ 832238384Sjkim 833296341Sdelphij/*- 834296341Sdelphij * Double an elliptic curve point: 835238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where 836238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2 837238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2 838238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z 839238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed, 840296341Sdelphij * while x_out == y_in is not (maybe this works, but it's not tested). 841296341Sdelphij */ 842238384Sjkimstatic void 843238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 844238384Sjkim const felem x_in, const felem y_in, const felem z_in) 845296341Sdelphij{ 846296341Sdelphij widefelem tmp, tmp2; 847296341Sdelphij felem delta, gamma, beta, alpha, ftmp, ftmp2; 848238384Sjkim 849296341Sdelphij felem_assign(ftmp, x_in); 850296341Sdelphij felem_assign(ftmp2, x_in); 851238384Sjkim 852296341Sdelphij /* delta = z^2 */ 853296341Sdelphij felem_square(tmp, z_in); 854296341Sdelphij felem_reduce(delta, tmp); 855238384Sjkim 856296341Sdelphij /* gamma = y^2 */ 857296341Sdelphij felem_square(tmp, y_in); 858296341Sdelphij felem_reduce(gamma, tmp); 859238384Sjkim 860296341Sdelphij /* beta = x*gamma */ 861296341Sdelphij felem_mul(tmp, x_in, gamma); 862296341Sdelphij felem_reduce(beta, tmp); 863238384Sjkim 864296341Sdelphij /* alpha = 3*(x-delta)*(x+delta) */ 865296341Sdelphij felem_diff(ftmp, delta); 866296341Sdelphij /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */ 867296341Sdelphij felem_sum(ftmp2, delta); 868296341Sdelphij /* ftmp2[i] < 2^57 + 2^57 = 2^58 */ 869296341Sdelphij felem_scalar(ftmp2, 3); 870296341Sdelphij /* ftmp2[i] < 3 * 2^58 < 2^60 */ 871296341Sdelphij felem_mul(tmp, ftmp, ftmp2); 872296341Sdelphij /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */ 873296341Sdelphij felem_reduce(alpha, tmp); 874238384Sjkim 875296341Sdelphij /* x' = alpha^2 - 8*beta */ 876296341Sdelphij felem_square(tmp, alpha); 877296341Sdelphij /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 878296341Sdelphij felem_assign(ftmp, beta); 879296341Sdelphij felem_scalar(ftmp, 8); 880296341Sdelphij /* ftmp[i] < 8 * 2^57 = 2^60 */ 881296341Sdelphij felem_diff_128_64(tmp, ftmp); 882296341Sdelphij /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 883296341Sdelphij felem_reduce(x_out, tmp); 884238384Sjkim 885296341Sdelphij /* z' = (y + z)^2 - gamma - delta */ 886296341Sdelphij felem_sum(delta, gamma); 887296341Sdelphij /* delta[i] < 2^57 + 2^57 = 2^58 */ 888296341Sdelphij felem_assign(ftmp, y_in); 889296341Sdelphij felem_sum(ftmp, z_in); 890296341Sdelphij /* ftmp[i] < 2^57 + 2^57 = 2^58 */ 891296341Sdelphij felem_square(tmp, ftmp); 892296341Sdelphij /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */ 893296341Sdelphij felem_diff_128_64(tmp, delta); 894296341Sdelphij /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */ 895296341Sdelphij felem_reduce(z_out, tmp); 896238384Sjkim 897296341Sdelphij /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 898296341Sdelphij felem_scalar(beta, 4); 899296341Sdelphij /* beta[i] < 4 * 2^57 = 2^59 */ 900296341Sdelphij felem_diff(beta, x_out); 901296341Sdelphij /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */ 902296341Sdelphij felem_mul(tmp, alpha, beta); 903296341Sdelphij /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */ 904296341Sdelphij felem_square(tmp2, gamma); 905296341Sdelphij /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */ 906296341Sdelphij widefelem_scalar(tmp2, 8); 907296341Sdelphij /* tmp2[i] < 8 * 2^116 = 2^119 */ 908296341Sdelphij widefelem_diff(tmp, tmp2); 909296341Sdelphij /* tmp[i] < 2^119 + 2^120 < 2^121 */ 910296341Sdelphij felem_reduce(y_out, tmp); 911296341Sdelphij} 912238384Sjkim 913296341Sdelphij/*- 914296341Sdelphij * Add two elliptic curve points: 915238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where 916238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 - 917238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 918238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) - 919238384Sjkim * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3 920238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2) 921238384Sjkim * 922238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0. 923238384Sjkim */ 924238384Sjkim 925296341Sdelphij/* 926296341Sdelphij * This function is not entirely constant-time: it includes a branch for 927296341Sdelphij * checking whether the two input points are equal, (while not equal to the 928296341Sdelphij * point at infinity). This case never happens during single point 929296341Sdelphij * multiplication, so there is no timing leak for ECDH or ECDSA signing. 930296341Sdelphij */ 931238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 932296341Sdelphij const felem x1, const felem y1, const felem z1, 933296341Sdelphij const int mixed, const felem x2, const felem y2, 934296341Sdelphij const felem z2) 935296341Sdelphij{ 936296341Sdelphij felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out; 937296341Sdelphij widefelem tmp, tmp2; 938296341Sdelphij limb z1_is_zero, z2_is_zero, x_equal, y_equal; 939238384Sjkim 940296341Sdelphij if (!mixed) { 941296341Sdelphij /* ftmp2 = z2^2 */ 942296341Sdelphij felem_square(tmp, z2); 943296341Sdelphij felem_reduce(ftmp2, tmp); 944238384Sjkim 945296341Sdelphij /* ftmp4 = z2^3 */ 946296341Sdelphij felem_mul(tmp, ftmp2, z2); 947296341Sdelphij felem_reduce(ftmp4, tmp); 948238384Sjkim 949296341Sdelphij /* ftmp4 = z2^3*y1 */ 950296341Sdelphij felem_mul(tmp2, ftmp4, y1); 951296341Sdelphij felem_reduce(ftmp4, tmp2); 952238384Sjkim 953296341Sdelphij /* ftmp2 = z2^2*x1 */ 954296341Sdelphij felem_mul(tmp2, ftmp2, x1); 955296341Sdelphij felem_reduce(ftmp2, tmp2); 956296341Sdelphij } else { 957296341Sdelphij /* 958296341Sdelphij * We'll assume z2 = 1 (special case z2 = 0 is handled later) 959296341Sdelphij */ 960238384Sjkim 961296341Sdelphij /* ftmp4 = z2^3*y1 */ 962296341Sdelphij felem_assign(ftmp4, y1); 963238384Sjkim 964296341Sdelphij /* ftmp2 = z2^2*x1 */ 965296341Sdelphij felem_assign(ftmp2, x1); 966296341Sdelphij } 967238384Sjkim 968296341Sdelphij /* ftmp = z1^2 */ 969296341Sdelphij felem_square(tmp, z1); 970296341Sdelphij felem_reduce(ftmp, tmp); 971238384Sjkim 972296341Sdelphij /* ftmp3 = z1^3 */ 973296341Sdelphij felem_mul(tmp, ftmp, z1); 974296341Sdelphij felem_reduce(ftmp3, tmp); 975238384Sjkim 976296341Sdelphij /* tmp = z1^3*y2 */ 977296341Sdelphij felem_mul(tmp, ftmp3, y2); 978296341Sdelphij /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 979238384Sjkim 980296341Sdelphij /* ftmp3 = z1^3*y2 - z2^3*y1 */ 981296341Sdelphij felem_diff_128_64(tmp, ftmp4); 982296341Sdelphij /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 983296341Sdelphij felem_reduce(ftmp3, tmp); 984238384Sjkim 985296341Sdelphij /* tmp = z1^2*x2 */ 986296341Sdelphij felem_mul(tmp, ftmp, x2); 987296341Sdelphij /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 988238384Sjkim 989296341Sdelphij /* ftmp = z1^2*x2 - z2^2*x1 */ 990296341Sdelphij felem_diff_128_64(tmp, ftmp2); 991296341Sdelphij /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 992296341Sdelphij felem_reduce(ftmp, tmp); 993238384Sjkim 994296341Sdelphij /* 995296341Sdelphij * the formulae are incorrect if the points are equal so we check for 996296341Sdelphij * this and do doubling if this happens 997296341Sdelphij */ 998296341Sdelphij x_equal = felem_is_zero(ftmp); 999296341Sdelphij y_equal = felem_is_zero(ftmp3); 1000296341Sdelphij z1_is_zero = felem_is_zero(z1); 1001296341Sdelphij z2_is_zero = felem_is_zero(z2); 1002296341Sdelphij /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */ 1003296341Sdelphij if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { 1004296341Sdelphij point_double(x3, y3, z3, x1, y1, z1); 1005296341Sdelphij return; 1006296341Sdelphij } 1007238384Sjkim 1008296341Sdelphij /* ftmp5 = z1*z2 */ 1009296341Sdelphij if (!mixed) { 1010296341Sdelphij felem_mul(tmp, z1, z2); 1011296341Sdelphij felem_reduce(ftmp5, tmp); 1012296341Sdelphij } else { 1013296341Sdelphij /* special case z2 = 0 is handled later */ 1014296341Sdelphij felem_assign(ftmp5, z1); 1015296341Sdelphij } 1016238384Sjkim 1017296341Sdelphij /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */ 1018296341Sdelphij felem_mul(tmp, ftmp, ftmp5); 1019296341Sdelphij felem_reduce(z_out, tmp); 1020238384Sjkim 1021296341Sdelphij /* ftmp = (z1^2*x2 - z2^2*x1)^2 */ 1022296341Sdelphij felem_assign(ftmp5, ftmp); 1023296341Sdelphij felem_square(tmp, ftmp); 1024296341Sdelphij felem_reduce(ftmp, tmp); 1025238384Sjkim 1026296341Sdelphij /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */ 1027296341Sdelphij felem_mul(tmp, ftmp, ftmp5); 1028296341Sdelphij felem_reduce(ftmp5, tmp); 1029238384Sjkim 1030296341Sdelphij /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1031296341Sdelphij felem_mul(tmp, ftmp2, ftmp); 1032296341Sdelphij felem_reduce(ftmp2, tmp); 1033238384Sjkim 1034296341Sdelphij /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */ 1035296341Sdelphij felem_mul(tmp, ftmp4, ftmp5); 1036296341Sdelphij /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 1037238384Sjkim 1038296341Sdelphij /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */ 1039296341Sdelphij felem_square(tmp2, ftmp3); 1040296341Sdelphij /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */ 1041238384Sjkim 1042296341Sdelphij /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */ 1043296341Sdelphij felem_diff_128_64(tmp2, ftmp5); 1044296341Sdelphij /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */ 1045238384Sjkim 1046296341Sdelphij /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 1047296341Sdelphij felem_assign(ftmp5, ftmp2); 1048296341Sdelphij felem_scalar(ftmp5, 2); 1049296341Sdelphij /* ftmp5[i] < 2 * 2^57 = 2^58 */ 1050238384Sjkim 1051296341Sdelphij /*- 1052296341Sdelphij * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 - 1053296341Sdelphij * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 1054296341Sdelphij */ 1055296341Sdelphij felem_diff_128_64(tmp2, ftmp5); 1056296341Sdelphij /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */ 1057296341Sdelphij felem_reduce(x_out, tmp2); 1058238384Sjkim 1059296341Sdelphij /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */ 1060296341Sdelphij felem_diff(ftmp2, x_out); 1061296341Sdelphij /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */ 1062238384Sjkim 1063296341Sdelphij /* 1064296341Sdelphij * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) 1065296341Sdelphij */ 1066296341Sdelphij felem_mul(tmp2, ftmp3, ftmp2); 1067296341Sdelphij /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */ 1068238384Sjkim 1069296341Sdelphij /*- 1070296341Sdelphij * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) - 1071296341Sdelphij * z2^3*y1*(z1^2*x2 - z2^2*x1)^3 1072296341Sdelphij */ 1073296341Sdelphij widefelem_diff(tmp2, tmp); 1074296341Sdelphij /* tmp2[i] < 2^118 + 2^120 < 2^121 */ 1075296341Sdelphij felem_reduce(y_out, tmp2); 1076238384Sjkim 1077296341Sdelphij /* 1078296341Sdelphij * the result (x_out, y_out, z_out) is incorrect if one of the inputs is 1079296341Sdelphij * the point at infinity, so we need to check for this separately 1080296341Sdelphij */ 1081238384Sjkim 1082296341Sdelphij /* 1083296341Sdelphij * if point 1 is at infinity, copy point 2 to output, and vice versa 1084296341Sdelphij */ 1085296341Sdelphij copy_conditional(x_out, x2, z1_is_zero); 1086296341Sdelphij copy_conditional(x_out, x1, z2_is_zero); 1087296341Sdelphij copy_conditional(y_out, y2, z1_is_zero); 1088296341Sdelphij copy_conditional(y_out, y1, z2_is_zero); 1089296341Sdelphij copy_conditional(z_out, z2, z1_is_zero); 1090296341Sdelphij copy_conditional(z_out, z1, z2_is_zero); 1091296341Sdelphij felem_assign(x3, x_out); 1092296341Sdelphij felem_assign(y3, y_out); 1093296341Sdelphij felem_assign(z3, z_out); 1094296341Sdelphij} 1095238384Sjkim 1096296341Sdelphij/* 1097296341Sdelphij * select_point selects the |idx|th point from a precomputation table and 1098296341Sdelphij * copies it to out. 1099296341Sdelphij * The pre_comp array argument should be size of |size| argument 1100296341Sdelphij */ 1101296341Sdelphijstatic void select_point(const u64 idx, unsigned int size, 1102296341Sdelphij const felem pre_comp[][3], felem out[3]) 1103296341Sdelphij{ 1104296341Sdelphij unsigned i, j; 1105296341Sdelphij limb *outlimbs = &out[0][0]; 1106296341Sdelphij memset(outlimbs, 0, 3 * sizeof(felem)); 1107238384Sjkim 1108296341Sdelphij for (i = 0; i < size; i++) { 1109296341Sdelphij const limb *inlimbs = &pre_comp[i][0][0]; 1110296341Sdelphij u64 mask = i ^ idx; 1111296341Sdelphij mask |= mask >> 4; 1112296341Sdelphij mask |= mask >> 2; 1113296341Sdelphij mask |= mask >> 1; 1114296341Sdelphij mask &= 1; 1115296341Sdelphij mask--; 1116296341Sdelphij for (j = 0; j < 4 * 3; j++) 1117296341Sdelphij outlimbs[j] |= inlimbs[j] & mask; 1118296341Sdelphij } 1119296341Sdelphij} 1120238384Sjkim 1121238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1122238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i) 1123296341Sdelphij{ 1124296341Sdelphij if (i >= 224) 1125296341Sdelphij return 0; 1126296341Sdelphij return (in[i >> 3] >> (i & 7)) & 1; 1127296341Sdelphij} 1128238384Sjkim 1129296341Sdelphij/* 1130296341Sdelphij * Interleaved point multiplication using precomputed point multiples: The 1131296341Sdelphij * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars 1132296341Sdelphij * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the 1133296341Sdelphij * generator, using certain (large) precomputed multiples in g_pre_comp. 1134296341Sdelphij * Output point (X, Y, Z) is stored in x_out, y_out, z_out 1135296341Sdelphij */ 1136238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1137296341Sdelphij const felem_bytearray scalars[], 1138296341Sdelphij const unsigned num_points, const u8 *g_scalar, 1139296341Sdelphij const int mixed, const felem pre_comp[][17][3], 1140296341Sdelphij const felem g_pre_comp[2][16][3]) 1141296341Sdelphij{ 1142296341Sdelphij int i, skip; 1143296341Sdelphij unsigned num; 1144296341Sdelphij unsigned gen_mul = (g_scalar != NULL); 1145296341Sdelphij felem nq[3], tmp[4]; 1146296341Sdelphij u64 bits; 1147296341Sdelphij u8 sign, digit; 1148238384Sjkim 1149296341Sdelphij /* set nq to the point at infinity */ 1150296341Sdelphij memset(nq, 0, 3 * sizeof(felem)); 1151238384Sjkim 1152296341Sdelphij /* 1153296341Sdelphij * Loop over all scalars msb-to-lsb, interleaving additions of multiples 1154296341Sdelphij * of the generator (two in each of the last 28 rounds) and additions of 1155296341Sdelphij * other points multiples (every 5th round). 1156296341Sdelphij */ 1157296341Sdelphij skip = 1; /* save two point operations in the first 1158296341Sdelphij * round */ 1159296341Sdelphij for (i = (num_points ? 220 : 27); i >= 0; --i) { 1160296341Sdelphij /* double */ 1161296341Sdelphij if (!skip) 1162296341Sdelphij point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1163238384Sjkim 1164296341Sdelphij /* add multiples of the generator */ 1165296341Sdelphij if (gen_mul && (i <= 27)) { 1166296341Sdelphij /* first, look 28 bits upwards */ 1167296341Sdelphij bits = get_bit(g_scalar, i + 196) << 3; 1168296341Sdelphij bits |= get_bit(g_scalar, i + 140) << 2; 1169296341Sdelphij bits |= get_bit(g_scalar, i + 84) << 1; 1170296341Sdelphij bits |= get_bit(g_scalar, i + 28); 1171296341Sdelphij /* select the point to add, in constant time */ 1172296341Sdelphij select_point(bits, 16, g_pre_comp[1], tmp); 1173238384Sjkim 1174296341Sdelphij if (!skip) { 1175296341Sdelphij /* value 1 below is argument for "mixed" */ 1176296341Sdelphij point_add(nq[0], nq[1], nq[2], 1177296341Sdelphij nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]); 1178296341Sdelphij } else { 1179296341Sdelphij memcpy(nq, tmp, 3 * sizeof(felem)); 1180296341Sdelphij skip = 0; 1181296341Sdelphij } 1182238384Sjkim 1183296341Sdelphij /* second, look at the current position */ 1184296341Sdelphij bits = get_bit(g_scalar, i + 168) << 3; 1185296341Sdelphij bits |= get_bit(g_scalar, i + 112) << 2; 1186296341Sdelphij bits |= get_bit(g_scalar, i + 56) << 1; 1187296341Sdelphij bits |= get_bit(g_scalar, i); 1188296341Sdelphij /* select the point to add, in constant time */ 1189296341Sdelphij select_point(bits, 16, g_pre_comp[0], tmp); 1190296341Sdelphij point_add(nq[0], nq[1], nq[2], 1191296341Sdelphij nq[0], nq[1], nq[2], 1192296341Sdelphij 1 /* mixed */ , tmp[0], tmp[1], tmp[2]); 1193296341Sdelphij } 1194238384Sjkim 1195296341Sdelphij /* do other additions every 5 doublings */ 1196296341Sdelphij if (num_points && (i % 5 == 0)) { 1197296341Sdelphij /* loop over all scalars */ 1198296341Sdelphij for (num = 0; num < num_points; ++num) { 1199296341Sdelphij bits = get_bit(scalars[num], i + 4) << 5; 1200296341Sdelphij bits |= get_bit(scalars[num], i + 3) << 4; 1201296341Sdelphij bits |= get_bit(scalars[num], i + 2) << 3; 1202296341Sdelphij bits |= get_bit(scalars[num], i + 1) << 2; 1203296341Sdelphij bits |= get_bit(scalars[num], i) << 1; 1204296341Sdelphij bits |= get_bit(scalars[num], i - 1); 1205296341Sdelphij ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1206238384Sjkim 1207296341Sdelphij /* select the point to add or subtract */ 1208296341Sdelphij select_point(digit, 17, pre_comp[num], tmp); 1209296341Sdelphij felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative 1210296341Sdelphij * point */ 1211296341Sdelphij copy_conditional(tmp[1], tmp[3], sign); 1212238384Sjkim 1213296341Sdelphij if (!skip) { 1214296341Sdelphij point_add(nq[0], nq[1], nq[2], 1215296341Sdelphij nq[0], nq[1], nq[2], 1216296341Sdelphij mixed, tmp[0], tmp[1], tmp[2]); 1217296341Sdelphij } else { 1218296341Sdelphij memcpy(nq, tmp, 3 * sizeof(felem)); 1219296341Sdelphij skip = 0; 1220296341Sdelphij } 1221296341Sdelphij } 1222296341Sdelphij } 1223296341Sdelphij } 1224296341Sdelphij felem_assign(x_out, nq[0]); 1225296341Sdelphij felem_assign(y_out, nq[1]); 1226296341Sdelphij felem_assign(z_out, nq[2]); 1227296341Sdelphij} 1228238384Sjkim 1229238384Sjkim/******************************************************************************/ 1230296341Sdelphij/* 1231296341Sdelphij * FUNCTIONS TO MANAGE PRECOMPUTATION 1232238384Sjkim */ 1233238384Sjkim 1234238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new() 1235296341Sdelphij{ 1236296341Sdelphij NISTP224_PRE_COMP *ret = NULL; 1237296341Sdelphij ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret); 1238296341Sdelphij if (!ret) { 1239296341Sdelphij ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1240296341Sdelphij return ret; 1241296341Sdelphij } 1242296341Sdelphij memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1243296341Sdelphij ret->references = 1; 1244296341Sdelphij return ret; 1245296341Sdelphij} 1246238384Sjkim 1247238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_) 1248296341Sdelphij{ 1249296341Sdelphij NISTP224_PRE_COMP *src = src_; 1250238384Sjkim 1251296341Sdelphij /* no need to actually copy, these objects never change! */ 1252296341Sdelphij CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1253238384Sjkim 1254296341Sdelphij return src_; 1255296341Sdelphij} 1256238384Sjkim 1257238384Sjkimstatic void nistp224_pre_comp_free(void *pre_) 1258296341Sdelphij{ 1259296341Sdelphij int i; 1260296341Sdelphij NISTP224_PRE_COMP *pre = pre_; 1261238384Sjkim 1262296341Sdelphij if (!pre) 1263296341Sdelphij return; 1264238384Sjkim 1265296341Sdelphij i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1266296341Sdelphij if (i > 0) 1267296341Sdelphij return; 1268238384Sjkim 1269296341Sdelphij OPENSSL_free(pre); 1270296341Sdelphij} 1271238384Sjkim 1272238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_) 1273296341Sdelphij{ 1274296341Sdelphij int i; 1275296341Sdelphij NISTP224_PRE_COMP *pre = pre_; 1276238384Sjkim 1277296341Sdelphij if (!pre) 1278296341Sdelphij return; 1279238384Sjkim 1280296341Sdelphij i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1281296341Sdelphij if (i > 0) 1282296341Sdelphij return; 1283238384Sjkim 1284296341Sdelphij OPENSSL_cleanse(pre, sizeof *pre); 1285296341Sdelphij OPENSSL_free(pre); 1286296341Sdelphij} 1287238384Sjkim 1288238384Sjkim/******************************************************************************/ 1289296341Sdelphij/* 1290296341Sdelphij * OPENSSL EC_METHOD FUNCTIONS 1291238384Sjkim */ 1292238384Sjkim 1293238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group) 1294296341Sdelphij{ 1295296341Sdelphij int ret; 1296296341Sdelphij ret = ec_GFp_simple_group_init(group); 1297296341Sdelphij group->a_is_minus3 = 1; 1298296341Sdelphij return ret; 1299296341Sdelphij} 1300238384Sjkim 1301238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1302296341Sdelphij const BIGNUM *a, const BIGNUM *b, 1303296341Sdelphij BN_CTX *ctx) 1304296341Sdelphij{ 1305296341Sdelphij int ret = 0; 1306296341Sdelphij BN_CTX *new_ctx = NULL; 1307296341Sdelphij BIGNUM *curve_p, *curve_a, *curve_b; 1308238384Sjkim 1309296341Sdelphij if (ctx == NULL) 1310296341Sdelphij if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1311296341Sdelphij return 0; 1312296341Sdelphij BN_CTX_start(ctx); 1313296341Sdelphij if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1314296341Sdelphij ((curve_a = BN_CTX_get(ctx)) == NULL) || 1315296341Sdelphij ((curve_b = BN_CTX_get(ctx)) == NULL)) 1316296341Sdelphij goto err; 1317296341Sdelphij BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1318296341Sdelphij BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1319296341Sdelphij BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1320296341Sdelphij if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { 1321296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1322296341Sdelphij EC_R_WRONG_CURVE_PARAMETERS); 1323296341Sdelphij goto err; 1324296341Sdelphij } 1325296341Sdelphij group->field_mod_func = BN_nist_mod_224; 1326296341Sdelphij ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1327296341Sdelphij err: 1328296341Sdelphij BN_CTX_end(ctx); 1329296341Sdelphij if (new_ctx != NULL) 1330296341Sdelphij BN_CTX_free(new_ctx); 1331296341Sdelphij return ret; 1332296341Sdelphij} 1333238384Sjkim 1334296341Sdelphij/* 1335296341Sdelphij * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = 1336296341Sdelphij * (X/Z^2, Y/Z^3) 1337296341Sdelphij */ 1338238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1339296341Sdelphij const EC_POINT *point, 1340296341Sdelphij BIGNUM *x, BIGNUM *y, 1341296341Sdelphij BN_CTX *ctx) 1342296341Sdelphij{ 1343296341Sdelphij felem z1, z2, x_in, y_in, x_out, y_out; 1344296341Sdelphij widefelem tmp; 1345238384Sjkim 1346296341Sdelphij if (EC_POINT_is_at_infinity(group, point)) { 1347296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1348296341Sdelphij EC_R_POINT_AT_INFINITY); 1349296341Sdelphij return 0; 1350296341Sdelphij } 1351296341Sdelphij if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1352296341Sdelphij (!BN_to_felem(z1, &point->Z))) 1353296341Sdelphij return 0; 1354296341Sdelphij felem_inv(z2, z1); 1355296341Sdelphij felem_square(tmp, z2); 1356296341Sdelphij felem_reduce(z1, tmp); 1357296341Sdelphij felem_mul(tmp, x_in, z1); 1358296341Sdelphij felem_reduce(x_in, tmp); 1359296341Sdelphij felem_contract(x_out, x_in); 1360296341Sdelphij if (x != NULL) { 1361296341Sdelphij if (!felem_to_BN(x, x_out)) { 1362296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1363296341Sdelphij ERR_R_BN_LIB); 1364296341Sdelphij return 0; 1365296341Sdelphij } 1366296341Sdelphij } 1367296341Sdelphij felem_mul(tmp, z1, z2); 1368296341Sdelphij felem_reduce(z1, tmp); 1369296341Sdelphij felem_mul(tmp, y_in, z1); 1370296341Sdelphij felem_reduce(y_in, tmp); 1371296341Sdelphij felem_contract(y_out, y_in); 1372296341Sdelphij if (y != NULL) { 1373296341Sdelphij if (!felem_to_BN(y, y_out)) { 1374296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1375296341Sdelphij ERR_R_BN_LIB); 1376296341Sdelphij return 0; 1377296341Sdelphij } 1378296341Sdelphij } 1379296341Sdelphij return 1; 1380296341Sdelphij} 1381238384Sjkim 1382296341Sdelphijstatic void make_points_affine(size_t num, felem points[ /* num */ ][3], 1383296341Sdelphij felem tmp_felems[ /* num+1 */ ]) 1384296341Sdelphij{ 1385296341Sdelphij /* 1386296341Sdelphij * Runs in constant time, unless an input is the point at infinity (which 1387296341Sdelphij * normally shouldn't happen). 1388296341Sdelphij */ 1389296341Sdelphij ec_GFp_nistp_points_make_affine_internal(num, 1390296341Sdelphij points, 1391296341Sdelphij sizeof(felem), 1392296341Sdelphij tmp_felems, 1393296341Sdelphij (void (*)(void *))felem_one, 1394296341Sdelphij (int (*)(const void *)) 1395296341Sdelphij felem_is_zero_int, 1396296341Sdelphij (void (*)(void *, const void *)) 1397296341Sdelphij felem_assign, 1398296341Sdelphij (void (*)(void *, const void *)) 1399296341Sdelphij felem_square_reduce, (void (*) 1400296341Sdelphij (void *, 1401296341Sdelphij const void 1402296341Sdelphij *, 1403296341Sdelphij const void 1404296341Sdelphij *)) 1405296341Sdelphij felem_mul_reduce, 1406296341Sdelphij (void (*)(void *, const void *)) 1407296341Sdelphij felem_inv, 1408296341Sdelphij (void (*)(void *, const void *)) 1409296341Sdelphij felem_contract); 1410296341Sdelphij} 1411238384Sjkim 1412296341Sdelphij/* 1413296341Sdelphij * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL 1414296341Sdelphij * values Result is stored in r (r can equal one of the inputs). 1415296341Sdelphij */ 1416238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, 1417296341Sdelphij const BIGNUM *scalar, size_t num, 1418296341Sdelphij const EC_POINT *points[], 1419296341Sdelphij const BIGNUM *scalars[], BN_CTX *ctx) 1420296341Sdelphij{ 1421296341Sdelphij int ret = 0; 1422296341Sdelphij int j; 1423296341Sdelphij unsigned i; 1424296341Sdelphij int mixed = 0; 1425296341Sdelphij BN_CTX *new_ctx = NULL; 1426296341Sdelphij BIGNUM *x, *y, *z, *tmp_scalar; 1427296341Sdelphij felem_bytearray g_secret; 1428296341Sdelphij felem_bytearray *secrets = NULL; 1429296341Sdelphij felem(*pre_comp)[17][3] = NULL; 1430296341Sdelphij felem *tmp_felems = NULL; 1431296341Sdelphij felem_bytearray tmp; 1432296341Sdelphij unsigned num_bytes; 1433296341Sdelphij int have_pre_comp = 0; 1434296341Sdelphij size_t num_points = num; 1435296341Sdelphij felem x_in, y_in, z_in, x_out, y_out, z_out; 1436296341Sdelphij NISTP224_PRE_COMP *pre = NULL; 1437296341Sdelphij const felem(*g_pre_comp)[16][3] = NULL; 1438296341Sdelphij EC_POINT *generator = NULL; 1439296341Sdelphij const EC_POINT *p = NULL; 1440296341Sdelphij const BIGNUM *p_scalar = NULL; 1441238384Sjkim 1442296341Sdelphij if (ctx == NULL) 1443296341Sdelphij if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1444296341Sdelphij return 0; 1445296341Sdelphij BN_CTX_start(ctx); 1446296341Sdelphij if (((x = BN_CTX_get(ctx)) == NULL) || 1447296341Sdelphij ((y = BN_CTX_get(ctx)) == NULL) || 1448296341Sdelphij ((z = BN_CTX_get(ctx)) == NULL) || 1449296341Sdelphij ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1450296341Sdelphij goto err; 1451238384Sjkim 1452296341Sdelphij if (scalar != NULL) { 1453296341Sdelphij pre = EC_EX_DATA_get_data(group->extra_data, 1454296341Sdelphij nistp224_pre_comp_dup, 1455296341Sdelphij nistp224_pre_comp_free, 1456296341Sdelphij nistp224_pre_comp_clear_free); 1457296341Sdelphij if (pre) 1458296341Sdelphij /* we have precomputation, try to use it */ 1459296341Sdelphij g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp; 1460296341Sdelphij else 1461296341Sdelphij /* try to use the standard precomputation */ 1462296341Sdelphij g_pre_comp = &gmul[0]; 1463296341Sdelphij generator = EC_POINT_new(group); 1464296341Sdelphij if (generator == NULL) 1465296341Sdelphij goto err; 1466296341Sdelphij /* get the generator from precomputation */ 1467296341Sdelphij if (!felem_to_BN(x, g_pre_comp[0][1][0]) || 1468296341Sdelphij !felem_to_BN(y, g_pre_comp[0][1][1]) || 1469296341Sdelphij !felem_to_BN(z, g_pre_comp[0][1][2])) { 1470296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1471296341Sdelphij goto err; 1472296341Sdelphij } 1473296341Sdelphij if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1474296341Sdelphij generator, x, y, z, 1475296341Sdelphij ctx)) 1476296341Sdelphij goto err; 1477296341Sdelphij if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1478296341Sdelphij /* precomputation matches generator */ 1479296341Sdelphij have_pre_comp = 1; 1480296341Sdelphij else 1481296341Sdelphij /* 1482296341Sdelphij * we don't have valid precomputation: treat the generator as a 1483296341Sdelphij * random point 1484296341Sdelphij */ 1485296341Sdelphij num_points = num_points + 1; 1486296341Sdelphij } 1487238384Sjkim 1488296341Sdelphij if (num_points > 0) { 1489296341Sdelphij if (num_points >= 3) { 1490296341Sdelphij /* 1491296341Sdelphij * unless we precompute multiples for just one or two points, 1492296341Sdelphij * converting those into affine form is time well spent 1493296341Sdelphij */ 1494296341Sdelphij mixed = 1; 1495296341Sdelphij } 1496296341Sdelphij secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1497296341Sdelphij pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1498296341Sdelphij if (mixed) 1499296341Sdelphij tmp_felems = 1500296341Sdelphij OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1501296341Sdelphij if ((secrets == NULL) || (pre_comp == NULL) 1502296341Sdelphij || (mixed && (tmp_felems == NULL))) { 1503296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1504296341Sdelphij goto err; 1505296341Sdelphij } 1506238384Sjkim 1507296341Sdelphij /* 1508296341Sdelphij * we treat NULL scalars as 0, and NULL points as points at infinity, 1509296341Sdelphij * i.e., they contribute nothing to the linear combination 1510296341Sdelphij */ 1511296341Sdelphij memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1512296341Sdelphij memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1513296341Sdelphij for (i = 0; i < num_points; ++i) { 1514296341Sdelphij if (i == num) 1515296341Sdelphij /* the generator */ 1516296341Sdelphij { 1517296341Sdelphij p = EC_GROUP_get0_generator(group); 1518296341Sdelphij p_scalar = scalar; 1519296341Sdelphij } else 1520296341Sdelphij /* the i^th point */ 1521296341Sdelphij { 1522296341Sdelphij p = points[i]; 1523296341Sdelphij p_scalar = scalars[i]; 1524296341Sdelphij } 1525296341Sdelphij if ((p_scalar != NULL) && (p != NULL)) { 1526296341Sdelphij /* reduce scalar to 0 <= scalar < 2^224 */ 1527296341Sdelphij if ((BN_num_bits(p_scalar) > 224) 1528296341Sdelphij || (BN_is_negative(p_scalar))) { 1529296341Sdelphij /* 1530296341Sdelphij * this is an unusual input, and we don't guarantee 1531296341Sdelphij * constant-timeness 1532296341Sdelphij */ 1533296341Sdelphij if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) { 1534296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1535296341Sdelphij goto err; 1536296341Sdelphij } 1537296341Sdelphij num_bytes = BN_bn2bin(tmp_scalar, tmp); 1538296341Sdelphij } else 1539296341Sdelphij num_bytes = BN_bn2bin(p_scalar, tmp); 1540296341Sdelphij flip_endian(secrets[i], tmp, num_bytes); 1541296341Sdelphij /* precompute multiples */ 1542296341Sdelphij if ((!BN_to_felem(x_out, &p->X)) || 1543296341Sdelphij (!BN_to_felem(y_out, &p->Y)) || 1544296341Sdelphij (!BN_to_felem(z_out, &p->Z))) 1545296341Sdelphij goto err; 1546296341Sdelphij felem_assign(pre_comp[i][1][0], x_out); 1547296341Sdelphij felem_assign(pre_comp[i][1][1], y_out); 1548296341Sdelphij felem_assign(pre_comp[i][1][2], z_out); 1549296341Sdelphij for (j = 2; j <= 16; ++j) { 1550296341Sdelphij if (j & 1) { 1551296341Sdelphij point_add(pre_comp[i][j][0], pre_comp[i][j][1], 1552296341Sdelphij pre_comp[i][j][2], pre_comp[i][1][0], 1553296341Sdelphij pre_comp[i][1][1], pre_comp[i][1][2], 0, 1554296341Sdelphij pre_comp[i][j - 1][0], 1555296341Sdelphij pre_comp[i][j - 1][1], 1556296341Sdelphij pre_comp[i][j - 1][2]); 1557296341Sdelphij } else { 1558296341Sdelphij point_double(pre_comp[i][j][0], pre_comp[i][j][1], 1559296341Sdelphij pre_comp[i][j][2], pre_comp[i][j / 2][0], 1560296341Sdelphij pre_comp[i][j / 2][1], 1561296341Sdelphij pre_comp[i][j / 2][2]); 1562296341Sdelphij } 1563296341Sdelphij } 1564296341Sdelphij } 1565296341Sdelphij } 1566296341Sdelphij if (mixed) 1567296341Sdelphij make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1568296341Sdelphij } 1569238384Sjkim 1570296341Sdelphij /* the scalar for the generator */ 1571296341Sdelphij if ((scalar != NULL) && (have_pre_comp)) { 1572296341Sdelphij memset(g_secret, 0, sizeof g_secret); 1573296341Sdelphij /* reduce scalar to 0 <= scalar < 2^224 */ 1574296341Sdelphij if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) { 1575296341Sdelphij /* 1576296341Sdelphij * this is an unusual input, and we don't guarantee 1577296341Sdelphij * constant-timeness 1578296341Sdelphij */ 1579296341Sdelphij if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { 1580296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1581296341Sdelphij goto err; 1582296341Sdelphij } 1583296341Sdelphij num_bytes = BN_bn2bin(tmp_scalar, tmp); 1584296341Sdelphij } else 1585296341Sdelphij num_bytes = BN_bn2bin(scalar, tmp); 1586296341Sdelphij flip_endian(g_secret, tmp, num_bytes); 1587296341Sdelphij /* do the multiplication with generator precomputation */ 1588296341Sdelphij batch_mul(x_out, y_out, z_out, 1589296341Sdelphij (const felem_bytearray(*))secrets, num_points, 1590296341Sdelphij g_secret, 1591296341Sdelphij mixed, (const felem(*)[17][3])pre_comp, g_pre_comp); 1592296341Sdelphij } else 1593296341Sdelphij /* do the multiplication without generator precomputation */ 1594296341Sdelphij batch_mul(x_out, y_out, z_out, 1595296341Sdelphij (const felem_bytearray(*))secrets, num_points, 1596296341Sdelphij NULL, mixed, (const felem(*)[17][3])pre_comp, NULL); 1597296341Sdelphij /* reduce the output to its unique minimal representation */ 1598296341Sdelphij felem_contract(x_in, x_out); 1599296341Sdelphij felem_contract(y_in, y_out); 1600296341Sdelphij felem_contract(z_in, z_out); 1601296341Sdelphij if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1602296341Sdelphij (!felem_to_BN(z, z_in))) { 1603296341Sdelphij ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1604296341Sdelphij goto err; 1605296341Sdelphij } 1606296341Sdelphij ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1607238384Sjkim 1608296341Sdelphij err: 1609296341Sdelphij BN_CTX_end(ctx); 1610296341Sdelphij if (generator != NULL) 1611296341Sdelphij EC_POINT_free(generator); 1612296341Sdelphij if (new_ctx != NULL) 1613296341Sdelphij BN_CTX_free(new_ctx); 1614296341Sdelphij if (secrets != NULL) 1615296341Sdelphij OPENSSL_free(secrets); 1616296341Sdelphij if (pre_comp != NULL) 1617296341Sdelphij OPENSSL_free(pre_comp); 1618296341Sdelphij if (tmp_felems != NULL) 1619296341Sdelphij OPENSSL_free(tmp_felems); 1620296341Sdelphij return ret; 1621296341Sdelphij} 1622238384Sjkim 1623238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1624296341Sdelphij{ 1625296341Sdelphij int ret = 0; 1626296341Sdelphij NISTP224_PRE_COMP *pre = NULL; 1627296341Sdelphij int i, j; 1628296341Sdelphij BN_CTX *new_ctx = NULL; 1629296341Sdelphij BIGNUM *x, *y; 1630296341Sdelphij EC_POINT *generator = NULL; 1631296341Sdelphij felem tmp_felems[32]; 1632238384Sjkim 1633296341Sdelphij /* throw away old precomputation */ 1634296341Sdelphij EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup, 1635296341Sdelphij nistp224_pre_comp_free, 1636296341Sdelphij nistp224_pre_comp_clear_free); 1637296341Sdelphij if (ctx == NULL) 1638296341Sdelphij if ((ctx = new_ctx = BN_CTX_new()) == NULL) 1639296341Sdelphij return 0; 1640296341Sdelphij BN_CTX_start(ctx); 1641296341Sdelphij if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL)) 1642296341Sdelphij goto err; 1643296341Sdelphij /* get the generator */ 1644296341Sdelphij if (group->generator == NULL) 1645296341Sdelphij goto err; 1646296341Sdelphij generator = EC_POINT_new(group); 1647296341Sdelphij if (generator == NULL) 1648296341Sdelphij goto err; 1649296341Sdelphij BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x); 1650296341Sdelphij BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y); 1651296341Sdelphij if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1652296341Sdelphij goto err; 1653296341Sdelphij if ((pre = nistp224_pre_comp_new()) == NULL) 1654296341Sdelphij goto err; 1655296341Sdelphij /* 1656296341Sdelphij * if the generator is the standard one, use built-in precomputation 1657296341Sdelphij */ 1658296341Sdelphij if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { 1659296341Sdelphij memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1660296341Sdelphij ret = 1; 1661296341Sdelphij goto err; 1662296341Sdelphij } 1663296341Sdelphij if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || 1664296341Sdelphij (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || 1665296341Sdelphij (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z))) 1666296341Sdelphij goto err; 1667296341Sdelphij /* 1668296341Sdelphij * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G, 1669296341Sdelphij * 2^140*G, 2^196*G for the second one 1670296341Sdelphij */ 1671296341Sdelphij for (i = 1; i <= 8; i <<= 1) { 1672296341Sdelphij point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1673296341Sdelphij pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0], 1674296341Sdelphij pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 1675296341Sdelphij for (j = 0; j < 27; ++j) { 1676296341Sdelphij point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], 1677296341Sdelphij pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0], 1678296341Sdelphij pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1679296341Sdelphij } 1680296341Sdelphij if (i == 8) 1681296341Sdelphij break; 1682296341Sdelphij point_double(pre->g_pre_comp[0][2 * i][0], 1683296341Sdelphij pre->g_pre_comp[0][2 * i][1], 1684296341Sdelphij pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0], 1685296341Sdelphij pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1686296341Sdelphij for (j = 0; j < 27; ++j) { 1687296341Sdelphij point_double(pre->g_pre_comp[0][2 * i][0], 1688296341Sdelphij pre->g_pre_comp[0][2 * i][1], 1689296341Sdelphij pre->g_pre_comp[0][2 * i][2], 1690296341Sdelphij pre->g_pre_comp[0][2 * i][0], 1691296341Sdelphij pre->g_pre_comp[0][2 * i][1], 1692296341Sdelphij pre->g_pre_comp[0][2 * i][2]); 1693296341Sdelphij } 1694296341Sdelphij } 1695296341Sdelphij for (i = 0; i < 2; i++) { 1696296341Sdelphij /* g_pre_comp[i][0] is the point at infinity */ 1697296341Sdelphij memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 1698296341Sdelphij /* the remaining multiples */ 1699296341Sdelphij /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */ 1700296341Sdelphij point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], 1701296341Sdelphij pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0], 1702296341Sdelphij pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 1703296341Sdelphij 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1704296341Sdelphij pre->g_pre_comp[i][2][2]); 1705296341Sdelphij /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */ 1706296341Sdelphij point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], 1707296341Sdelphij pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0], 1708296341Sdelphij pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1709296341Sdelphij 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1710296341Sdelphij pre->g_pre_comp[i][2][2]); 1711296341Sdelphij /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */ 1712296341Sdelphij point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], 1713296341Sdelphij pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0], 1714296341Sdelphij pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1715296341Sdelphij 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], 1716296341Sdelphij pre->g_pre_comp[i][4][2]); 1717296341Sdelphij /* 1718296341Sdelphij * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G 1719296341Sdelphij */ 1720296341Sdelphij point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], 1721296341Sdelphij pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0], 1722296341Sdelphij pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 1723296341Sdelphij 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1724296341Sdelphij pre->g_pre_comp[i][2][2]); 1725296341Sdelphij for (j = 1; j < 8; ++j) { 1726296341Sdelphij /* odd multiples: add G resp. 2^28*G */ 1727296341Sdelphij point_add(pre->g_pre_comp[i][2 * j + 1][0], 1728296341Sdelphij pre->g_pre_comp[i][2 * j + 1][1], 1729296341Sdelphij pre->g_pre_comp[i][2 * j + 1][2], 1730296341Sdelphij pre->g_pre_comp[i][2 * j][0], 1731296341Sdelphij pre->g_pre_comp[i][2 * j][1], 1732296341Sdelphij pre->g_pre_comp[i][2 * j][2], 0, 1733296341Sdelphij pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], 1734296341Sdelphij pre->g_pre_comp[i][1][2]); 1735296341Sdelphij } 1736296341Sdelphij } 1737296341Sdelphij make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); 1738238384Sjkim 1739296341Sdelphij if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, 1740296341Sdelphij nistp224_pre_comp_free, 1741296341Sdelphij nistp224_pre_comp_clear_free)) 1742296341Sdelphij goto err; 1743296341Sdelphij ret = 1; 1744296341Sdelphij pre = NULL; 1745238384Sjkim err: 1746296341Sdelphij BN_CTX_end(ctx); 1747296341Sdelphij if (generator != NULL) 1748296341Sdelphij EC_POINT_free(generator); 1749296341Sdelphij if (new_ctx != NULL) 1750296341Sdelphij BN_CTX_free(new_ctx); 1751296341Sdelphij if (pre) 1752296341Sdelphij nistp224_pre_comp_free(pre); 1753296341Sdelphij return ret; 1754296341Sdelphij} 1755238384Sjkim 1756238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group) 1757296341Sdelphij{ 1758296341Sdelphij if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup, 1759296341Sdelphij nistp224_pre_comp_free, 1760296341Sdelphij nistp224_pre_comp_clear_free) 1761296341Sdelphij != NULL) 1762296341Sdelphij return 1; 1763296341Sdelphij else 1764296341Sdelphij return 0; 1765296341Sdelphij} 1766238384Sjkim 1767238384Sjkim#else 1768296341Sdelphijstatic void *dummy = &dummy; 1769238384Sjkim#endif 1770