1238384Sjkim/* crypto/ec/ecp_nistp224.c */ 2238384Sjkim/* 3238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project. 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation 25238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519 26238384Sjkim */ 27238384Sjkim 28238384Sjkim#include <openssl/opensslconf.h> 29238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 30238384Sjkim 31238384Sjkim#ifndef OPENSSL_SYS_VMS 32238384Sjkim#include <stdint.h> 33238384Sjkim#else 34238384Sjkim#include <inttypes.h> 35238384Sjkim#endif 36238384Sjkim 37238384Sjkim#include <string.h> 38238384Sjkim#include <openssl/err.h> 39238384Sjkim#include "ec_lcl.h" 40238384Sjkim 41238384Sjkim#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 42238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 43238384Sjkim typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */ 44238384Sjkim#else 45238384Sjkim #error "Need GCC 3.1 or later to define type uint128_t" 46238384Sjkim#endif 47238384Sjkim 48238384Sjkimtypedef uint8_t u8; 49238384Sjkimtypedef uint64_t u64; 50238384Sjkimtypedef int64_t s64; 51238384Sjkim 52238384Sjkim 53238384Sjkim/******************************************************************************/ 54238384Sjkim/* INTERNAL REPRESENTATION OF FIELD ELEMENTS 55238384Sjkim * 56238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3 57238384Sjkim * using 64-bit coefficients called 'limbs', 58238384Sjkim * and sometimes (for multiplication results) as 59238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6 60238384Sjkim * using 128-bit coefficients called 'widelimbs'. 61238384Sjkim * A 4-limb representation is an 'felem'; 62238384Sjkim * a 7-widelimb representation is a 'widefelem'. 63238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always 64238384Sjkim * reduce the representations: we ensure that inputs to each felem 65238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60, 66238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then 67238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57. 68238384Sjkim * We only reduce to the unique minimal representation at the end of the 69238384Sjkim * computation. 70238384Sjkim */ 71238384Sjkim 72238384Sjkimtypedef uint64_t limb; 73238384Sjkimtypedef uint128_t widelimb; 74238384Sjkim 75238384Sjkimtypedef limb felem[4]; 76238384Sjkimtypedef widelimb widefelem[7]; 77238384Sjkim 78238384Sjkim/* Field element represented as a byte arrary. 79238384Sjkim * 28*8 = 224 bits is also the group order size for the elliptic curve, 80238384Sjkim * and we also use this type for scalars for point multiplication. 81238384Sjkim */ 82238384Sjkimtypedef u8 felem_bytearray[28]; 83238384Sjkim 84238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = { 85238384Sjkim {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */ 86238384Sjkim 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00, 87238384Sjkim 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01}, 88238384Sjkim {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */ 89238384Sjkim 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF, 90238384Sjkim 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE}, 91238384Sjkim {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */ 92238384Sjkim 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA, 93238384Sjkim 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4}, 94238384Sjkim {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */ 95238384Sjkim 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22, 96238384Sjkim 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21}, 97238384Sjkim {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */ 98238384Sjkim 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64, 99238384Sjkim 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34} 100238384Sjkim}; 101238384Sjkim 102238384Sjkim/* Precomputed multiples of the standard generator 103238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1 104238384Sjkim * (0 for the point at infinity). 105238384Sjkim * For each field element, slice a_0 is word 0, etc. 106238384Sjkim * 107238384Sjkim * The table has 2 * 16 elements, starting with the following: 108238384Sjkim * index | bits | point 109238384Sjkim * ------+---------+------------------------------ 110238384Sjkim * 0 | 0 0 0 0 | 0G 111238384Sjkim * 1 | 0 0 0 1 | 1G 112238384Sjkim * 2 | 0 0 1 0 | 2^56G 113238384Sjkim * 3 | 0 0 1 1 | (2^56 + 1)G 114238384Sjkim * 4 | 0 1 0 0 | 2^112G 115238384Sjkim * 5 | 0 1 0 1 | (2^112 + 1)G 116238384Sjkim * 6 | 0 1 1 0 | (2^112 + 2^56)G 117238384Sjkim * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G 118238384Sjkim * 8 | 1 0 0 0 | 2^168G 119238384Sjkim * 9 | 1 0 0 1 | (2^168 + 1)G 120238384Sjkim * 10 | 1 0 1 0 | (2^168 + 2^56)G 121238384Sjkim * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G 122238384Sjkim * 12 | 1 1 0 0 | (2^168 + 2^112)G 123238384Sjkim * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G 124238384Sjkim * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G 125238384Sjkim * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G 126238384Sjkim * followed by a copy of this with each element multiplied by 2^28. 127238384Sjkim * 128238384Sjkim * The reason for this is so that we can clock bits into four different 129238384Sjkim * locations when doing simple scalar multiplies against the base point, 130238384Sjkim * and then another four locations using the second 16 elements. 131238384Sjkim */ 132238384Sjkimstatic const felem gmul[2][16][3] = 133238384Sjkim{{{{0, 0, 0, 0}, 134238384Sjkim {0, 0, 0, 0}, 135238384Sjkim {0, 0, 0, 0}}, 136238384Sjkim {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf}, 137238384Sjkim {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723}, 138238384Sjkim {1, 0, 0, 0}}, 139238384Sjkim {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5}, 140238384Sjkim {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321}, 141238384Sjkim {1, 0, 0, 0}}, 142238384Sjkim {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748}, 143238384Sjkim {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17}, 144238384Sjkim {1, 0, 0, 0}}, 145238384Sjkim {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe}, 146238384Sjkim {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b}, 147238384Sjkim {1, 0, 0, 0}}, 148238384Sjkim {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3}, 149238384Sjkim {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a}, 150238384Sjkim {1, 0, 0, 0}}, 151238384Sjkim {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c}, 152238384Sjkim {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244}, 153238384Sjkim {1, 0, 0, 0}}, 154238384Sjkim {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849}, 155238384Sjkim {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112}, 156238384Sjkim {1, 0, 0, 0}}, 157238384Sjkim {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47}, 158238384Sjkim {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394}, 159238384Sjkim {1, 0, 0, 0}}, 160238384Sjkim {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d}, 161238384Sjkim {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7}, 162238384Sjkim {1, 0, 0, 0}}, 163238384Sjkim {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24}, 164238384Sjkim {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881}, 165238384Sjkim {1, 0, 0, 0}}, 166238384Sjkim {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984}, 167238384Sjkim {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369}, 168238384Sjkim {1, 0, 0, 0}}, 169238384Sjkim {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3}, 170238384Sjkim {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60}, 171238384Sjkim {1, 0, 0, 0}}, 172238384Sjkim {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057}, 173238384Sjkim {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9}, 174238384Sjkim {1, 0, 0, 0}}, 175238384Sjkim {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9}, 176238384Sjkim {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc}, 177238384Sjkim {1, 0, 0, 0}}, 178238384Sjkim {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58}, 179238384Sjkim {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558}, 180238384Sjkim {1, 0, 0, 0}}}, 181238384Sjkim {{{0, 0, 0, 0}, 182238384Sjkim {0, 0, 0, 0}, 183238384Sjkim {0, 0, 0, 0}}, 184238384Sjkim {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31}, 185238384Sjkim {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d}, 186238384Sjkim {1, 0, 0, 0}}, 187238384Sjkim {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3}, 188238384Sjkim {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a}, 189238384Sjkim {1, 0, 0, 0}}, 190238384Sjkim {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33}, 191238384Sjkim {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100}, 192238384Sjkim {1, 0, 0, 0}}, 193238384Sjkim {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5}, 194238384Sjkim {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea}, 195238384Sjkim {1, 0, 0, 0}}, 196238384Sjkim {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be}, 197238384Sjkim {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51}, 198238384Sjkim {1, 0, 0, 0}}, 199238384Sjkim {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1}, 200238384Sjkim {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb}, 201238384Sjkim {1, 0, 0, 0}}, 202238384Sjkim {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233}, 203238384Sjkim {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def}, 204238384Sjkim {1, 0, 0, 0}}, 205238384Sjkim {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae}, 206238384Sjkim {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45}, 207238384Sjkim {1, 0, 0, 0}}, 208238384Sjkim {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e}, 209238384Sjkim {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb}, 210238384Sjkim {1, 0, 0, 0}}, 211238384Sjkim {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de}, 212238384Sjkim {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3}, 213238384Sjkim {1, 0, 0, 0}}, 214238384Sjkim {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05}, 215238384Sjkim {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58}, 216238384Sjkim {1, 0, 0, 0}}, 217238384Sjkim {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb}, 218238384Sjkim {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0}, 219238384Sjkim {1, 0, 0, 0}}, 220238384Sjkim {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9}, 221238384Sjkim {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea}, 222238384Sjkim {1, 0, 0, 0}}, 223238384Sjkim {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba}, 224238384Sjkim {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405}, 225238384Sjkim {1, 0, 0, 0}}, 226238384Sjkim {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e}, 227238384Sjkim {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e}, 228238384Sjkim {1, 0, 0, 0}}}}; 229238384Sjkim 230238384Sjkim/* Precomputation for the group generator. */ 231238384Sjkimtypedef struct { 232238384Sjkim felem g_pre_comp[2][16][3]; 233238384Sjkim int references; 234238384Sjkim} NISTP224_PRE_COMP; 235238384Sjkim 236238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void) 237238384Sjkim { 238238384Sjkim static const EC_METHOD ret = { 239238384Sjkim EC_FLAGS_DEFAULT_OCT, 240238384Sjkim NID_X9_62_prime_field, 241238384Sjkim ec_GFp_nistp224_group_init, 242238384Sjkim ec_GFp_simple_group_finish, 243238384Sjkim ec_GFp_simple_group_clear_finish, 244238384Sjkim ec_GFp_nist_group_copy, 245238384Sjkim ec_GFp_nistp224_group_set_curve, 246238384Sjkim ec_GFp_simple_group_get_curve, 247238384Sjkim ec_GFp_simple_group_get_degree, 248238384Sjkim ec_GFp_simple_group_check_discriminant, 249238384Sjkim ec_GFp_simple_point_init, 250238384Sjkim ec_GFp_simple_point_finish, 251238384Sjkim ec_GFp_simple_point_clear_finish, 252238384Sjkim ec_GFp_simple_point_copy, 253238384Sjkim ec_GFp_simple_point_set_to_infinity, 254238384Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 255238384Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 256238384Sjkim ec_GFp_simple_point_set_affine_coordinates, 257238384Sjkim ec_GFp_nistp224_point_get_affine_coordinates, 258238384Sjkim 0 /* point_set_compressed_coordinates */, 259238384Sjkim 0 /* point2oct */, 260238384Sjkim 0 /* oct2point */, 261238384Sjkim ec_GFp_simple_add, 262238384Sjkim ec_GFp_simple_dbl, 263238384Sjkim ec_GFp_simple_invert, 264238384Sjkim ec_GFp_simple_is_at_infinity, 265238384Sjkim ec_GFp_simple_is_on_curve, 266238384Sjkim ec_GFp_simple_cmp, 267238384Sjkim ec_GFp_simple_make_affine, 268238384Sjkim ec_GFp_simple_points_make_affine, 269238384Sjkim ec_GFp_nistp224_points_mul, 270238384Sjkim ec_GFp_nistp224_precompute_mult, 271238384Sjkim ec_GFp_nistp224_have_precompute_mult, 272238384Sjkim ec_GFp_nist_field_mul, 273238384Sjkim ec_GFp_nist_field_sqr, 274238384Sjkim 0 /* field_div */, 275238384Sjkim 0 /* field_encode */, 276238384Sjkim 0 /* field_decode */, 277238384Sjkim 0 /* field_set_to_one */ }; 278238384Sjkim 279238384Sjkim return &ret; 280238384Sjkim } 281238384Sjkim 282238384Sjkim/* Helper functions to convert field elements to/from internal representation */ 283238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28]) 284238384Sjkim { 285238384Sjkim out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; 286238384Sjkim out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff; 287238384Sjkim out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff; 288238384Sjkim out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff; 289238384Sjkim } 290238384Sjkim 291238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in) 292238384Sjkim { 293238384Sjkim unsigned i; 294238384Sjkim for (i = 0; i < 7; ++i) 295238384Sjkim { 296238384Sjkim out[i] = in[0]>>(8*i); 297238384Sjkim out[i+7] = in[1]>>(8*i); 298238384Sjkim out[i+14] = in[2]>>(8*i); 299238384Sjkim out[i+21] = in[3]>>(8*i); 300238384Sjkim } 301238384Sjkim } 302238384Sjkim 303238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 304238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 305238384Sjkim { 306238384Sjkim unsigned i; 307238384Sjkim for (i = 0; i < len; ++i) 308238384Sjkim out[i] = in[len-1-i]; 309238384Sjkim } 310238384Sjkim 311238384Sjkim/* From OpenSSL BIGNUM to internal representation */ 312238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 313238384Sjkim { 314238384Sjkim felem_bytearray b_in; 315238384Sjkim felem_bytearray b_out; 316238384Sjkim unsigned num_bytes; 317238384Sjkim 318238384Sjkim /* BN_bn2bin eats leading zeroes */ 319238384Sjkim memset(b_out, 0, sizeof b_out); 320238384Sjkim num_bytes = BN_num_bytes(bn); 321238384Sjkim if (num_bytes > sizeof b_out) 322238384Sjkim { 323238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 324238384Sjkim return 0; 325238384Sjkim } 326238384Sjkim if (BN_is_negative(bn)) 327238384Sjkim { 328238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 329238384Sjkim return 0; 330238384Sjkim } 331238384Sjkim num_bytes = BN_bn2bin(bn, b_in); 332238384Sjkim flip_endian(b_out, b_in, num_bytes); 333238384Sjkim bin28_to_felem(out, b_out); 334238384Sjkim return 1; 335238384Sjkim } 336238384Sjkim 337238384Sjkim/* From internal representation to OpenSSL BIGNUM */ 338238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 339238384Sjkim { 340238384Sjkim felem_bytearray b_in, b_out; 341238384Sjkim felem_to_bin28(b_in, in); 342238384Sjkim flip_endian(b_out, b_in, sizeof b_out); 343238384Sjkim return BN_bin2bn(b_out, sizeof b_out, out); 344238384Sjkim } 345238384Sjkim 346238384Sjkim/******************************************************************************/ 347238384Sjkim/* FIELD OPERATIONS 348238384Sjkim * 349238384Sjkim * Field operations, using the internal representation of field elements. 350238384Sjkim * NB! These operations are specific to our point multiplication and cannot be 351238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar 352238384Sjkim * will cause an overflow. 353238384Sjkim * 354238384Sjkim */ 355238384Sjkim 356238384Sjkimstatic void felem_one(felem out) 357238384Sjkim { 358238384Sjkim out[0] = 1; 359238384Sjkim out[1] = 0; 360238384Sjkim out[2] = 0; 361238384Sjkim out[3] = 0; 362238384Sjkim } 363238384Sjkim 364238384Sjkimstatic void felem_assign(felem out, const felem in) 365238384Sjkim { 366238384Sjkim out[0] = in[0]; 367238384Sjkim out[1] = in[1]; 368238384Sjkim out[2] = in[2]; 369238384Sjkim out[3] = in[3]; 370238384Sjkim } 371238384Sjkim 372238384Sjkim/* Sum two field elements: out += in */ 373238384Sjkimstatic void felem_sum(felem out, const felem in) 374238384Sjkim { 375238384Sjkim out[0] += in[0]; 376238384Sjkim out[1] += in[1]; 377238384Sjkim out[2] += in[2]; 378238384Sjkim out[3] += in[3]; 379238384Sjkim } 380238384Sjkim 381238384Sjkim/* Get negative value: out = -in */ 382238384Sjkim/* Assumes in[i] < 2^57 */ 383238384Sjkimstatic void felem_neg(felem out, const felem in) 384238384Sjkim { 385238384Sjkim static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 386238384Sjkim static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 387238384Sjkim static const limb two58m42m2 = (((limb) 1) << 58) - 388238384Sjkim (((limb) 1) << 42) - (((limb) 1) << 2); 389238384Sjkim 390238384Sjkim /* Set to 0 mod 2^224-2^96+1 to ensure out > in */ 391238384Sjkim out[0] = two58p2 - in[0]; 392238384Sjkim out[1] = two58m42m2 - in[1]; 393238384Sjkim out[2] = two58m2 - in[2]; 394238384Sjkim out[3] = two58m2 - in[3]; 395238384Sjkim } 396238384Sjkim 397238384Sjkim/* Subtract field elements: out -= in */ 398238384Sjkim/* Assumes in[i] < 2^57 */ 399238384Sjkimstatic void felem_diff(felem out, const felem in) 400238384Sjkim { 401238384Sjkim static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2); 402238384Sjkim static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2); 403238384Sjkim static const limb two58m42m2 = (((limb) 1) << 58) - 404238384Sjkim (((limb) 1) << 42) - (((limb) 1) << 2); 405238384Sjkim 406238384Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 407238384Sjkim out[0] += two58p2; 408238384Sjkim out[1] += two58m42m2; 409238384Sjkim out[2] += two58m2; 410238384Sjkim out[3] += two58m2; 411238384Sjkim 412238384Sjkim out[0] -= in[0]; 413238384Sjkim out[1] -= in[1]; 414238384Sjkim out[2] -= in[2]; 415238384Sjkim out[3] -= in[3]; 416238384Sjkim } 417238384Sjkim 418238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */ 419238384Sjkim/* Assumes in[i] < 2^119 */ 420238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in) 421238384Sjkim { 422238384Sjkim static const widelimb two120 = ((widelimb) 1) << 120; 423238384Sjkim static const widelimb two120m64 = (((widelimb) 1) << 120) - 424238384Sjkim (((widelimb) 1) << 64); 425238384Sjkim static const widelimb two120m104m64 = (((widelimb) 1) << 120) - 426238384Sjkim (((widelimb) 1) << 104) - (((widelimb) 1) << 64); 427238384Sjkim 428238384Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 429238384Sjkim out[0] += two120; 430238384Sjkim out[1] += two120m64; 431238384Sjkim out[2] += two120m64; 432238384Sjkim out[3] += two120; 433238384Sjkim out[4] += two120m104m64; 434238384Sjkim out[5] += two120m64; 435238384Sjkim out[6] += two120m64; 436238384Sjkim 437238384Sjkim out[0] -= in[0]; 438238384Sjkim out[1] -= in[1]; 439238384Sjkim out[2] -= in[2]; 440238384Sjkim out[3] -= in[3]; 441238384Sjkim out[4] -= in[4]; 442238384Sjkim out[5] -= in[5]; 443238384Sjkim out[6] -= in[6]; 444238384Sjkim } 445238384Sjkim 446238384Sjkim/* Subtract in mixed mode: out128 -= in64 */ 447238384Sjkim/* in[i] < 2^63 */ 448238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in) 449238384Sjkim { 450238384Sjkim static const widelimb two64p8 = (((widelimb) 1) << 64) + 451238384Sjkim (((widelimb) 1) << 8); 452238384Sjkim static const widelimb two64m8 = (((widelimb) 1) << 64) - 453238384Sjkim (((widelimb) 1) << 8); 454238384Sjkim static const widelimb two64m48m8 = (((widelimb) 1) << 64) - 455238384Sjkim (((widelimb) 1) << 48) - (((widelimb) 1) << 8); 456238384Sjkim 457238384Sjkim /* Add 0 mod 2^224-2^96+1 to ensure out > in */ 458238384Sjkim out[0] += two64p8; 459238384Sjkim out[1] += two64m48m8; 460238384Sjkim out[2] += two64m8; 461238384Sjkim out[3] += two64m8; 462238384Sjkim 463238384Sjkim out[0] -= in[0]; 464238384Sjkim out[1] -= in[1]; 465238384Sjkim out[2] -= in[2]; 466238384Sjkim out[3] -= in[3]; 467238384Sjkim } 468238384Sjkim 469238384Sjkim/* Multiply a field element by a scalar: out = out * scalar 470238384Sjkim * The scalars we actually use are small, so results fit without overflow */ 471238384Sjkimstatic void felem_scalar(felem out, const limb scalar) 472238384Sjkim { 473238384Sjkim out[0] *= scalar; 474238384Sjkim out[1] *= scalar; 475238384Sjkim out[2] *= scalar; 476238384Sjkim out[3] *= scalar; 477238384Sjkim } 478238384Sjkim 479238384Sjkim/* Multiply an unreduced field element by a scalar: out = out * scalar 480238384Sjkim * The scalars we actually use are small, so results fit without overflow */ 481238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar) 482238384Sjkim { 483238384Sjkim out[0] *= scalar; 484238384Sjkim out[1] *= scalar; 485238384Sjkim out[2] *= scalar; 486238384Sjkim out[3] *= scalar; 487238384Sjkim out[4] *= scalar; 488238384Sjkim out[5] *= scalar; 489238384Sjkim out[6] *= scalar; 490238384Sjkim } 491238384Sjkim 492238384Sjkim/* Square a field element: out = in^2 */ 493238384Sjkimstatic void felem_square(widefelem out, const felem in) 494238384Sjkim { 495238384Sjkim limb tmp0, tmp1, tmp2; 496238384Sjkim tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2]; 497238384Sjkim out[0] = ((widelimb) in[0]) * in[0]; 498238384Sjkim out[1] = ((widelimb) in[0]) * tmp1; 499238384Sjkim out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1]; 500238384Sjkim out[3] = ((widelimb) in[3]) * tmp0 + 501238384Sjkim ((widelimb) in[1]) * tmp2; 502238384Sjkim out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2]; 503238384Sjkim out[5] = ((widelimb) in[3]) * tmp2; 504238384Sjkim out[6] = ((widelimb) in[3]) * in[3]; 505238384Sjkim } 506238384Sjkim 507238384Sjkim/* Multiply two field elements: out = in1 * in2 */ 508238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2) 509238384Sjkim { 510238384Sjkim out[0] = ((widelimb) in1[0]) * in2[0]; 511238384Sjkim out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0]; 512238384Sjkim out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] + 513238384Sjkim ((widelimb) in1[2]) * in2[0]; 514238384Sjkim out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] + 515238384Sjkim ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0]; 516238384Sjkim out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] + 517238384Sjkim ((widelimb) in1[3]) * in2[1]; 518238384Sjkim out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2]; 519238384Sjkim out[6] = ((widelimb) in1[3]) * in2[3]; 520238384Sjkim } 521238384Sjkim 522238384Sjkim/* Reduce seven 128-bit coefficients to four 64-bit coefficients. 523238384Sjkim * Requires in[i] < 2^126, 524238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */ 525238384Sjkimstatic void felem_reduce(felem out, const widefelem in) 526238384Sjkim { 527238384Sjkim static const widelimb two127p15 = (((widelimb) 1) << 127) + 528238384Sjkim (((widelimb) 1) << 15); 529238384Sjkim static const widelimb two127m71 = (((widelimb) 1) << 127) - 530238384Sjkim (((widelimb) 1) << 71); 531238384Sjkim static const widelimb two127m71m55 = (((widelimb) 1) << 127) - 532238384Sjkim (((widelimb) 1) << 71) - (((widelimb) 1) << 55); 533238384Sjkim widelimb output[5]; 534238384Sjkim 535238384Sjkim /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */ 536238384Sjkim output[0] = in[0] + two127p15; 537238384Sjkim output[1] = in[1] + two127m71m55; 538238384Sjkim output[2] = in[2] + two127m71; 539238384Sjkim output[3] = in[3]; 540238384Sjkim output[4] = in[4]; 541238384Sjkim 542238384Sjkim /* Eliminate in[4], in[5], in[6] */ 543238384Sjkim output[4] += in[6] >> 16; 544238384Sjkim output[3] += (in[6] & 0xffff) << 40; 545238384Sjkim output[2] -= in[6]; 546238384Sjkim 547238384Sjkim output[3] += in[5] >> 16; 548238384Sjkim output[2] += (in[5] & 0xffff) << 40; 549238384Sjkim output[1] -= in[5]; 550238384Sjkim 551238384Sjkim output[2] += output[4] >> 16; 552238384Sjkim output[1] += (output[4] & 0xffff) << 40; 553238384Sjkim output[0] -= output[4]; 554238384Sjkim 555238384Sjkim /* Carry 2 -> 3 -> 4 */ 556238384Sjkim output[3] += output[2] >> 56; 557238384Sjkim output[2] &= 0x00ffffffffffffff; 558238384Sjkim 559238384Sjkim output[4] = output[3] >> 56; 560238384Sjkim output[3] &= 0x00ffffffffffffff; 561238384Sjkim 562238384Sjkim /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */ 563238384Sjkim 564238384Sjkim /* Eliminate output[4] */ 565238384Sjkim output[2] += output[4] >> 16; 566238384Sjkim /* output[2] < 2^56 + 2^56 = 2^57 */ 567238384Sjkim output[1] += (output[4] & 0xffff) << 40; 568238384Sjkim output[0] -= output[4]; 569238384Sjkim 570238384Sjkim /* Carry 0 -> 1 -> 2 -> 3 */ 571238384Sjkim output[1] += output[0] >> 56; 572238384Sjkim out[0] = output[0] & 0x00ffffffffffffff; 573238384Sjkim 574238384Sjkim output[2] += output[1] >> 56; 575238384Sjkim /* output[2] < 2^57 + 2^72 */ 576238384Sjkim out[1] = output[1] & 0x00ffffffffffffff; 577238384Sjkim output[3] += output[2] >> 56; 578238384Sjkim /* output[3] <= 2^56 + 2^16 */ 579238384Sjkim out[2] = output[2] & 0x00ffffffffffffff; 580238384Sjkim 581238384Sjkim /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, 582238384Sjkim * out[3] <= 2^56 + 2^16 (due to final carry), 583238384Sjkim * so out < 2*p */ 584238384Sjkim out[3] = output[3]; 585238384Sjkim } 586238384Sjkim 587238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 588238384Sjkim { 589238384Sjkim widefelem tmp; 590238384Sjkim felem_square(tmp, in); 591238384Sjkim felem_reduce(out, tmp); 592238384Sjkim } 593238384Sjkim 594238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 595238384Sjkim { 596238384Sjkim widefelem tmp; 597238384Sjkim felem_mul(tmp, in1, in2); 598238384Sjkim felem_reduce(out, tmp); 599238384Sjkim } 600238384Sjkim 601238384Sjkim/* Reduce to unique minimal representation. 602238384Sjkim * Requires 0 <= in < 2*p (always call felem_reduce first) */ 603238384Sjkimstatic void felem_contract(felem out, const felem in) 604238384Sjkim { 605238384Sjkim static const int64_t two56 = ((limb) 1) << 56; 606238384Sjkim /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */ 607238384Sjkim /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */ 608238384Sjkim int64_t tmp[4], a; 609238384Sjkim tmp[0] = in[0]; 610238384Sjkim tmp[1] = in[1]; 611238384Sjkim tmp[2] = in[2]; 612238384Sjkim tmp[3] = in[3]; 613238384Sjkim /* Case 1: a = 1 iff in >= 2^224 */ 614238384Sjkim a = (in[3] >> 56); 615238384Sjkim tmp[0] -= a; 616238384Sjkim tmp[1] += a << 40; 617238384Sjkim tmp[3] &= 0x00ffffffffffffff; 618238384Sjkim /* Case 2: a = 0 iff p <= in < 2^224, i.e., 619238384Sjkim * the high 128 bits are all 1 and the lower part is non-zero */ 620238384Sjkim a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | 621238384Sjkim (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63); 622238384Sjkim a &= 0x00ffffffffffffff; 623238384Sjkim /* turn a into an all-one mask (if a = 0) or an all-zero mask */ 624238384Sjkim a = (a - 1) >> 63; 625238384Sjkim /* subtract 2^224 - 2^96 + 1 if a is all-one*/ 626238384Sjkim tmp[3] &= a ^ 0xffffffffffffffff; 627238384Sjkim tmp[2] &= a ^ 0xffffffffffffffff; 628238384Sjkim tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff; 629238384Sjkim tmp[0] -= 1 & a; 630238384Sjkim 631238384Sjkim /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must 632238384Sjkim * be non-zero, so we only need one step */ 633238384Sjkim a = tmp[0] >> 63; 634238384Sjkim tmp[0] += two56 & a; 635238384Sjkim tmp[1] -= 1 & a; 636238384Sjkim 637238384Sjkim /* carry 1 -> 2 -> 3 */ 638238384Sjkim tmp[2] += tmp[1] >> 56; 639238384Sjkim tmp[1] &= 0x00ffffffffffffff; 640238384Sjkim 641238384Sjkim tmp[3] += tmp[2] >> 56; 642238384Sjkim tmp[2] &= 0x00ffffffffffffff; 643238384Sjkim 644238384Sjkim /* Now 0 <= out < p */ 645238384Sjkim out[0] = tmp[0]; 646238384Sjkim out[1] = tmp[1]; 647238384Sjkim out[2] = tmp[2]; 648238384Sjkim out[3] = tmp[3]; 649238384Sjkim } 650238384Sjkim 651238384Sjkim/* Zero-check: returns 1 if input is 0, and 0 otherwise. 652238384Sjkim * We know that field elements are reduced to in < 2^225, 653238384Sjkim * so we only need to check three cases: 0, 2^224 - 2^96 + 1, 654238384Sjkim * and 2^225 - 2^97 + 2 */ 655238384Sjkimstatic limb felem_is_zero(const felem in) 656238384Sjkim { 657238384Sjkim limb zero, two224m96p1, two225m97p2; 658238384Sjkim 659238384Sjkim zero = in[0] | in[1] | in[2] | in[3]; 660238384Sjkim zero = (((int64_t)(zero) - 1) >> 63) & 1; 661238384Sjkim two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000) 662238384Sjkim | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff); 663238384Sjkim two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1; 664238384Sjkim two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000) 665238384Sjkim | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff); 666238384Sjkim two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1; 667238384Sjkim return (zero | two224m96p1 | two225m97p2); 668238384Sjkim } 669238384Sjkim 670238384Sjkimstatic limb felem_is_zero_int(const felem in) 671238384Sjkim { 672238384Sjkim return (int) (felem_is_zero(in) & ((limb)1)); 673238384Sjkim } 674238384Sjkim 675238384Sjkim/* Invert a field element */ 676238384Sjkim/* Computation chain copied from djb's code */ 677238384Sjkimstatic void felem_inv(felem out, const felem in) 678238384Sjkim { 679238384Sjkim felem ftmp, ftmp2, ftmp3, ftmp4; 680238384Sjkim widefelem tmp; 681238384Sjkim unsigned i; 682238384Sjkim 683238384Sjkim felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */ 684238384Sjkim felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */ 685238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */ 686238384Sjkim felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */ 687238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */ 688238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */ 689238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */ 690238384Sjkim felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */ 691238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */ 692238384Sjkim for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */ 693238384Sjkim { 694238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 695238384Sjkim } 696238384Sjkim felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */ 697238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */ 698238384Sjkim for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */ 699238384Sjkim { 700238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); 701238384Sjkim } 702238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */ 703238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */ 704238384Sjkim for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */ 705238384Sjkim { 706238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); 707238384Sjkim } 708238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */ 709238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */ 710238384Sjkim for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */ 711238384Sjkim { 712238384Sjkim felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); 713238384Sjkim } 714238384Sjkim felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */ 715238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */ 716238384Sjkim for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */ 717238384Sjkim { 718238384Sjkim felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); 719238384Sjkim } 720238384Sjkim felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */ 721238384Sjkim for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */ 722238384Sjkim { 723238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 724238384Sjkim } 725238384Sjkim felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */ 726238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */ 727238384Sjkim felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */ 728238384Sjkim for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */ 729238384Sjkim { 730238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 731238384Sjkim } 732238384Sjkim felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */ 733238384Sjkim } 734238384Sjkim 735238384Sjkim/* Copy in constant time: 736238384Sjkim * if icopy == 1, copy in to out, 737238384Sjkim * if icopy == 0, copy out to itself. */ 738238384Sjkimstatic void 739238384Sjkimcopy_conditional(felem out, const felem in, limb icopy) 740238384Sjkim { 741238384Sjkim unsigned i; 742238384Sjkim /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */ 743238384Sjkim const limb copy = -icopy; 744238384Sjkim for (i = 0; i < 4; ++i) 745238384Sjkim { 746238384Sjkim const limb tmp = copy & (in[i] ^ out[i]); 747238384Sjkim out[i] ^= tmp; 748238384Sjkim } 749238384Sjkim } 750238384Sjkim 751238384Sjkim/******************************************************************************/ 752238384Sjkim/* ELLIPTIC CURVE POINT OPERATIONS 753238384Sjkim * 754238384Sjkim * Points are represented in Jacobian projective coordinates: 755238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3), 756238384Sjkim * or to the point at infinity if Z == 0. 757238384Sjkim * 758238384Sjkim */ 759238384Sjkim 760238384Sjkim/* Double an elliptic curve point: 761238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where 762238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2 763238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2 764238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z 765238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed, 766238384Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). */ 767238384Sjkimstatic void 768238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 769238384Sjkim const felem x_in, const felem y_in, const felem z_in) 770238384Sjkim { 771238384Sjkim widefelem tmp, tmp2; 772238384Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 773238384Sjkim 774238384Sjkim felem_assign(ftmp, x_in); 775238384Sjkim felem_assign(ftmp2, x_in); 776238384Sjkim 777238384Sjkim /* delta = z^2 */ 778238384Sjkim felem_square(tmp, z_in); 779238384Sjkim felem_reduce(delta, tmp); 780238384Sjkim 781238384Sjkim /* gamma = y^2 */ 782238384Sjkim felem_square(tmp, y_in); 783238384Sjkim felem_reduce(gamma, tmp); 784238384Sjkim 785238384Sjkim /* beta = x*gamma */ 786238384Sjkim felem_mul(tmp, x_in, gamma); 787238384Sjkim felem_reduce(beta, tmp); 788238384Sjkim 789238384Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 790238384Sjkim felem_diff(ftmp, delta); 791238384Sjkim /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */ 792238384Sjkim felem_sum(ftmp2, delta); 793238384Sjkim /* ftmp2[i] < 2^57 + 2^57 = 2^58 */ 794238384Sjkim felem_scalar(ftmp2, 3); 795238384Sjkim /* ftmp2[i] < 3 * 2^58 < 2^60 */ 796238384Sjkim felem_mul(tmp, ftmp, ftmp2); 797238384Sjkim /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */ 798238384Sjkim felem_reduce(alpha, tmp); 799238384Sjkim 800238384Sjkim /* x' = alpha^2 - 8*beta */ 801238384Sjkim felem_square(tmp, alpha); 802238384Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 803238384Sjkim felem_assign(ftmp, beta); 804238384Sjkim felem_scalar(ftmp, 8); 805238384Sjkim /* ftmp[i] < 8 * 2^57 = 2^60 */ 806238384Sjkim felem_diff_128_64(tmp, ftmp); 807238384Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 808238384Sjkim felem_reduce(x_out, tmp); 809238384Sjkim 810238384Sjkim /* z' = (y + z)^2 - gamma - delta */ 811238384Sjkim felem_sum(delta, gamma); 812238384Sjkim /* delta[i] < 2^57 + 2^57 = 2^58 */ 813238384Sjkim felem_assign(ftmp, y_in); 814238384Sjkim felem_sum(ftmp, z_in); 815238384Sjkim /* ftmp[i] < 2^57 + 2^57 = 2^58 */ 816238384Sjkim felem_square(tmp, ftmp); 817238384Sjkim /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */ 818238384Sjkim felem_diff_128_64(tmp, delta); 819238384Sjkim /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */ 820238384Sjkim felem_reduce(z_out, tmp); 821238384Sjkim 822238384Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 823238384Sjkim felem_scalar(beta, 4); 824238384Sjkim /* beta[i] < 4 * 2^57 = 2^59 */ 825238384Sjkim felem_diff(beta, x_out); 826238384Sjkim /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */ 827238384Sjkim felem_mul(tmp, alpha, beta); 828238384Sjkim /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */ 829238384Sjkim felem_square(tmp2, gamma); 830238384Sjkim /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */ 831238384Sjkim widefelem_scalar(tmp2, 8); 832238384Sjkim /* tmp2[i] < 8 * 2^116 = 2^119 */ 833238384Sjkim widefelem_diff(tmp, tmp2); 834238384Sjkim /* tmp[i] < 2^119 + 2^120 < 2^121 */ 835238384Sjkim felem_reduce(y_out, tmp); 836238384Sjkim } 837238384Sjkim 838238384Sjkim/* Add two elliptic curve points: 839238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where 840238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 - 841238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 842238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) - 843238384Sjkim * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3 844238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2) 845238384Sjkim * 846238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0. 847238384Sjkim */ 848238384Sjkim 849238384Sjkim/* This function is not entirely constant-time: 850238384Sjkim * it includes a branch for checking whether the two input points are equal, 851238384Sjkim * (while not equal to the point at infinity). 852238384Sjkim * This case never happens during single point multiplication, 853238384Sjkim * so there is no timing leak for ECDH or ECDSA signing. */ 854238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 855238384Sjkim const felem x1, const felem y1, const felem z1, 856238384Sjkim const int mixed, const felem x2, const felem y2, const felem z2) 857238384Sjkim { 858238384Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out; 859238384Sjkim widefelem tmp, tmp2; 860238384Sjkim limb z1_is_zero, z2_is_zero, x_equal, y_equal; 861238384Sjkim 862238384Sjkim if (!mixed) 863238384Sjkim { 864238384Sjkim /* ftmp2 = z2^2 */ 865238384Sjkim felem_square(tmp, z2); 866238384Sjkim felem_reduce(ftmp2, tmp); 867238384Sjkim 868238384Sjkim /* ftmp4 = z2^3 */ 869238384Sjkim felem_mul(tmp, ftmp2, z2); 870238384Sjkim felem_reduce(ftmp4, tmp); 871238384Sjkim 872238384Sjkim /* ftmp4 = z2^3*y1 */ 873238384Sjkim felem_mul(tmp2, ftmp4, y1); 874238384Sjkim felem_reduce(ftmp4, tmp2); 875238384Sjkim 876238384Sjkim /* ftmp2 = z2^2*x1 */ 877238384Sjkim felem_mul(tmp2, ftmp2, x1); 878238384Sjkim felem_reduce(ftmp2, tmp2); 879238384Sjkim } 880238384Sjkim else 881238384Sjkim { 882238384Sjkim /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */ 883238384Sjkim 884238384Sjkim /* ftmp4 = z2^3*y1 */ 885238384Sjkim felem_assign(ftmp4, y1); 886238384Sjkim 887238384Sjkim /* ftmp2 = z2^2*x1 */ 888238384Sjkim felem_assign(ftmp2, x1); 889238384Sjkim } 890238384Sjkim 891238384Sjkim /* ftmp = z1^2 */ 892238384Sjkim felem_square(tmp, z1); 893238384Sjkim felem_reduce(ftmp, tmp); 894238384Sjkim 895238384Sjkim /* ftmp3 = z1^3 */ 896238384Sjkim felem_mul(tmp, ftmp, z1); 897238384Sjkim felem_reduce(ftmp3, tmp); 898238384Sjkim 899238384Sjkim /* tmp = z1^3*y2 */ 900238384Sjkim felem_mul(tmp, ftmp3, y2); 901238384Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 902238384Sjkim 903238384Sjkim /* ftmp3 = z1^3*y2 - z2^3*y1 */ 904238384Sjkim felem_diff_128_64(tmp, ftmp4); 905238384Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 906238384Sjkim felem_reduce(ftmp3, tmp); 907238384Sjkim 908238384Sjkim /* tmp = z1^2*x2 */ 909238384Sjkim felem_mul(tmp, ftmp, x2); 910238384Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 911238384Sjkim 912238384Sjkim /* ftmp = z1^2*x2 - z2^2*x1 */ 913238384Sjkim felem_diff_128_64(tmp, ftmp2); 914238384Sjkim /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */ 915238384Sjkim felem_reduce(ftmp, tmp); 916238384Sjkim 917238384Sjkim /* the formulae are incorrect if the points are equal 918238384Sjkim * so we check for this and do doubling if this happens */ 919238384Sjkim x_equal = felem_is_zero(ftmp); 920238384Sjkim y_equal = felem_is_zero(ftmp3); 921238384Sjkim z1_is_zero = felem_is_zero(z1); 922238384Sjkim z2_is_zero = felem_is_zero(z2); 923238384Sjkim /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */ 924238384Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) 925238384Sjkim { 926238384Sjkim point_double(x3, y3, z3, x1, y1, z1); 927238384Sjkim return; 928238384Sjkim } 929238384Sjkim 930238384Sjkim /* ftmp5 = z1*z2 */ 931238384Sjkim if (!mixed) 932238384Sjkim { 933238384Sjkim felem_mul(tmp, z1, z2); 934238384Sjkim felem_reduce(ftmp5, tmp); 935238384Sjkim } 936238384Sjkim else 937238384Sjkim { 938238384Sjkim /* special case z2 = 0 is handled later */ 939238384Sjkim felem_assign(ftmp5, z1); 940238384Sjkim } 941238384Sjkim 942238384Sjkim /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */ 943238384Sjkim felem_mul(tmp, ftmp, ftmp5); 944238384Sjkim felem_reduce(z_out, tmp); 945238384Sjkim 946238384Sjkim /* ftmp = (z1^2*x2 - z2^2*x1)^2 */ 947238384Sjkim felem_assign(ftmp5, ftmp); 948238384Sjkim felem_square(tmp, ftmp); 949238384Sjkim felem_reduce(ftmp, tmp); 950238384Sjkim 951238384Sjkim /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */ 952238384Sjkim felem_mul(tmp, ftmp, ftmp5); 953238384Sjkim felem_reduce(ftmp5, tmp); 954238384Sjkim 955238384Sjkim /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 956238384Sjkim felem_mul(tmp, ftmp2, ftmp); 957238384Sjkim felem_reduce(ftmp2, tmp); 958238384Sjkim 959238384Sjkim /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */ 960238384Sjkim felem_mul(tmp, ftmp4, ftmp5); 961238384Sjkim /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */ 962238384Sjkim 963238384Sjkim /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */ 964238384Sjkim felem_square(tmp2, ftmp3); 965238384Sjkim /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */ 966238384Sjkim 967238384Sjkim /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */ 968238384Sjkim felem_diff_128_64(tmp2, ftmp5); 969238384Sjkim /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */ 970238384Sjkim 971238384Sjkim /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 972238384Sjkim felem_assign(ftmp5, ftmp2); 973238384Sjkim felem_scalar(ftmp5, 2); 974238384Sjkim /* ftmp5[i] < 2 * 2^57 = 2^58 */ 975238384Sjkim 976238384Sjkim /* x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 - 977238384Sjkim 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */ 978238384Sjkim felem_diff_128_64(tmp2, ftmp5); 979238384Sjkim /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */ 980238384Sjkim felem_reduce(x_out, tmp2); 981238384Sjkim 982238384Sjkim /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */ 983238384Sjkim felem_diff(ftmp2, x_out); 984238384Sjkim /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */ 985238384Sjkim 986238384Sjkim /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */ 987238384Sjkim felem_mul(tmp2, ftmp3, ftmp2); 988238384Sjkim /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */ 989238384Sjkim 990238384Sjkim /* y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) - 991238384Sjkim z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */ 992238384Sjkim widefelem_diff(tmp2, tmp); 993238384Sjkim /* tmp2[i] < 2^118 + 2^120 < 2^121 */ 994238384Sjkim felem_reduce(y_out, tmp2); 995238384Sjkim 996238384Sjkim /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is 997238384Sjkim * the point at infinity, so we need to check for this separately */ 998238384Sjkim 999238384Sjkim /* if point 1 is at infinity, copy point 2 to output, and vice versa */ 1000238384Sjkim copy_conditional(x_out, x2, z1_is_zero); 1001238384Sjkim copy_conditional(x_out, x1, z2_is_zero); 1002238384Sjkim copy_conditional(y_out, y2, z1_is_zero); 1003238384Sjkim copy_conditional(y_out, y1, z2_is_zero); 1004238384Sjkim copy_conditional(z_out, z2, z1_is_zero); 1005238384Sjkim copy_conditional(z_out, z1, z2_is_zero); 1006238384Sjkim felem_assign(x3, x_out); 1007238384Sjkim felem_assign(y3, y_out); 1008238384Sjkim felem_assign(z3, z_out); 1009238384Sjkim } 1010238384Sjkim 1011238384Sjkim/* select_point selects the |idx|th point from a precomputation table and 1012238384Sjkim * copies it to out. */ 1013238384Sjkimstatic void select_point(const u64 idx, unsigned int size, const felem pre_comp[/*size*/][3], felem out[3]) 1014238384Sjkim { 1015238384Sjkim unsigned i, j; 1016238384Sjkim limb *outlimbs = &out[0][0]; 1017238384Sjkim memset(outlimbs, 0, 3 * sizeof(felem)); 1018238384Sjkim 1019238384Sjkim for (i = 0; i < size; i++) 1020238384Sjkim { 1021238384Sjkim const limb *inlimbs = &pre_comp[i][0][0]; 1022238384Sjkim u64 mask = i ^ idx; 1023238384Sjkim mask |= mask >> 4; 1024238384Sjkim mask |= mask >> 2; 1025238384Sjkim mask |= mask >> 1; 1026238384Sjkim mask &= 1; 1027238384Sjkim mask--; 1028238384Sjkim for (j = 0; j < 4 * 3; j++) 1029238384Sjkim outlimbs[j] |= inlimbs[j] & mask; 1030238384Sjkim } 1031238384Sjkim } 1032238384Sjkim 1033238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1034238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i) 1035238384Sjkim { 1036238384Sjkim if (i >= 224) 1037238384Sjkim return 0; 1038238384Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1039238384Sjkim } 1040238384Sjkim 1041238384Sjkim/* Interleaved point multiplication using precomputed point multiples: 1042238384Sjkim * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], 1043238384Sjkim * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple 1044238384Sjkim * of the generator, using certain (large) precomputed multiples in g_pre_comp. 1045238384Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out */ 1046238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1047238384Sjkim const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar, 1048238384Sjkim const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3]) 1049238384Sjkim { 1050238384Sjkim int i, skip; 1051238384Sjkim unsigned num; 1052238384Sjkim unsigned gen_mul = (g_scalar != NULL); 1053238384Sjkim felem nq[3], tmp[4]; 1054238384Sjkim u64 bits; 1055238384Sjkim u8 sign, digit; 1056238384Sjkim 1057238384Sjkim /* set nq to the point at infinity */ 1058238384Sjkim memset(nq, 0, 3 * sizeof(felem)); 1059238384Sjkim 1060238384Sjkim /* Loop over all scalars msb-to-lsb, interleaving additions 1061238384Sjkim * of multiples of the generator (two in each of the last 28 rounds) 1062238384Sjkim * and additions of other points multiples (every 5th round). 1063238384Sjkim */ 1064238384Sjkim skip = 1; /* save two point operations in the first round */ 1065238384Sjkim for (i = (num_points ? 220 : 27); i >= 0; --i) 1066238384Sjkim { 1067238384Sjkim /* double */ 1068238384Sjkim if (!skip) 1069238384Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1070238384Sjkim 1071238384Sjkim /* add multiples of the generator */ 1072238384Sjkim if (gen_mul && (i <= 27)) 1073238384Sjkim { 1074238384Sjkim /* first, look 28 bits upwards */ 1075238384Sjkim bits = get_bit(g_scalar, i + 196) << 3; 1076238384Sjkim bits |= get_bit(g_scalar, i + 140) << 2; 1077238384Sjkim bits |= get_bit(g_scalar, i + 84) << 1; 1078238384Sjkim bits |= get_bit(g_scalar, i + 28); 1079238384Sjkim /* select the point to add, in constant time */ 1080238384Sjkim select_point(bits, 16, g_pre_comp[1], tmp); 1081238384Sjkim 1082238384Sjkim if (!skip) 1083238384Sjkim { 1084238384Sjkim point_add(nq[0], nq[1], nq[2], 1085238384Sjkim nq[0], nq[1], nq[2], 1086238384Sjkim 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1087238384Sjkim } 1088238384Sjkim else 1089238384Sjkim { 1090238384Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1091238384Sjkim skip = 0; 1092238384Sjkim } 1093238384Sjkim 1094238384Sjkim /* second, look at the current position */ 1095238384Sjkim bits = get_bit(g_scalar, i + 168) << 3; 1096238384Sjkim bits |= get_bit(g_scalar, i + 112) << 2; 1097238384Sjkim bits |= get_bit(g_scalar, i + 56) << 1; 1098238384Sjkim bits |= get_bit(g_scalar, i); 1099238384Sjkim /* select the point to add, in constant time */ 1100238384Sjkim select_point(bits, 16, g_pre_comp[0], tmp); 1101238384Sjkim point_add(nq[0], nq[1], nq[2], 1102238384Sjkim nq[0], nq[1], nq[2], 1103238384Sjkim 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1104238384Sjkim } 1105238384Sjkim 1106238384Sjkim /* do other additions every 5 doublings */ 1107238384Sjkim if (num_points && (i % 5 == 0)) 1108238384Sjkim { 1109238384Sjkim /* loop over all scalars */ 1110238384Sjkim for (num = 0; num < num_points; ++num) 1111238384Sjkim { 1112238384Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1113238384Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1114238384Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1115238384Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1116238384Sjkim bits |= get_bit(scalars[num], i) << 1; 1117238384Sjkim bits |= get_bit(scalars[num], i - 1); 1118238384Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1119238384Sjkim 1120238384Sjkim /* select the point to add or subtract */ 1121238384Sjkim select_point(digit, 17, pre_comp[num], tmp); 1122238384Sjkim felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */ 1123238384Sjkim copy_conditional(tmp[1], tmp[3], sign); 1124238384Sjkim 1125238384Sjkim if (!skip) 1126238384Sjkim { 1127238384Sjkim point_add(nq[0], nq[1], nq[2], 1128238384Sjkim nq[0], nq[1], nq[2], 1129238384Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1130238384Sjkim } 1131238384Sjkim else 1132238384Sjkim { 1133238384Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1134238384Sjkim skip = 0; 1135238384Sjkim } 1136238384Sjkim } 1137238384Sjkim } 1138238384Sjkim } 1139238384Sjkim felem_assign(x_out, nq[0]); 1140238384Sjkim felem_assign(y_out, nq[1]); 1141238384Sjkim felem_assign(z_out, nq[2]); 1142238384Sjkim } 1143238384Sjkim 1144238384Sjkim/******************************************************************************/ 1145238384Sjkim/* FUNCTIONS TO MANAGE PRECOMPUTATION 1146238384Sjkim */ 1147238384Sjkim 1148238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new() 1149238384Sjkim { 1150238384Sjkim NISTP224_PRE_COMP *ret = NULL; 1151238384Sjkim ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret); 1152238384Sjkim if (!ret) 1153238384Sjkim { 1154238384Sjkim ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1155238384Sjkim return ret; 1156238384Sjkim } 1157238384Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1158238384Sjkim ret->references = 1; 1159238384Sjkim return ret; 1160238384Sjkim } 1161238384Sjkim 1162238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_) 1163238384Sjkim { 1164238384Sjkim NISTP224_PRE_COMP *src = src_; 1165238384Sjkim 1166238384Sjkim /* no need to actually copy, these objects never change! */ 1167238384Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1168238384Sjkim 1169238384Sjkim return src_; 1170238384Sjkim } 1171238384Sjkim 1172238384Sjkimstatic void nistp224_pre_comp_free(void *pre_) 1173238384Sjkim { 1174238384Sjkim int i; 1175238384Sjkim NISTP224_PRE_COMP *pre = pre_; 1176238384Sjkim 1177238384Sjkim if (!pre) 1178238384Sjkim return; 1179238384Sjkim 1180238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1181238384Sjkim if (i > 0) 1182238384Sjkim return; 1183238384Sjkim 1184238384Sjkim OPENSSL_free(pre); 1185238384Sjkim } 1186238384Sjkim 1187238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_) 1188238384Sjkim { 1189238384Sjkim int i; 1190238384Sjkim NISTP224_PRE_COMP *pre = pre_; 1191238384Sjkim 1192238384Sjkim if (!pre) 1193238384Sjkim return; 1194238384Sjkim 1195238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1196238384Sjkim if (i > 0) 1197238384Sjkim return; 1198238384Sjkim 1199238384Sjkim OPENSSL_cleanse(pre, sizeof *pre); 1200238384Sjkim OPENSSL_free(pre); 1201238384Sjkim } 1202238384Sjkim 1203238384Sjkim/******************************************************************************/ 1204238384Sjkim/* OPENSSL EC_METHOD FUNCTIONS 1205238384Sjkim */ 1206238384Sjkim 1207238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group) 1208238384Sjkim { 1209238384Sjkim int ret; 1210238384Sjkim ret = ec_GFp_simple_group_init(group); 1211238384Sjkim group->a_is_minus3 = 1; 1212238384Sjkim return ret; 1213238384Sjkim } 1214238384Sjkim 1215238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1216238384Sjkim const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 1217238384Sjkim { 1218238384Sjkim int ret = 0; 1219238384Sjkim BN_CTX *new_ctx = NULL; 1220238384Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1221238384Sjkim 1222238384Sjkim if (ctx == NULL) 1223238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1224238384Sjkim BN_CTX_start(ctx); 1225238384Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1226238384Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1227238384Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err; 1228238384Sjkim BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p); 1229238384Sjkim BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a); 1230238384Sjkim BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b); 1231238384Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || 1232238384Sjkim (BN_cmp(curve_b, b))) 1233238384Sjkim { 1234238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE, 1235238384Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1236238384Sjkim goto err; 1237238384Sjkim } 1238238384Sjkim group->field_mod_func = BN_nist_mod_224; 1239238384Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1240238384Sjkimerr: 1241238384Sjkim BN_CTX_end(ctx); 1242238384Sjkim if (new_ctx != NULL) 1243238384Sjkim BN_CTX_free(new_ctx); 1244238384Sjkim return ret; 1245238384Sjkim } 1246238384Sjkim 1247238384Sjkim/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns 1248238384Sjkim * (X', Y') = (X/Z^2, Y/Z^3) */ 1249238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, 1250238384Sjkim const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) 1251238384Sjkim { 1252238384Sjkim felem z1, z2, x_in, y_in, x_out, y_out; 1253238384Sjkim widefelem tmp; 1254238384Sjkim 1255238384Sjkim if (EC_POINT_is_at_infinity(group, point)) 1256238384Sjkim { 1257238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1258238384Sjkim EC_R_POINT_AT_INFINITY); 1259238384Sjkim return 0; 1260238384Sjkim } 1261238384Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1262238384Sjkim (!BN_to_felem(z1, &point->Z))) return 0; 1263238384Sjkim felem_inv(z2, z1); 1264238384Sjkim felem_square(tmp, z2); felem_reduce(z1, tmp); 1265238384Sjkim felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp); 1266238384Sjkim felem_contract(x_out, x_in); 1267238384Sjkim if (x != NULL) 1268238384Sjkim { 1269238384Sjkim if (!felem_to_BN(x, x_out)) { 1270238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1271238384Sjkim ERR_R_BN_LIB); 1272238384Sjkim return 0; 1273238384Sjkim } 1274238384Sjkim } 1275238384Sjkim felem_mul(tmp, z1, z2); felem_reduce(z1, tmp); 1276238384Sjkim felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp); 1277238384Sjkim felem_contract(y_out, y_in); 1278238384Sjkim if (y != NULL) 1279238384Sjkim { 1280238384Sjkim if (!felem_to_BN(y, y_out)) { 1281238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES, 1282238384Sjkim ERR_R_BN_LIB); 1283238384Sjkim return 0; 1284238384Sjkim } 1285238384Sjkim } 1286238384Sjkim return 1; 1287238384Sjkim } 1288238384Sjkim 1289238384Sjkimstatic void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/]) 1290238384Sjkim { 1291238384Sjkim /* Runs in constant time, unless an input is the point at infinity 1292238384Sjkim * (which normally shouldn't happen). */ 1293238384Sjkim ec_GFp_nistp_points_make_affine_internal( 1294238384Sjkim num, 1295238384Sjkim points, 1296238384Sjkim sizeof(felem), 1297238384Sjkim tmp_felems, 1298238384Sjkim (void (*)(void *)) felem_one, 1299238384Sjkim (int (*)(const void *)) felem_is_zero_int, 1300238384Sjkim (void (*)(void *, const void *)) felem_assign, 1301238384Sjkim (void (*)(void *, const void *)) felem_square_reduce, 1302238384Sjkim (void (*)(void *, const void *, const void *)) felem_mul_reduce, 1303238384Sjkim (void (*)(void *, const void *)) felem_inv, 1304238384Sjkim (void (*)(void *, const void *)) felem_contract); 1305238384Sjkim } 1306238384Sjkim 1307238384Sjkim/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values 1308238384Sjkim * Result is stored in r (r can equal one of the inputs). */ 1309238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, 1310238384Sjkim const BIGNUM *scalar, size_t num, const EC_POINT *points[], 1311238384Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1312238384Sjkim { 1313238384Sjkim int ret = 0; 1314238384Sjkim int j; 1315238384Sjkim unsigned i; 1316238384Sjkim int mixed = 0; 1317238384Sjkim BN_CTX *new_ctx = NULL; 1318238384Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1319238384Sjkim felem_bytearray g_secret; 1320238384Sjkim felem_bytearray *secrets = NULL; 1321238384Sjkim felem (*pre_comp)[17][3] = NULL; 1322238384Sjkim felem *tmp_felems = NULL; 1323238384Sjkim felem_bytearray tmp; 1324238384Sjkim unsigned num_bytes; 1325238384Sjkim int have_pre_comp = 0; 1326238384Sjkim size_t num_points = num; 1327238384Sjkim felem x_in, y_in, z_in, x_out, y_out, z_out; 1328238384Sjkim NISTP224_PRE_COMP *pre = NULL; 1329238384Sjkim const felem (*g_pre_comp)[16][3] = NULL; 1330238384Sjkim EC_POINT *generator = NULL; 1331238384Sjkim const EC_POINT *p = NULL; 1332238384Sjkim const BIGNUM *p_scalar = NULL; 1333238384Sjkim 1334238384Sjkim if (ctx == NULL) 1335238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1336238384Sjkim BN_CTX_start(ctx); 1337238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1338238384Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1339238384Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1340238384Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1341238384Sjkim goto err; 1342238384Sjkim 1343238384Sjkim if (scalar != NULL) 1344238384Sjkim { 1345238384Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1346238384Sjkim nistp224_pre_comp_dup, nistp224_pre_comp_free, 1347238384Sjkim nistp224_pre_comp_clear_free); 1348238384Sjkim if (pre) 1349238384Sjkim /* we have precomputation, try to use it */ 1350238384Sjkim g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp; 1351238384Sjkim else 1352238384Sjkim /* try to use the standard precomputation */ 1353238384Sjkim g_pre_comp = &gmul[0]; 1354238384Sjkim generator = EC_POINT_new(group); 1355238384Sjkim if (generator == NULL) 1356238384Sjkim goto err; 1357238384Sjkim /* get the generator from precomputation */ 1358238384Sjkim if (!felem_to_BN(x, g_pre_comp[0][1][0]) || 1359238384Sjkim !felem_to_BN(y, g_pre_comp[0][1][1]) || 1360238384Sjkim !felem_to_BN(z, g_pre_comp[0][1][2])) 1361238384Sjkim { 1362238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1363238384Sjkim goto err; 1364238384Sjkim } 1365238384Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1366238384Sjkim generator, x, y, z, ctx)) 1367238384Sjkim goto err; 1368238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1369238384Sjkim /* precomputation matches generator */ 1370238384Sjkim have_pre_comp = 1; 1371238384Sjkim else 1372238384Sjkim /* we don't have valid precomputation: 1373238384Sjkim * treat the generator as a random point */ 1374238384Sjkim num_points = num_points + 1; 1375238384Sjkim } 1376238384Sjkim 1377238384Sjkim if (num_points > 0) 1378238384Sjkim { 1379238384Sjkim if (num_points >= 3) 1380238384Sjkim { 1381238384Sjkim /* unless we precompute multiples for just one or two points, 1382238384Sjkim * converting those into affine form is time well spent */ 1383238384Sjkim mixed = 1; 1384238384Sjkim } 1385238384Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1386238384Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1387238384Sjkim if (mixed) 1388238384Sjkim tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1389238384Sjkim if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL))) 1390238384Sjkim { 1391238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1392238384Sjkim goto err; 1393238384Sjkim } 1394238384Sjkim 1395238384Sjkim /* we treat NULL scalars as 0, and NULL points as points at infinity, 1396238384Sjkim * i.e., they contribute nothing to the linear combination */ 1397238384Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1398238384Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1399238384Sjkim for (i = 0; i < num_points; ++i) 1400238384Sjkim { 1401238384Sjkim if (i == num) 1402238384Sjkim /* the generator */ 1403238384Sjkim { 1404238384Sjkim p = EC_GROUP_get0_generator(group); 1405238384Sjkim p_scalar = scalar; 1406238384Sjkim } 1407238384Sjkim else 1408238384Sjkim /* the i^th point */ 1409238384Sjkim { 1410238384Sjkim p = points[i]; 1411238384Sjkim p_scalar = scalars[i]; 1412238384Sjkim } 1413238384Sjkim if ((p_scalar != NULL) && (p != NULL)) 1414238384Sjkim { 1415238384Sjkim /* reduce scalar to 0 <= scalar < 2^224 */ 1416238384Sjkim if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar))) 1417238384Sjkim { 1418238384Sjkim /* this is an unusual input, and we don't guarantee 1419238384Sjkim * constant-timeness */ 1420238384Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) 1421238384Sjkim { 1422238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1423238384Sjkim goto err; 1424238384Sjkim } 1425238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1426238384Sjkim } 1427238384Sjkim else 1428238384Sjkim num_bytes = BN_bn2bin(p_scalar, tmp); 1429238384Sjkim flip_endian(secrets[i], tmp, num_bytes); 1430238384Sjkim /* precompute multiples */ 1431238384Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1432238384Sjkim (!BN_to_felem(y_out, &p->Y)) || 1433238384Sjkim (!BN_to_felem(z_out, &p->Z))) goto err; 1434238384Sjkim felem_assign(pre_comp[i][1][0], x_out); 1435238384Sjkim felem_assign(pre_comp[i][1][1], y_out); 1436238384Sjkim felem_assign(pre_comp[i][1][2], z_out); 1437238384Sjkim for (j = 2; j <= 16; ++j) 1438238384Sjkim { 1439238384Sjkim if (j & 1) 1440238384Sjkim { 1441238384Sjkim point_add( 1442238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1443238384Sjkim pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 1444238384Sjkim 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]); 1445238384Sjkim } 1446238384Sjkim else 1447238384Sjkim { 1448238384Sjkim point_double( 1449238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1450238384Sjkim pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]); 1451238384Sjkim } 1452238384Sjkim } 1453238384Sjkim } 1454238384Sjkim } 1455238384Sjkim if (mixed) 1456238384Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1457238384Sjkim } 1458238384Sjkim 1459238384Sjkim /* the scalar for the generator */ 1460238384Sjkim if ((scalar != NULL) && (have_pre_comp)) 1461238384Sjkim { 1462238384Sjkim memset(g_secret, 0, sizeof g_secret); 1463238384Sjkim /* reduce scalar to 0 <= scalar < 2^224 */ 1464238384Sjkim if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) 1465238384Sjkim { 1466238384Sjkim /* this is an unusual input, and we don't guarantee 1467238384Sjkim * constant-timeness */ 1468238384Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) 1469238384Sjkim { 1470238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1471238384Sjkim goto err; 1472238384Sjkim } 1473238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1474238384Sjkim } 1475238384Sjkim else 1476238384Sjkim num_bytes = BN_bn2bin(scalar, tmp); 1477238384Sjkim flip_endian(g_secret, tmp, num_bytes); 1478238384Sjkim /* do the multiplication with generator precomputation*/ 1479238384Sjkim batch_mul(x_out, y_out, z_out, 1480238384Sjkim (const felem_bytearray (*)) secrets, num_points, 1481238384Sjkim g_secret, 1482238384Sjkim mixed, (const felem (*)[17][3]) pre_comp, 1483238384Sjkim g_pre_comp); 1484238384Sjkim } 1485238384Sjkim else 1486238384Sjkim /* do the multiplication without generator precomputation */ 1487238384Sjkim batch_mul(x_out, y_out, z_out, 1488238384Sjkim (const felem_bytearray (*)) secrets, num_points, 1489238384Sjkim NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL); 1490238384Sjkim /* reduce the output to its unique minimal representation */ 1491238384Sjkim felem_contract(x_in, x_out); 1492238384Sjkim felem_contract(y_in, y_out); 1493238384Sjkim felem_contract(z_in, z_out); 1494238384Sjkim if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1495238384Sjkim (!felem_to_BN(z, z_in))) 1496238384Sjkim { 1497238384Sjkim ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB); 1498238384Sjkim goto err; 1499238384Sjkim } 1500238384Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1501238384Sjkim 1502238384Sjkimerr: 1503238384Sjkim BN_CTX_end(ctx); 1504238384Sjkim if (generator != NULL) 1505238384Sjkim EC_POINT_free(generator); 1506238384Sjkim if (new_ctx != NULL) 1507238384Sjkim BN_CTX_free(new_ctx); 1508238384Sjkim if (secrets != NULL) 1509238384Sjkim OPENSSL_free(secrets); 1510238384Sjkim if (pre_comp != NULL) 1511238384Sjkim OPENSSL_free(pre_comp); 1512238384Sjkim if (tmp_felems != NULL) 1513238384Sjkim OPENSSL_free(tmp_felems); 1514238384Sjkim return ret; 1515238384Sjkim } 1516238384Sjkim 1517238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1518238384Sjkim { 1519238384Sjkim int ret = 0; 1520238384Sjkim NISTP224_PRE_COMP *pre = NULL; 1521238384Sjkim int i, j; 1522238384Sjkim BN_CTX *new_ctx = NULL; 1523238384Sjkim BIGNUM *x, *y; 1524238384Sjkim EC_POINT *generator = NULL; 1525238384Sjkim felem tmp_felems[32]; 1526238384Sjkim 1527238384Sjkim /* throw away old precomputation */ 1528238384Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup, 1529238384Sjkim nistp224_pre_comp_free, nistp224_pre_comp_clear_free); 1530238384Sjkim if (ctx == NULL) 1531238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1532238384Sjkim BN_CTX_start(ctx); 1533238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1534238384Sjkim ((y = BN_CTX_get(ctx)) == NULL)) 1535238384Sjkim goto err; 1536238384Sjkim /* get the generator */ 1537238384Sjkim if (group->generator == NULL) goto err; 1538238384Sjkim generator = EC_POINT_new(group); 1539238384Sjkim if (generator == NULL) 1540238384Sjkim goto err; 1541238384Sjkim BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x); 1542238384Sjkim BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y); 1543238384Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1544238384Sjkim goto err; 1545238384Sjkim if ((pre = nistp224_pre_comp_new()) == NULL) 1546238384Sjkim goto err; 1547238384Sjkim /* if the generator is the standard one, use built-in precomputation */ 1548238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1549238384Sjkim { 1550238384Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1551238384Sjkim ret = 1; 1552238384Sjkim goto err; 1553238384Sjkim } 1554238384Sjkim if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || 1555238384Sjkim (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || 1556238384Sjkim (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z))) 1557238384Sjkim goto err; 1558238384Sjkim /* compute 2^56*G, 2^112*G, 2^168*G for the first table, 1559238384Sjkim * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one 1560238384Sjkim */ 1561238384Sjkim for (i = 1; i <= 8; i <<= 1) 1562238384Sjkim { 1563238384Sjkim point_double( 1564238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 1565238384Sjkim pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 1566238384Sjkim for (j = 0; j < 27; ++j) 1567238384Sjkim { 1568238384Sjkim point_double( 1569238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 1570238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1571238384Sjkim } 1572238384Sjkim if (i == 8) 1573238384Sjkim break; 1574238384Sjkim point_double( 1575238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 1576238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 1577238384Sjkim for (j = 0; j < 27; ++j) 1578238384Sjkim { 1579238384Sjkim point_double( 1580238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 1581238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]); 1582238384Sjkim } 1583238384Sjkim } 1584238384Sjkim for (i = 0; i < 2; i++) 1585238384Sjkim { 1586238384Sjkim /* g_pre_comp[i][0] is the point at infinity */ 1587238384Sjkim memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 1588238384Sjkim /* the remaining multiples */ 1589238384Sjkim /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */ 1590238384Sjkim point_add( 1591238384Sjkim pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], 1592238384Sjkim pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0], 1593238384Sjkim pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 1594238384Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1595238384Sjkim pre->g_pre_comp[i][2][2]); 1596238384Sjkim /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */ 1597238384Sjkim point_add( 1598238384Sjkim pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], 1599238384Sjkim pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0], 1600238384Sjkim pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1601238384Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1602238384Sjkim pre->g_pre_comp[i][2][2]); 1603238384Sjkim /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */ 1604238384Sjkim point_add( 1605238384Sjkim pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], 1606238384Sjkim pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0], 1607238384Sjkim pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 1608238384Sjkim 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], 1609238384Sjkim pre->g_pre_comp[i][4][2]); 1610238384Sjkim /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */ 1611238384Sjkim point_add( 1612238384Sjkim pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], 1613238384Sjkim pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0], 1614238384Sjkim pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 1615238384Sjkim 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], 1616238384Sjkim pre->g_pre_comp[i][2][2]); 1617238384Sjkim for (j = 1; j < 8; ++j) 1618238384Sjkim { 1619238384Sjkim /* odd multiples: add G resp. 2^28*G */ 1620238384Sjkim point_add( 1621238384Sjkim pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], 1622238384Sjkim pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0], 1623238384Sjkim pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2], 1624238384Sjkim 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], 1625238384Sjkim pre->g_pre_comp[i][1][2]); 1626238384Sjkim } 1627238384Sjkim } 1628238384Sjkim make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); 1629238384Sjkim 1630238384Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, 1631238384Sjkim nistp224_pre_comp_free, nistp224_pre_comp_clear_free)) 1632238384Sjkim goto err; 1633238384Sjkim ret = 1; 1634238384Sjkim pre = NULL; 1635238384Sjkim err: 1636238384Sjkim BN_CTX_end(ctx); 1637238384Sjkim if (generator != NULL) 1638238384Sjkim EC_POINT_free(generator); 1639238384Sjkim if (new_ctx != NULL) 1640238384Sjkim BN_CTX_free(new_ctx); 1641238384Sjkim if (pre) 1642238384Sjkim nistp224_pre_comp_free(pre); 1643238384Sjkim return ret; 1644238384Sjkim } 1645238384Sjkim 1646238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group) 1647238384Sjkim { 1648238384Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup, 1649238384Sjkim nistp224_pre_comp_free, nistp224_pre_comp_clear_free) 1650238384Sjkim != NULL) 1651238384Sjkim return 1; 1652238384Sjkim else 1653238384Sjkim return 0; 1654238384Sjkim } 1655238384Sjkim 1656238384Sjkim#else 1657238384Sjkimstatic void *dummy=&dummy; 1658238384Sjkim#endif 1659