1/*
2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 *     http://www.apache.org/licenses/LICENSE-2.0
18 *
19 *  Unless required by applicable law or agreed to in writing, software
20 *  distributed under the License is distributed on an "AS IS" BASIS,
21 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 *  See the License for the specific language governing permissions and
23 *  limitations under the License.
24 */
25
26/*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30#include "internal/deprecated.h"
31
32/*
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34 *
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
37 */
38
39#include <openssl/opensslconf.h>
40
41#include <stdint.h>
42#include <string.h>
43#include <openssl/err.h>
44#include "ec_local.h"
45
46#include "internal/numbers.h"
47
48#ifndef INT128_MAX
49# error "Your compiler doesn't appear to support 128-bit integer types"
50#endif
51
52typedef uint8_t u8;
53typedef uint64_t u64;
54
55/******************************************************************************/
56/*-
57 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58 *
59 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60 * using 64-bit coefficients called 'limbs',
61 * and sometimes (for multiplication results) as
62 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63 * using 128-bit coefficients called 'widelimbs'.
64 * A 4-limb representation is an 'felem';
65 * a 7-widelimb representation is a 'widefelem'.
66 * Even within felems, bits of adjacent limbs overlap, and we don't always
67 * reduce the representations: we ensure that inputs to each felem
68 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69 * and fit into a 128-bit word without overflow. The coefficients are then
70 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71 * We only reduce to the unique minimal representation at the end of the
72 * computation.
73 */
74
75typedef uint64_t limb;
76typedef uint64_t limb_aX __attribute((__aligned__(1)));
77typedef uint128_t widelimb;
78
79typedef limb felem[4];
80typedef widelimb widefelem[7];
81
82/*
83 * Field element represented as a byte array. 28*8 = 224 bits is also the
84 * group order size for the elliptic curve, and we also use this type for
85 * scalars for point multiplication.
86 */
87typedef u8 felem_bytearray[28];
88
89static const felem_bytearray nistp224_curve_params[5] = {
90    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105};
106
107/*-
108 * Precomputed multiples of the standard generator
109 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110 * (0 for the point at infinity).
111 * For each field element, slice a_0 is word 0, etc.
112 *
113 * The table has 2 * 16 elements, starting with the following:
114 * index | bits    | point
115 * ------+---------+------------------------------
116 *     0 | 0 0 0 0 | 0G
117 *     1 | 0 0 0 1 | 1G
118 *     2 | 0 0 1 0 | 2^56G
119 *     3 | 0 0 1 1 | (2^56 + 1)G
120 *     4 | 0 1 0 0 | 2^112G
121 *     5 | 0 1 0 1 | (2^112 + 1)G
122 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124 *     8 | 1 0 0 0 | 2^168G
125 *     9 | 1 0 0 1 | (2^168 + 1)G
126 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132 * followed by a copy of this with each element multiplied by 2^28.
133 *
134 * The reason for this is so that we can clock bits into four different
135 * locations when doing simple scalar multiplies against the base point,
136 * and then another four locations using the second 16 elements.
137 */
138static const felem gmul[2][16][3] = {
139{{{0, 0, 0, 0},
140  {0, 0, 0, 0},
141  {0, 0, 0, 0}},
142 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144  {1, 0, 0, 0}},
145 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147  {1, 0, 0, 0}},
148 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150  {1, 0, 0, 0}},
151 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153  {1, 0, 0, 0}},
154 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156  {1, 0, 0, 0}},
157 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159  {1, 0, 0, 0}},
160 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162  {1, 0, 0, 0}},
163 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165  {1, 0, 0, 0}},
166 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168  {1, 0, 0, 0}},
169 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171  {1, 0, 0, 0}},
172 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174  {1, 0, 0, 0}},
175 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177  {1, 0, 0, 0}},
178 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180  {1, 0, 0, 0}},
181 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183  {1, 0, 0, 0}},
184 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186  {1, 0, 0, 0}}},
187{{{0, 0, 0, 0},
188  {0, 0, 0, 0},
189  {0, 0, 0, 0}},
190 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192  {1, 0, 0, 0}},
193 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195  {1, 0, 0, 0}},
196 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198  {1, 0, 0, 0}},
199 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201  {1, 0, 0, 0}},
202 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204  {1, 0, 0, 0}},
205 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207  {1, 0, 0, 0}},
208 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210  {1, 0, 0, 0}},
211 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213  {1, 0, 0, 0}},
214 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216  {1, 0, 0, 0}},
217 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219  {1, 0, 0, 0}},
220 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222  {1, 0, 0, 0}},
223 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225  {1, 0, 0, 0}},
226 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228  {1, 0, 0, 0}},
229 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231  {1, 0, 0, 0}},
232 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234  {1, 0, 0, 0}}}
235};
236
237/* Precomputation for the group generator. */
238struct nistp224_pre_comp_st {
239    felem g_pre_comp[2][16][3];
240    CRYPTO_REF_COUNT references;
241    CRYPTO_RWLOCK *lock;
242};
243
244const EC_METHOD *EC_GFp_nistp224_method(void)
245{
246    static const EC_METHOD ret = {
247        EC_FLAGS_DEFAULT_OCT,
248        NID_X9_62_prime_field,
249        ossl_ec_GFp_nistp224_group_init,
250        ossl_ec_GFp_simple_group_finish,
251        ossl_ec_GFp_simple_group_clear_finish,
252        ossl_ec_GFp_nist_group_copy,
253        ossl_ec_GFp_nistp224_group_set_curve,
254        ossl_ec_GFp_simple_group_get_curve,
255        ossl_ec_GFp_simple_group_get_degree,
256        ossl_ec_group_simple_order_bits,
257        ossl_ec_GFp_simple_group_check_discriminant,
258        ossl_ec_GFp_simple_point_init,
259        ossl_ec_GFp_simple_point_finish,
260        ossl_ec_GFp_simple_point_clear_finish,
261        ossl_ec_GFp_simple_point_copy,
262        ossl_ec_GFp_simple_point_set_to_infinity,
263        ossl_ec_GFp_simple_point_set_affine_coordinates,
264        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265        0 /* point_set_compressed_coordinates */ ,
266        0 /* point2oct */ ,
267        0 /* oct2point */ ,
268        ossl_ec_GFp_simple_add,
269        ossl_ec_GFp_simple_dbl,
270        ossl_ec_GFp_simple_invert,
271        ossl_ec_GFp_simple_is_at_infinity,
272        ossl_ec_GFp_simple_is_on_curve,
273        ossl_ec_GFp_simple_cmp,
274        ossl_ec_GFp_simple_make_affine,
275        ossl_ec_GFp_simple_points_make_affine,
276        ossl_ec_GFp_nistp224_points_mul,
277        ossl_ec_GFp_nistp224_precompute_mult,
278        ossl_ec_GFp_nistp224_have_precompute_mult,
279        ossl_ec_GFp_nist_field_mul,
280        ossl_ec_GFp_nist_field_sqr,
281        0 /* field_div */ ,
282        ossl_ec_GFp_simple_field_inv,
283        0 /* field_encode */ ,
284        0 /* field_decode */ ,
285        0,                      /* field_set_to_one */
286        ossl_ec_key_simple_priv2oct,
287        ossl_ec_key_simple_oct2priv,
288        0, /* set private */
289        ossl_ec_key_simple_generate_key,
290        ossl_ec_key_simple_check_key,
291        ossl_ec_key_simple_generate_public_key,
292        0, /* keycopy */
293        0, /* keyfinish */
294        ossl_ecdh_simple_compute_key,
295        ossl_ecdsa_simple_sign_setup,
296        ossl_ecdsa_simple_sign_sig,
297        ossl_ecdsa_simple_verify_sig,
298        0, /* field_inverse_mod_ord */
299        0, /* blind_coordinates */
300        0, /* ladder_pre */
301        0, /* ladder_step */
302        0  /* ladder_post */
303    };
304
305    return &ret;
306}
307
308/*
309 * Helper functions to convert field elements to/from internal representation
310 */
311static void bin28_to_felem(felem out, const u8 in[28])
312{
313    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317}
318
319static void felem_to_bin28(u8 out[28], const felem in)
320{
321    unsigned i;
322    for (i = 0; i < 7; ++i) {
323        out[i] = in[0] >> (8 * i);
324        out[i + 7] = in[1] >> (8 * i);
325        out[i + 14] = in[2] >> (8 * i);
326        out[i + 21] = in[3] >> (8 * i);
327    }
328}
329
330/* From OpenSSL BIGNUM to internal representation */
331static int BN_to_felem(felem out, const BIGNUM *bn)
332{
333    felem_bytearray b_out;
334    int num_bytes;
335
336    if (BN_is_negative(bn)) {
337        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338        return 0;
339    }
340    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341    if (num_bytes < 0) {
342        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343        return 0;
344    }
345    bin28_to_felem(out, b_out);
346    return 1;
347}
348
349/* From internal representation to OpenSSL BIGNUM */
350static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351{
352    felem_bytearray b_out;
353    felem_to_bin28(b_out, in);
354    return BN_lebin2bn(b_out, sizeof(b_out), out);
355}
356
357/******************************************************************************/
358/*-
359 *                              FIELD OPERATIONS
360 *
361 * Field operations, using the internal representation of field elements.
362 * NB! These operations are specific to our point multiplication and cannot be
363 * expected to be correct in general - e.g., multiplication with a large scalar
364 * will cause an overflow.
365 *
366 */
367
368static void felem_one(felem out)
369{
370    out[0] = 1;
371    out[1] = 0;
372    out[2] = 0;
373    out[3] = 0;
374}
375
376static void felem_assign(felem out, const felem in)
377{
378    out[0] = in[0];
379    out[1] = in[1];
380    out[2] = in[2];
381    out[3] = in[3];
382}
383
384/* Sum two field elements: out += in */
385static void felem_sum(felem out, const felem in)
386{
387    out[0] += in[0];
388    out[1] += in[1];
389    out[2] += in[2];
390    out[3] += in[3];
391}
392
393/* Subtract field elements: out -= in */
394/* Assumes in[i] < 2^57 */
395static void felem_diff(felem out, const felem in)
396{
397    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
398    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
399    static const limb two58m42m2 = (((limb) 1) << 58) -
400        (((limb) 1) << 42) - (((limb) 1) << 2);
401
402    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403    out[0] += two58p2;
404    out[1] += two58m42m2;
405    out[2] += two58m2;
406    out[3] += two58m2;
407
408    out[0] -= in[0];
409    out[1] -= in[1];
410    out[2] -= in[2];
411    out[3] -= in[3];
412}
413
414/* Subtract in unreduced 128-bit mode: out -= in */
415/* Assumes in[i] < 2^119 */
416static void widefelem_diff(widefelem out, const widefelem in)
417{
418    static const widelimb two120 = ((widelimb) 1) << 120;
419    static const widelimb two120m64 = (((widelimb) 1) << 120) -
420        (((widelimb) 1) << 64);
421    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
422        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423
424    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425    out[0] += two120;
426    out[1] += two120m64;
427    out[2] += two120m64;
428    out[3] += two120;
429    out[4] += two120m104m64;
430    out[5] += two120m64;
431    out[6] += two120m64;
432
433    out[0] -= in[0];
434    out[1] -= in[1];
435    out[2] -= in[2];
436    out[3] -= in[3];
437    out[4] -= in[4];
438    out[5] -= in[5];
439    out[6] -= in[6];
440}
441
442/* Subtract in mixed mode: out128 -= in64 */
443/* in[i] < 2^63 */
444static void felem_diff_128_64(widefelem out, const felem in)
445{
446    static const widelimb two64p8 = (((widelimb) 1) << 64) +
447        (((widelimb) 1) << 8);
448    static const widelimb two64m8 = (((widelimb) 1) << 64) -
449        (((widelimb) 1) << 8);
450    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
451        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452
453    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454    out[0] += two64p8;
455    out[1] += two64m48m8;
456    out[2] += two64m8;
457    out[3] += two64m8;
458
459    out[0] -= in[0];
460    out[1] -= in[1];
461    out[2] -= in[2];
462    out[3] -= in[3];
463}
464
465/*
466 * Multiply a field element by a scalar: out = out * scalar The scalars we
467 * actually use are small, so results fit without overflow
468 */
469static void felem_scalar(felem out, const limb scalar)
470{
471    out[0] *= scalar;
472    out[1] *= scalar;
473    out[2] *= scalar;
474    out[3] *= scalar;
475}
476
477/*
478 * Multiply an unreduced field element by a scalar: out = out * scalar The
479 * scalars we actually use are small, so results fit without overflow
480 */
481static void widefelem_scalar(widefelem out, const widelimb scalar)
482{
483    out[0] *= scalar;
484    out[1] *= scalar;
485    out[2] *= scalar;
486    out[3] *= scalar;
487    out[4] *= scalar;
488    out[5] *= scalar;
489    out[6] *= scalar;
490}
491
492/* Square a field element: out = in^2 */
493static void felem_square(widefelem out, const felem in)
494{
495    limb tmp0, tmp1, tmp2;
496    tmp0 = 2 * in[0];
497    tmp1 = 2 * in[1];
498    tmp2 = 2 * in[2];
499    out[0] = ((widelimb) in[0]) * in[0];
500    out[1] = ((widelimb) in[0]) * tmp1;
501    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
503    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
504    out[5] = ((widelimb) in[3]) * tmp2;
505    out[6] = ((widelimb) in[3]) * in[3];
506}
507
508/* Multiply two field elements: out = in1 * in2 */
509static void felem_mul(widefelem out, const felem in1, const felem in2)
510{
511    out[0] = ((widelimb) in1[0]) * in2[0];
512    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
513    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
514             ((widelimb) in1[2]) * in2[0];
515    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
516             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
517    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
518             ((widelimb) in1[3]) * in2[1];
519    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
520    out[6] = ((widelimb) in1[3]) * in2[3];
521}
522
523/*-
524 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
525 * Requires in[i] < 2^126,
526 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527static void felem_reduce(felem out, const widefelem in)
528{
529    static const widelimb two127p15 = (((widelimb) 1) << 127) +
530        (((widelimb) 1) << 15);
531    static const widelimb two127m71 = (((widelimb) 1) << 127) -
532        (((widelimb) 1) << 71);
533    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
534        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535    widelimb output[5];
536
537    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
538    output[0] = in[0] + two127p15;
539    output[1] = in[1] + two127m71m55;
540    output[2] = in[2] + two127m71;
541    output[3] = in[3];
542    output[4] = in[4];
543
544    /* Eliminate in[4], in[5], in[6] */
545    output[4] += in[6] >> 16;
546    output[3] += (in[6] & 0xffff) << 40;
547    output[2] -= in[6];
548
549    output[3] += in[5] >> 16;
550    output[2] += (in[5] & 0xffff) << 40;
551    output[1] -= in[5];
552
553    output[2] += output[4] >> 16;
554    output[1] += (output[4] & 0xffff) << 40;
555    output[0] -= output[4];
556
557    /* Carry 2 -> 3 -> 4 */
558    output[3] += output[2] >> 56;
559    output[2] &= 0x00ffffffffffffff;
560
561    output[4] = output[3] >> 56;
562    output[3] &= 0x00ffffffffffffff;
563
564    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565
566    /* Eliminate output[4] */
567    output[2] += output[4] >> 16;
568    /* output[2] < 2^56 + 2^56 = 2^57 */
569    output[1] += (output[4] & 0xffff) << 40;
570    output[0] -= output[4];
571
572    /* Carry 0 -> 1 -> 2 -> 3 */
573    output[1] += output[0] >> 56;
574    out[0] = output[0] & 0x00ffffffffffffff;
575
576    output[2] += output[1] >> 56;
577    /* output[2] < 2^57 + 2^72 */
578    out[1] = output[1] & 0x00ffffffffffffff;
579    output[3] += output[2] >> 56;
580    /* output[3] <= 2^56 + 2^16 */
581    out[2] = output[2] & 0x00ffffffffffffff;
582
583    /*-
584     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585     * out[3] <= 2^56 + 2^16 (due to final carry),
586     * so out < 2*p
587     */
588    out[3] = output[3];
589}
590
591static void felem_square_reduce(felem out, const felem in)
592{
593    widefelem tmp;
594    felem_square(tmp, in);
595    felem_reduce(out, tmp);
596}
597
598static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599{
600    widefelem tmp;
601    felem_mul(tmp, in1, in2);
602    felem_reduce(out, tmp);
603}
604
605/*
606 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607 * call felem_reduce first)
608 */
609static void felem_contract(felem out, const felem in)
610{
611    static const int64_t two56 = ((limb) 1) << 56;
612    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
614    int64_t tmp[4], a;
615    tmp[0] = in[0];
616    tmp[1] = in[1];
617    tmp[2] = in[2];
618    tmp[3] = in[3];
619    /* Case 1: a = 1 iff in >= 2^224 */
620    a = (in[3] >> 56);
621    tmp[0] -= a;
622    tmp[1] += a << 40;
623    tmp[3] &= 0x00ffffffffffffff;
624    /*
625     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626     * and the lower part is non-zero
627     */
628    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
629        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
630    a &= 0x00ffffffffffffff;
631    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632    a = (a - 1) >> 63;
633    /* subtract 2^224 - 2^96 + 1 if a is all-one */
634    tmp[3] &= a ^ 0xffffffffffffffff;
635    tmp[2] &= a ^ 0xffffffffffffffff;
636    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637    tmp[0] -= 1 & a;
638
639    /*
640     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641     * non-zero, so we only need one step
642     */
643    a = tmp[0] >> 63;
644    tmp[0] += two56 & a;
645    tmp[1] -= 1 & a;
646
647    /* carry 1 -> 2 -> 3 */
648    tmp[2] += tmp[1] >> 56;
649    tmp[1] &= 0x00ffffffffffffff;
650
651    tmp[3] += tmp[2] >> 56;
652    tmp[2] &= 0x00ffffffffffffff;
653
654    /* Now 0 <= out < p */
655    out[0] = tmp[0];
656    out[1] = tmp[1];
657    out[2] = tmp[2];
658    out[3] = tmp[3];
659}
660
661/*
662 * Get negative value: out = -in
663 * Requires in[i] < 2^63,
664 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665 */
666static void felem_neg(felem out, const felem in)
667{
668    widefelem tmp;
669
670    memset(tmp, 0, sizeof(tmp));
671    felem_diff_128_64(tmp, in);
672    felem_reduce(out, tmp);
673}
674
675/*
676 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677 * elements are reduced to in < 2^225, so we only need to check three cases:
678 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679 */
680static limb felem_is_zero(const felem in)
681{
682    limb zero, two224m96p1, two225m97p2;
683
684    zero = in[0] | in[1] | in[2] | in[3];
685    zero = (((int64_t) (zero) - 1) >> 63) & 1;
686    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
687        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
688    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
689    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
690        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
691    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
692    return (zero | two224m96p1 | two225m97p2);
693}
694
695static int felem_is_zero_int(const void *in)
696{
697    return (int)(felem_is_zero(in) & ((limb) 1));
698}
699
700/* Invert a field element */
701/* Computation chain copied from djb's code */
702static void felem_inv(felem out, const felem in)
703{
704    felem ftmp, ftmp2, ftmp3, ftmp4;
705    widefelem tmp;
706    unsigned i;
707
708    felem_square(tmp, in);
709    felem_reduce(ftmp, tmp);    /* 2 */
710    felem_mul(tmp, in, ftmp);
711    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
712    felem_square(tmp, ftmp);
713    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
714    felem_mul(tmp, in, ftmp);
715    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
716    felem_square(tmp, ftmp);
717    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
718    felem_square(tmp, ftmp2);
719    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
720    felem_square(tmp, ftmp2);
721    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
722    felem_mul(tmp, ftmp2, ftmp);
723    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
724    felem_square(tmp, ftmp);
725    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
726    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
727        felem_square(tmp, ftmp2);
728        felem_reduce(ftmp2, tmp);
729    }
730    felem_mul(tmp, ftmp2, ftmp);
731    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
732    felem_square(tmp, ftmp2);
733    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
734    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
735        felem_square(tmp, ftmp3);
736        felem_reduce(ftmp3, tmp);
737    }
738    felem_mul(tmp, ftmp3, ftmp2);
739    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
740    felem_square(tmp, ftmp2);
741    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
742    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
743        felem_square(tmp, ftmp3);
744        felem_reduce(ftmp3, tmp);
745    }
746    felem_mul(tmp, ftmp3, ftmp2);
747    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
748    felem_square(tmp, ftmp3);
749    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
750    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
751        felem_square(tmp, ftmp4);
752        felem_reduce(ftmp4, tmp);
753    }
754    felem_mul(tmp, ftmp3, ftmp4);
755    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
756    felem_square(tmp, ftmp3);
757    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
758    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
759        felem_square(tmp, ftmp4);
760        felem_reduce(ftmp4, tmp);
761    }
762    felem_mul(tmp, ftmp2, ftmp4);
763    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
764    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
765        felem_square(tmp, ftmp2);
766        felem_reduce(ftmp2, tmp);
767    }
768    felem_mul(tmp, ftmp2, ftmp);
769    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
770    felem_square(tmp, ftmp);
771    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
772    felem_mul(tmp, ftmp, in);
773    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
774    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
775        felem_square(tmp, ftmp);
776        felem_reduce(ftmp, tmp);
777    }
778    felem_mul(tmp, ftmp, ftmp3);
779    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
780}
781
782/*
783 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784 * out to itself.
785 */
786static void copy_conditional(felem out, const felem in, limb icopy)
787{
788    unsigned i;
789    /*
790     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791     */
792    const limb copy = -icopy;
793    for (i = 0; i < 4; ++i) {
794        const limb tmp = copy & (in[i] ^ out[i]);
795        out[i] ^= tmp;
796    }
797}
798
799/******************************************************************************/
800/*-
801 *                       ELLIPTIC CURVE POINT OPERATIONS
802 *
803 * Points are represented in Jacobian projective coordinates:
804 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805 * or to the point at infinity if Z == 0.
806 *
807 */
808
809/*-
810 * Double an elliptic curve point:
811 * (X', Y', Z') = 2 * (X, Y, Z), where
812 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816 * while x_out == y_in is not (maybe this works, but it's not tested).
817 */
818static void
819point_double(felem x_out, felem y_out, felem z_out,
820             const felem x_in, const felem y_in, const felem z_in)
821{
822    widefelem tmp, tmp2;
823    felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825    felem_assign(ftmp, x_in);
826    felem_assign(ftmp2, x_in);
827
828    /* delta = z^2 */
829    felem_square(tmp, z_in);
830    felem_reduce(delta, tmp);
831
832    /* gamma = y^2 */
833    felem_square(tmp, y_in);
834    felem_reduce(gamma, tmp);
835
836    /* beta = x*gamma */
837    felem_mul(tmp, x_in, gamma);
838    felem_reduce(beta, tmp);
839
840    /* alpha = 3*(x-delta)*(x+delta) */
841    felem_diff(ftmp, delta);
842    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
843    felem_sum(ftmp2, delta);
844    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
845    felem_scalar(ftmp2, 3);
846    /* ftmp2[i] < 3 * 2^58 < 2^60 */
847    felem_mul(tmp, ftmp, ftmp2);
848    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
849    felem_reduce(alpha, tmp);
850
851    /* x' = alpha^2 - 8*beta */
852    felem_square(tmp, alpha);
853    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
854    felem_assign(ftmp, beta);
855    felem_scalar(ftmp, 8);
856    /* ftmp[i] < 8 * 2^57 = 2^60 */
857    felem_diff_128_64(tmp, ftmp);
858    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
859    felem_reduce(x_out, tmp);
860
861    /* z' = (y + z)^2 - gamma - delta */
862    felem_sum(delta, gamma);
863    /* delta[i] < 2^57 + 2^57 = 2^58 */
864    felem_assign(ftmp, y_in);
865    felem_sum(ftmp, z_in);
866    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
867    felem_square(tmp, ftmp);
868    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
869    felem_diff_128_64(tmp, delta);
870    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
871    felem_reduce(z_out, tmp);
872
873    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
874    felem_scalar(beta, 4);
875    /* beta[i] < 4 * 2^57 = 2^59 */
876    felem_diff(beta, x_out);
877    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
878    felem_mul(tmp, alpha, beta);
879    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
880    felem_square(tmp2, gamma);
881    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
882    widefelem_scalar(tmp2, 8);
883    /* tmp2[i] < 8 * 2^116 = 2^119 */
884    widefelem_diff(tmp, tmp2);
885    /* tmp[i] < 2^119 + 2^120 < 2^121 */
886    felem_reduce(y_out, tmp);
887}
888
889/*-
890 * Add two elliptic curve points:
891 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897 *
898 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899 */
900
901/*
902 * This function is not entirely constant-time: it includes a branch for
903 * checking whether the two input points are equal, (while not equal to the
904 * point at infinity). This case never happens during single point
905 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906 */
907static void point_add(felem x3, felem y3, felem z3,
908                      const felem x1, const felem y1, const felem z1,
909                      const int mixed, const felem x2, const felem y2,
910                      const felem z2)
911{
912    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913    widefelem tmp, tmp2;
914    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915    limb points_equal;
916
917    if (!mixed) {
918        /* ftmp2 = z2^2 */
919        felem_square(tmp, z2);
920        felem_reduce(ftmp2, tmp);
921
922        /* ftmp4 = z2^3 */
923        felem_mul(tmp, ftmp2, z2);
924        felem_reduce(ftmp4, tmp);
925
926        /* ftmp4 = z2^3*y1 */
927        felem_mul(tmp2, ftmp4, y1);
928        felem_reduce(ftmp4, tmp2);
929
930        /* ftmp2 = z2^2*x1 */
931        felem_mul(tmp2, ftmp2, x1);
932        felem_reduce(ftmp2, tmp2);
933    } else {
934        /*
935         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
936         */
937
938        /* ftmp4 = z2^3*y1 */
939        felem_assign(ftmp4, y1);
940
941        /* ftmp2 = z2^2*x1 */
942        felem_assign(ftmp2, x1);
943    }
944
945    /* ftmp = z1^2 */
946    felem_square(tmp, z1);
947    felem_reduce(ftmp, tmp);
948
949    /* ftmp3 = z1^3 */
950    felem_mul(tmp, ftmp, z1);
951    felem_reduce(ftmp3, tmp);
952
953    /* tmp = z1^3*y2 */
954    felem_mul(tmp, ftmp3, y2);
955    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957    /* ftmp3 = z1^3*y2 - z2^3*y1 */
958    felem_diff_128_64(tmp, ftmp4);
959    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960    felem_reduce(ftmp3, tmp);
961
962    /* tmp = z1^2*x2 */
963    felem_mul(tmp, ftmp, x2);
964    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966    /* ftmp = z1^2*x2 - z2^2*x1 */
967    felem_diff_128_64(tmp, ftmp2);
968    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969    felem_reduce(ftmp, tmp);
970
971    /*
972     * The formulae are incorrect if the points are equal, in affine coordinates
973     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
974     * happens.
975     *
976     * We use bitwise operations to avoid potential side-channels introduced by
977     * the short-circuiting behaviour of boolean operators.
978     */
979    x_equal = felem_is_zero(ftmp);
980    y_equal = felem_is_zero(ftmp3);
981    /*
982     * The special case of either point being the point at infinity (z1 and/or
983     * z2 are zero), is handled separately later on in this function, so we
984     * avoid jumping to point_double here in those special cases.
985     */
986    z1_is_zero = felem_is_zero(z1);
987    z2_is_zero = felem_is_zero(z2);
988
989    /*
990     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
991     * specific implementation `felem_is_zero()` returns truth as `0x1`
992     * (rather than `0xff..ff`).
993     *
994     * This implies that `~true` in this implementation becomes
995     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
996     * the if expression, we mask out only the last bit in the next
997     * line.
998     */
999    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001    if (points_equal) {
1002        /*
1003         * This is obviously not constant-time but, as mentioned before, this
1004         * case never happens during single point multiplication, so there is no
1005         * timing leak for ECDH or ECDSA signing.
1006         */
1007        point_double(x3, y3, z3, x1, y1, z1);
1008        return;
1009    }
1010
1011    /* ftmp5 = z1*z2 */
1012    if (!mixed) {
1013        felem_mul(tmp, z1, z2);
1014        felem_reduce(ftmp5, tmp);
1015    } else {
1016        /* special case z2 = 0 is handled later */
1017        felem_assign(ftmp5, z1);
1018    }
1019
1020    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021    felem_mul(tmp, ftmp, ftmp5);
1022    felem_reduce(z_out, tmp);
1023
1024    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025    felem_assign(ftmp5, ftmp);
1026    felem_square(tmp, ftmp);
1027    felem_reduce(ftmp, tmp);
1028
1029    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030    felem_mul(tmp, ftmp, ftmp5);
1031    felem_reduce(ftmp5, tmp);
1032
1033    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034    felem_mul(tmp, ftmp2, ftmp);
1035    felem_reduce(ftmp2, tmp);
1036
1037    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038    felem_mul(tmp, ftmp4, ftmp5);
1039    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042    felem_square(tmp2, ftmp3);
1043    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046    felem_diff_128_64(tmp2, ftmp5);
1047    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050    felem_assign(ftmp5, ftmp2);
1051    felem_scalar(ftmp5, 2);
1052    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054    /*-
1055     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057     */
1058    felem_diff_128_64(tmp2, ftmp5);
1059    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060    felem_reduce(x_out, tmp2);
1061
1062    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063    felem_diff(ftmp2, x_out);
1064    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066    /*
1067     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068     */
1069    felem_mul(tmp2, ftmp3, ftmp2);
1070    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072    /*-
1073     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075     */
1076    widefelem_diff(tmp2, tmp);
1077    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078    felem_reduce(y_out, tmp2);
1079
1080    /*
1081     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082     * the point at infinity, so we need to check for this separately
1083     */
1084
1085    /*
1086     * if point 1 is at infinity, copy point 2 to output, and vice versa
1087     */
1088    copy_conditional(x_out, x2, z1_is_zero);
1089    copy_conditional(x_out, x1, z2_is_zero);
1090    copy_conditional(y_out, y2, z1_is_zero);
1091    copy_conditional(y_out, y1, z2_is_zero);
1092    copy_conditional(z_out, z2, z1_is_zero);
1093    copy_conditional(z_out, z1, z2_is_zero);
1094    felem_assign(x3, x_out);
1095    felem_assign(y3, y_out);
1096    felem_assign(z3, z_out);
1097}
1098
1099/*
1100 * select_point selects the |idx|th point from a precomputation table and
1101 * copies it to out.
1102 * The pre_comp array argument should be size of |size| argument
1103 */
1104static void select_point(const u64 idx, unsigned int size,
1105                         const felem pre_comp[][3], felem out[3])
1106{
1107    unsigned i, j;
1108    limb *outlimbs = &out[0][0];
1109
1110    memset(out, 0, sizeof(*out) * 3);
1111    for (i = 0; i < size; i++) {
1112        const limb *inlimbs = &pre_comp[i][0][0];
1113        u64 mask = i ^ idx;
1114        mask |= mask >> 4;
1115        mask |= mask >> 2;
1116        mask |= mask >> 1;
1117        mask &= 1;
1118        mask--;
1119        for (j = 0; j < 4 * 3; j++)
1120            outlimbs[j] |= inlimbs[j] & mask;
1121    }
1122}
1123
1124/* get_bit returns the |i|th bit in |in| */
1125static char get_bit(const felem_bytearray in, unsigned i)
1126{
1127    if (i >= 224)
1128        return 0;
1129    return (in[i >> 3] >> (i & 7)) & 1;
1130}
1131
1132/*
1133 * Interleaved point multiplication using precomputed point multiples: The
1134 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136 * generator, using certain (large) precomputed multiples in g_pre_comp.
1137 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138 */
1139static void batch_mul(felem x_out, felem y_out, felem z_out,
1140                      const felem_bytearray scalars[],
1141                      const unsigned num_points, const u8 *g_scalar,
1142                      const int mixed, const felem pre_comp[][17][3],
1143                      const felem g_pre_comp[2][16][3])
1144{
1145    int i, skip;
1146    unsigned num;
1147    unsigned gen_mul = (g_scalar != NULL);
1148    felem nq[3], tmp[4];
1149    u64 bits;
1150    u8 sign, digit;
1151
1152    /* set nq to the point at infinity */
1153    memset(nq, 0, sizeof(nq));
1154
1155    /*
1156     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157     * of the generator (two in each of the last 28 rounds) and additions of
1158     * other points multiples (every 5th round).
1159     */
1160    skip = 1;                   /* save two point operations in the first
1161                                 * round */
1162    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163        /* double */
1164        if (!skip)
1165            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167        /* add multiples of the generator */
1168        if (gen_mul && (i <= 27)) {
1169            /* first, look 28 bits upwards */
1170            bits = get_bit(g_scalar, i + 196) << 3;
1171            bits |= get_bit(g_scalar, i + 140) << 2;
1172            bits |= get_bit(g_scalar, i + 84) << 1;
1173            bits |= get_bit(g_scalar, i + 28);
1174            /* select the point to add, in constant time */
1175            select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177            if (!skip) {
1178                /* value 1 below is argument for "mixed" */
1179                point_add(nq[0], nq[1], nq[2],
1180                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181            } else {
1182                memcpy(nq, tmp, 3 * sizeof(felem));
1183                skip = 0;
1184            }
1185
1186            /* second, look at the current position */
1187            bits = get_bit(g_scalar, i + 168) << 3;
1188            bits |= get_bit(g_scalar, i + 112) << 2;
1189            bits |= get_bit(g_scalar, i + 56) << 1;
1190            bits |= get_bit(g_scalar, i);
1191            /* select the point to add, in constant time */
1192            select_point(bits, 16, g_pre_comp[0], tmp);
1193            point_add(nq[0], nq[1], nq[2],
1194                      nq[0], nq[1], nq[2],
1195                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196        }
1197
1198        /* do other additions every 5 doublings */
1199        if (num_points && (i % 5 == 0)) {
1200            /* loop over all scalars */
1201            for (num = 0; num < num_points; ++num) {
1202                bits = get_bit(scalars[num], i + 4) << 5;
1203                bits |= get_bit(scalars[num], i + 3) << 4;
1204                bits |= get_bit(scalars[num], i + 2) << 3;
1205                bits |= get_bit(scalars[num], i + 1) << 2;
1206                bits |= get_bit(scalars[num], i) << 1;
1207                bits |= get_bit(scalars[num], i - 1);
1208                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210                /* select the point to add or subtract */
1211                select_point(digit, 17, pre_comp[num], tmp);
1212                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213                                            * point */
1214                copy_conditional(tmp[1], tmp[3], sign);
1215
1216                if (!skip) {
1217                    point_add(nq[0], nq[1], nq[2],
1218                              nq[0], nq[1], nq[2],
1219                              mixed, tmp[0], tmp[1], tmp[2]);
1220                } else {
1221                    memcpy(nq, tmp, 3 * sizeof(felem));
1222                    skip = 0;
1223                }
1224            }
1225        }
1226    }
1227    felem_assign(x_out, nq[0]);
1228    felem_assign(y_out, nq[1]);
1229    felem_assign(z_out, nq[2]);
1230}
1231
1232/******************************************************************************/
1233/*
1234 * FUNCTIONS TO MANAGE PRECOMPUTATION
1235 */
1236
1237static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238{
1239    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241    if (!ret) {
1242        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243        return ret;
1244    }
1245
1246    ret->references = 1;
1247
1248    ret->lock = CRYPTO_THREAD_lock_new();
1249    if (ret->lock == NULL) {
1250        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251        OPENSSL_free(ret);
1252        return NULL;
1253    }
1254    return ret;
1255}
1256
1257NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258{
1259    int i;
1260    if (p != NULL)
1261        CRYPTO_UP_REF(&p->references, &i, p->lock);
1262    return p;
1263}
1264
1265void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266{
1267    int i;
1268
1269    if (p == NULL)
1270        return;
1271
1272    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273    REF_PRINT_COUNT("EC_nistp224", p);
1274    if (i > 0)
1275        return;
1276    REF_ASSERT_ISNT(i < 0);
1277
1278    CRYPTO_THREAD_lock_free(p->lock);
1279    OPENSSL_free(p);
1280}
1281
1282/******************************************************************************/
1283/*
1284 * OPENSSL EC_METHOD FUNCTIONS
1285 */
1286
1287int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288{
1289    int ret;
1290    ret = ossl_ec_GFp_simple_group_init(group);
1291    group->a_is_minus3 = 1;
1292    return ret;
1293}
1294
1295int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296                                         const BIGNUM *a, const BIGNUM *b,
1297                                         BN_CTX *ctx)
1298{
1299    int ret = 0;
1300    BIGNUM *curve_p, *curve_a, *curve_b;
1301#ifndef FIPS_MODULE
1302    BN_CTX *new_ctx = NULL;
1303
1304    if (ctx == NULL)
1305        ctx = new_ctx = BN_CTX_new();
1306#endif
1307    if (ctx == NULL)
1308        return 0;
1309
1310    BN_CTX_start(ctx);
1311    curve_p = BN_CTX_get(ctx);
1312    curve_a = BN_CTX_get(ctx);
1313    curve_b = BN_CTX_get(ctx);
1314    if (curve_b == NULL)
1315        goto err;
1316    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321        goto err;
1322    }
1323    group->field_mod_func = BN_nist_mod_224;
1324    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325 err:
1326    BN_CTX_end(ctx);
1327#ifndef FIPS_MODULE
1328    BN_CTX_free(new_ctx);
1329#endif
1330    return ret;
1331}
1332
1333/*
1334 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335 * (X/Z^2, Y/Z^3)
1336 */
1337int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338                                                      const EC_POINT *point,
1339                                                      BIGNUM *x, BIGNUM *y,
1340                                                      BN_CTX *ctx)
1341{
1342    felem z1, z2, x_in, y_in, x_out, y_out;
1343    widefelem tmp;
1344
1345    if (EC_POINT_is_at_infinity(group, point)) {
1346        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347        return 0;
1348    }
1349    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350        (!BN_to_felem(z1, point->Z)))
1351        return 0;
1352    felem_inv(z2, z1);
1353    felem_square(tmp, z2);
1354    felem_reduce(z1, tmp);
1355    felem_mul(tmp, x_in, z1);
1356    felem_reduce(x_in, tmp);
1357    felem_contract(x_out, x_in);
1358    if (x != NULL) {
1359        if (!felem_to_BN(x, x_out)) {
1360            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361            return 0;
1362        }
1363    }
1364    felem_mul(tmp, z1, z2);
1365    felem_reduce(z1, tmp);
1366    felem_mul(tmp, y_in, z1);
1367    felem_reduce(y_in, tmp);
1368    felem_contract(y_out, y_in);
1369    if (y != NULL) {
1370        if (!felem_to_BN(y, y_out)) {
1371            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372            return 0;
1373        }
1374    }
1375    return 1;
1376}
1377
1378static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379                               felem tmp_felems[ /* num+1 */ ])
1380{
1381    /*
1382     * Runs in constant time, unless an input is the point at infinity (which
1383     * normally shouldn't happen).
1384     */
1385    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386                                                  points,
1387                                                  sizeof(felem),
1388                                                  tmp_felems,
1389                                                  (void (*)(void *))felem_one,
1390                                                  felem_is_zero_int,
1391                                                  (void (*)(void *, const void *))
1392                                                  felem_assign,
1393                                                  (void (*)(void *, const void *))
1394                                                  felem_square_reduce, (void (*)
1395                                                                        (void *,
1396                                                                         const void
1397                                                                         *,
1398                                                                         const void
1399                                                                         *))
1400                                                  felem_mul_reduce,
1401                                                  (void (*)(void *, const void *))
1402                                                  felem_inv,
1403                                                  (void (*)(void *, const void *))
1404                                                  felem_contract);
1405}
1406
1407/*
1408 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409 * values Result is stored in r (r can equal one of the inputs).
1410 */
1411int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412                                    const BIGNUM *scalar, size_t num,
1413                                    const EC_POINT *points[],
1414                                    const BIGNUM *scalars[], BN_CTX *ctx)
1415{
1416    int ret = 0;
1417    int j;
1418    unsigned i;
1419    int mixed = 0;
1420    BIGNUM *x, *y, *z, *tmp_scalar;
1421    felem_bytearray g_secret;
1422    felem_bytearray *secrets = NULL;
1423    felem (*pre_comp)[17][3] = NULL;
1424    felem *tmp_felems = NULL;
1425    int num_bytes;
1426    int have_pre_comp = 0;
1427    size_t num_points = num;
1428    felem x_in, y_in, z_in, x_out, y_out, z_out;
1429    NISTP224_PRE_COMP *pre = NULL;
1430    const felem(*g_pre_comp)[16][3] = NULL;
1431    EC_POINT *generator = NULL;
1432    const EC_POINT *p = NULL;
1433    const BIGNUM *p_scalar = NULL;
1434
1435    BN_CTX_start(ctx);
1436    x = BN_CTX_get(ctx);
1437    y = BN_CTX_get(ctx);
1438    z = BN_CTX_get(ctx);
1439    tmp_scalar = BN_CTX_get(ctx);
1440    if (tmp_scalar == NULL)
1441        goto err;
1442
1443    if (scalar != NULL) {
1444        pre = group->pre_comp.nistp224;
1445        if (pre)
1446            /* we have precomputation, try to use it */
1447            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448        else
1449            /* try to use the standard precomputation */
1450            g_pre_comp = &gmul[0];
1451        generator = EC_POINT_new(group);
1452        if (generator == NULL)
1453            goto err;
1454        /* get the generator from precomputation */
1455        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459            goto err;
1460        }
1461        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462                                                                generator,
1463                                                                x, y, z, ctx))
1464            goto err;
1465        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466            /* precomputation matches generator */
1467            have_pre_comp = 1;
1468        else
1469            /*
1470             * we don't have valid precomputation: treat the generator as a
1471             * random point
1472             */
1473            num_points = num_points + 1;
1474    }
1475
1476    if (num_points > 0) {
1477        if (num_points >= 3) {
1478            /*
1479             * unless we precompute multiples for just one or two points,
1480             * converting those into affine form is time well spent
1481             */
1482            mixed = 1;
1483        }
1484        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486        if (mixed)
1487            tmp_felems =
1488                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489        if ((secrets == NULL) || (pre_comp == NULL)
1490            || (mixed && (tmp_felems == NULL))) {
1491            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492            goto err;
1493        }
1494
1495        /*
1496         * we treat NULL scalars as 0, and NULL points as points at infinity,
1497         * i.e., they contribute nothing to the linear combination
1498         */
1499        for (i = 0; i < num_points; ++i) {
1500            if (i == num) {
1501                /* the generator */
1502                p = EC_GROUP_get0_generator(group);
1503                p_scalar = scalar;
1504            } else {
1505                /* the i^th point */
1506                p = points[i];
1507                p_scalar = scalars[i];
1508            }
1509            if ((p_scalar != NULL) && (p != NULL)) {
1510                /* reduce scalar to 0 <= scalar < 2^224 */
1511                if ((BN_num_bits(p_scalar) > 224)
1512                    || (BN_is_negative(p_scalar))) {
1513                    /*
1514                     * this is an unusual input, and we don't guarantee
1515                     * constant-timeness
1516                     */
1517                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519                        goto err;
1520                    }
1521                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1522                                               secrets[i], sizeof(secrets[i]));
1523                } else {
1524                    num_bytes = BN_bn2lebinpad(p_scalar,
1525                                               secrets[i], sizeof(secrets[i]));
1526                }
1527                if (num_bytes < 0) {
1528                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529                    goto err;
1530                }
1531                /* precompute multiples */
1532                if ((!BN_to_felem(x_out, p->X)) ||
1533                    (!BN_to_felem(y_out, p->Y)) ||
1534                    (!BN_to_felem(z_out, p->Z)))
1535                    goto err;
1536                felem_assign(pre_comp[i][1][0], x_out);
1537                felem_assign(pre_comp[i][1][1], y_out);
1538                felem_assign(pre_comp[i][1][2], z_out);
1539                for (j = 2; j <= 16; ++j) {
1540                    if (j & 1) {
1541                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542                                  pre_comp[i][j][2], pre_comp[i][1][0],
1543                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544                                  pre_comp[i][j - 1][0],
1545                                  pre_comp[i][j - 1][1],
1546                                  pre_comp[i][j - 1][2]);
1547                    } else {
1548                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550                                     pre_comp[i][j / 2][1],
1551                                     pre_comp[i][j / 2][2]);
1552                    }
1553                }
1554            }
1555        }
1556        if (mixed)
1557            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558    }
1559
1560    /* the scalar for the generator */
1561    if ((scalar != NULL) && (have_pre_comp)) {
1562        memset(g_secret, 0, sizeof(g_secret));
1563        /* reduce scalar to 0 <= scalar < 2^224 */
1564        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565            /*
1566             * this is an unusual input, and we don't guarantee
1567             * constant-timeness
1568             */
1569            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571                goto err;
1572            }
1573            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574        } else {
1575            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576        }
1577        /* do the multiplication with generator precomputation */
1578        batch_mul(x_out, y_out, z_out,
1579                  (const felem_bytearray(*))secrets, num_points,
1580                  g_secret,
1581                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582    } else {
1583        /* do the multiplication without generator precomputation */
1584        batch_mul(x_out, y_out, z_out,
1585                  (const felem_bytearray(*))secrets, num_points,
1586                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587    }
1588    /* reduce the output to its unique minimal representation */
1589    felem_contract(x_in, x_out);
1590    felem_contract(y_in, y_out);
1591    felem_contract(z_in, z_out);
1592    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593        (!felem_to_BN(z, z_in))) {
1594        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595        goto err;
1596    }
1597    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598                                                             ctx);
1599
1600 err:
1601    BN_CTX_end(ctx);
1602    EC_POINT_free(generator);
1603    OPENSSL_free(secrets);
1604    OPENSSL_free(pre_comp);
1605    OPENSSL_free(tmp_felems);
1606    return ret;
1607}
1608
1609int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610{
1611    int ret = 0;
1612    NISTP224_PRE_COMP *pre = NULL;
1613    int i, j;
1614    BIGNUM *x, *y;
1615    EC_POINT *generator = NULL;
1616    felem tmp_felems[32];
1617#ifndef FIPS_MODULE
1618    BN_CTX *new_ctx = NULL;
1619#endif
1620
1621    /* throw away old precomputation */
1622    EC_pre_comp_free(group);
1623
1624#ifndef FIPS_MODULE
1625    if (ctx == NULL)
1626        ctx = new_ctx = BN_CTX_new();
1627#endif
1628    if (ctx == NULL)
1629        return 0;
1630
1631    BN_CTX_start(ctx);
1632    x = BN_CTX_get(ctx);
1633    y = BN_CTX_get(ctx);
1634    if (y == NULL)
1635        goto err;
1636    /* get the generator */
1637    if (group->generator == NULL)
1638        goto err;
1639    generator = EC_POINT_new(group);
1640    if (generator == NULL)
1641        goto err;
1642    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645        goto err;
1646    if ((pre = nistp224_pre_comp_new()) == NULL)
1647        goto err;
1648    /*
1649     * if the generator is the standard one, use built-in precomputation
1650     */
1651    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653        goto done;
1654    }
1655    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658        goto err;
1659    /*
1660     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661     * 2^140*G, 2^196*G for the second one
1662     */
1663    for (i = 1; i <= 8; i <<= 1) {
1664        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667        for (j = 0; j < 27; ++j) {
1668            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671        }
1672        if (i == 8)
1673            break;
1674        point_double(pre->g_pre_comp[0][2 * i][0],
1675                     pre->g_pre_comp[0][2 * i][1],
1676                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678        for (j = 0; j < 27; ++j) {
1679            point_double(pre->g_pre_comp[0][2 * i][0],
1680                         pre->g_pre_comp[0][2 * i][1],
1681                         pre->g_pre_comp[0][2 * i][2],
1682                         pre->g_pre_comp[0][2 * i][0],
1683                         pre->g_pre_comp[0][2 * i][1],
1684                         pre->g_pre_comp[0][2 * i][2]);
1685        }
1686    }
1687    for (i = 0; i < 2; i++) {
1688        /* g_pre_comp[i][0] is the point at infinity */
1689        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690        /* the remaining multiples */
1691        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696                  pre->g_pre_comp[i][2][2]);
1697        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702                  pre->g_pre_comp[i][2][2]);
1703        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708                  pre->g_pre_comp[i][4][2]);
1709        /*
1710         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711         */
1712        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716                  pre->g_pre_comp[i][2][2]);
1717        for (j = 1; j < 8; ++j) {
1718            /* odd multiples: add G resp. 2^28*G */
1719            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720                      pre->g_pre_comp[i][2 * j + 1][1],
1721                      pre->g_pre_comp[i][2 * j + 1][2],
1722                      pre->g_pre_comp[i][2 * j][0],
1723                      pre->g_pre_comp[i][2 * j][1],
1724                      pre->g_pre_comp[i][2 * j][2], 0,
1725                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726                      pre->g_pre_comp[i][1][2]);
1727        }
1728    }
1729    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731 done:
1732    SETPRECOMP(group, nistp224, pre);
1733    pre = NULL;
1734    ret = 1;
1735 err:
1736    BN_CTX_end(ctx);
1737    EC_POINT_free(generator);
1738#ifndef FIPS_MODULE
1739    BN_CTX_free(new_ctx);
1740#endif
1741    EC_nistp224_pre_comp_free(pre);
1742    return ret;
1743}
1744
1745int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746{
1747    return HAVEPRECOMP(group, nistp224);
1748}
1749