1/*
2 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 *     http://www.apache.org/licenses/LICENSE-2.0
18 *
19 *  Unless required by applicable law or agreed to in writing, software
20 *  distributed under the License is distributed on an "AS IS" BASIS,
21 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 *  See the License for the specific language governing permissions and
23 *  limitations under the License.
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28 *
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
31 */
32
33#include <openssl/opensslconf.h>
34#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35NON_EMPTY_TRANSLATION_UNIT
36#else
37
38# include <stdint.h>
39# include <string.h>
40# include <openssl/err.h>
41# include "ec_local.h"
42
43# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44  /* even with gcc, the typedef won't work for 32-bit platforms */
45typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
46                                 * platforms */
47# else
48#  error "Your compiler doesn't appear to support 128-bit integer types"
49# endif
50
51typedef uint8_t u8;
52typedef uint64_t u64;
53
54/******************************************************************************/
55/*-
56 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 *
58 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59 * using 64-bit coefficients called 'limbs',
60 * and sometimes (for multiplication results) as
61 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62 * using 128-bit coefficients called 'widelimbs'.
63 * A 4-limb representation is an 'felem';
64 * a 7-widelimb representation is a 'widefelem'.
65 * Even within felems, bits of adjacent limbs overlap, and we don't always
66 * reduce the representations: we ensure that inputs to each felem
67 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68 * and fit into a 128-bit word without overflow. The coefficients are then
69 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70 * We only reduce to the unique minimal representation at the end of the
71 * computation.
72 */
73
74typedef uint64_t limb;
75typedef uint64_t limb_aX __attribute((__aligned__(1)));
76typedef uint128_t widelimb;
77
78typedef limb felem[4];
79typedef widelimb widefelem[7];
80
81/*
82 * Field element represented as a byte array. 28*8 = 224 bits is also the
83 * group order size for the elliptic curve, and we also use this type for
84 * scalars for point multiplication.
85 */
86typedef u8 felem_bytearray[28];
87
88static const felem_bytearray nistp224_curve_params[5] = {
89    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
90     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
91     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
92    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
93     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
94     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
95    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
96     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
97     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
98    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
99     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
100     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
101    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
102     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
103     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104};
105
106/*-
107 * Precomputed multiples of the standard generator
108 * Points are given in coordinates (X, Y, Z) where Z normally is 1
109 * (0 for the point at infinity).
110 * For each field element, slice a_0 is word 0, etc.
111 *
112 * The table has 2 * 16 elements, starting with the following:
113 * index | bits    | point
114 * ------+---------+------------------------------
115 *     0 | 0 0 0 0 | 0G
116 *     1 | 0 0 0 1 | 1G
117 *     2 | 0 0 1 0 | 2^56G
118 *     3 | 0 0 1 1 | (2^56 + 1)G
119 *     4 | 0 1 0 0 | 2^112G
120 *     5 | 0 1 0 1 | (2^112 + 1)G
121 *     6 | 0 1 1 0 | (2^112 + 2^56)G
122 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
123 *     8 | 1 0 0 0 | 2^168G
124 *     9 | 1 0 0 1 | (2^168 + 1)G
125 *    10 | 1 0 1 0 | (2^168 + 2^56)G
126 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
127 *    12 | 1 1 0 0 | (2^168 + 2^112)G
128 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
129 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
130 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
131 * followed by a copy of this with each element multiplied by 2^28.
132 *
133 * The reason for this is so that we can clock bits into four different
134 * locations when doing simple scalar multiplies against the base point,
135 * and then another four locations using the second 16 elements.
136 */
137static const felem gmul[2][16][3] = {
138{{{0, 0, 0, 0},
139  {0, 0, 0, 0},
140  {0, 0, 0, 0}},
141 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
142  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143  {1, 0, 0, 0}},
144 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146  {1, 0, 0, 0}},
147 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
148  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149  {1, 0, 0, 0}},
150 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
151  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152  {1, 0, 0, 0}},
153 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
154  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155  {1, 0, 0, 0}},
156 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
157  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158  {1, 0, 0, 0}},
159 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
160  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161  {1, 0, 0, 0}},
162 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
163  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164  {1, 0, 0, 0}},
165 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
166  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167  {1, 0, 0, 0}},
168 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
169  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170  {1, 0, 0, 0}},
171 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
172  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173  {1, 0, 0, 0}},
174 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
175  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176  {1, 0, 0, 0}},
177 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
178  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179  {1, 0, 0, 0}},
180 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
181  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182  {1, 0, 0, 0}},
183 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
184  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
185  {1, 0, 0, 0}}},
186{{{0, 0, 0, 0},
187  {0, 0, 0, 0},
188  {0, 0, 0, 0}},
189 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
190  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191  {1, 0, 0, 0}},
192 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
193  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194  {1, 0, 0, 0}},
195 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
196  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197  {1, 0, 0, 0}},
198 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
199  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200  {1, 0, 0, 0}},
201 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
202  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203  {1, 0, 0, 0}},
204 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
205  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206  {1, 0, 0, 0}},
207 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
208  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209  {1, 0, 0, 0}},
210 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
211  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212  {1, 0, 0, 0}},
213 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
214  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215  {1, 0, 0, 0}},
216 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
217  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218  {1, 0, 0, 0}},
219 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
220  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221  {1, 0, 0, 0}},
222 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
223  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224  {1, 0, 0, 0}},
225 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
226  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227  {1, 0, 0, 0}},
228 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
229  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230  {1, 0, 0, 0}},
231 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
232  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
233  {1, 0, 0, 0}}}
234};
235
236/* Precomputation for the group generator. */
237struct nistp224_pre_comp_st {
238    felem g_pre_comp[2][16][3];
239    CRYPTO_REF_COUNT references;
240    CRYPTO_RWLOCK *lock;
241};
242
243const EC_METHOD *EC_GFp_nistp224_method(void)
244{
245    static const EC_METHOD ret = {
246        EC_FLAGS_DEFAULT_OCT,
247        NID_X9_62_prime_field,
248        ec_GFp_nistp224_group_init,
249        ec_GFp_simple_group_finish,
250        ec_GFp_simple_group_clear_finish,
251        ec_GFp_nist_group_copy,
252        ec_GFp_nistp224_group_set_curve,
253        ec_GFp_simple_group_get_curve,
254        ec_GFp_simple_group_get_degree,
255        ec_group_simple_order_bits,
256        ec_GFp_simple_group_check_discriminant,
257        ec_GFp_simple_point_init,
258        ec_GFp_simple_point_finish,
259        ec_GFp_simple_point_clear_finish,
260        ec_GFp_simple_point_copy,
261        ec_GFp_simple_point_set_to_infinity,
262        ec_GFp_simple_set_Jprojective_coordinates_GFp,
263        ec_GFp_simple_get_Jprojective_coordinates_GFp,
264        ec_GFp_simple_point_set_affine_coordinates,
265        ec_GFp_nistp224_point_get_affine_coordinates,
266        0 /* point_set_compressed_coordinates */ ,
267        0 /* point2oct */ ,
268        0 /* oct2point */ ,
269        ec_GFp_simple_add,
270        ec_GFp_simple_dbl,
271        ec_GFp_simple_invert,
272        ec_GFp_simple_is_at_infinity,
273        ec_GFp_simple_is_on_curve,
274        ec_GFp_simple_cmp,
275        ec_GFp_simple_make_affine,
276        ec_GFp_simple_points_make_affine,
277        ec_GFp_nistp224_points_mul,
278        ec_GFp_nistp224_precompute_mult,
279        ec_GFp_nistp224_have_precompute_mult,
280        ec_GFp_nist_field_mul,
281        ec_GFp_nist_field_sqr,
282        0 /* field_div */ ,
283        ec_GFp_simple_field_inv,
284        0 /* field_encode */ ,
285        0 /* field_decode */ ,
286        0,                      /* field_set_to_one */
287        ec_key_simple_priv2oct,
288        ec_key_simple_oct2priv,
289        0, /* set private */
290        ec_key_simple_generate_key,
291        ec_key_simple_check_key,
292        ec_key_simple_generate_public_key,
293        0, /* keycopy */
294        0, /* keyfinish */
295        ecdh_simple_compute_key,
296        0, /* field_inverse_mod_ord */
297        0, /* blind_coordinates */
298        0, /* ladder_pre */
299        0, /* ladder_step */
300        0  /* ladder_post */
301    };
302
303    return &ret;
304}
305
306/*
307 * Helper functions to convert field elements to/from internal representation
308 */
309static void bin28_to_felem(felem out, const u8 in[28])
310{
311    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
312    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
313    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
314    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
315}
316
317static void felem_to_bin28(u8 out[28], const felem in)
318{
319    unsigned i;
320    for (i = 0; i < 7; ++i) {
321        out[i] = in[0] >> (8 * i);
322        out[i + 7] = in[1] >> (8 * i);
323        out[i + 14] = in[2] >> (8 * i);
324        out[i + 21] = in[3] >> (8 * i);
325    }
326}
327
328/* From OpenSSL BIGNUM to internal representation */
329static int BN_to_felem(felem out, const BIGNUM *bn)
330{
331    felem_bytearray b_out;
332    int num_bytes;
333
334    if (BN_is_negative(bn)) {
335        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
336        return 0;
337    }
338    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
339    if (num_bytes < 0) {
340        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
341        return 0;
342    }
343    bin28_to_felem(out, b_out);
344    return 1;
345}
346
347/* From internal representation to OpenSSL BIGNUM */
348static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
349{
350    felem_bytearray b_out;
351    felem_to_bin28(b_out, in);
352    return BN_lebin2bn(b_out, sizeof(b_out), out);
353}
354
355/******************************************************************************/
356/*-
357 *                              FIELD OPERATIONS
358 *
359 * Field operations, using the internal representation of field elements.
360 * NB! These operations are specific to our point multiplication and cannot be
361 * expected to be correct in general - e.g., multiplication with a large scalar
362 * will cause an overflow.
363 *
364 */
365
366static void felem_one(felem out)
367{
368    out[0] = 1;
369    out[1] = 0;
370    out[2] = 0;
371    out[3] = 0;
372}
373
374static void felem_assign(felem out, const felem in)
375{
376    out[0] = in[0];
377    out[1] = in[1];
378    out[2] = in[2];
379    out[3] = in[3];
380}
381
382/* Sum two field elements: out += in */
383static void felem_sum(felem out, const felem in)
384{
385    out[0] += in[0];
386    out[1] += in[1];
387    out[2] += in[2];
388    out[3] += in[3];
389}
390
391/* Subtract field elements: out -= in */
392/* Assumes in[i] < 2^57 */
393static void felem_diff(felem out, const felem in)
394{
395    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
396    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
397    static const limb two58m42m2 = (((limb) 1) << 58) -
398        (((limb) 1) << 42) - (((limb) 1) << 2);
399
400    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
401    out[0] += two58p2;
402    out[1] += two58m42m2;
403    out[2] += two58m2;
404    out[3] += two58m2;
405
406    out[0] -= in[0];
407    out[1] -= in[1];
408    out[2] -= in[2];
409    out[3] -= in[3];
410}
411
412/* Subtract in unreduced 128-bit mode: out -= in */
413/* Assumes in[i] < 2^119 */
414static void widefelem_diff(widefelem out, const widefelem in)
415{
416    static const widelimb two120 = ((widelimb) 1) << 120;
417    static const widelimb two120m64 = (((widelimb) 1) << 120) -
418        (((widelimb) 1) << 64);
419    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
420        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
421
422    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
423    out[0] += two120;
424    out[1] += two120m64;
425    out[2] += two120m64;
426    out[3] += two120;
427    out[4] += two120m104m64;
428    out[5] += two120m64;
429    out[6] += two120m64;
430
431    out[0] -= in[0];
432    out[1] -= in[1];
433    out[2] -= in[2];
434    out[3] -= in[3];
435    out[4] -= in[4];
436    out[5] -= in[5];
437    out[6] -= in[6];
438}
439
440/* Subtract in mixed mode: out128 -= in64 */
441/* in[i] < 2^63 */
442static void felem_diff_128_64(widefelem out, const felem in)
443{
444    static const widelimb two64p8 = (((widelimb) 1) << 64) +
445        (((widelimb) 1) << 8);
446    static const widelimb two64m8 = (((widelimb) 1) << 64) -
447        (((widelimb) 1) << 8);
448    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
449        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
450
451    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
452    out[0] += two64p8;
453    out[1] += two64m48m8;
454    out[2] += two64m8;
455    out[3] += two64m8;
456
457    out[0] -= in[0];
458    out[1] -= in[1];
459    out[2] -= in[2];
460    out[3] -= in[3];
461}
462
463/*
464 * Multiply a field element by a scalar: out = out * scalar The scalars we
465 * actually use are small, so results fit without overflow
466 */
467static void felem_scalar(felem out, const limb scalar)
468{
469    out[0] *= scalar;
470    out[1] *= scalar;
471    out[2] *= scalar;
472    out[3] *= scalar;
473}
474
475/*
476 * Multiply an unreduced field element by a scalar: out = out * scalar The
477 * scalars we actually use are small, so results fit without overflow
478 */
479static void widefelem_scalar(widefelem out, const widelimb scalar)
480{
481    out[0] *= scalar;
482    out[1] *= scalar;
483    out[2] *= scalar;
484    out[3] *= scalar;
485    out[4] *= scalar;
486    out[5] *= scalar;
487    out[6] *= scalar;
488}
489
490/* Square a field element: out = in^2 */
491static void felem_square(widefelem out, const felem in)
492{
493    limb tmp0, tmp1, tmp2;
494    tmp0 = 2 * in[0];
495    tmp1 = 2 * in[1];
496    tmp2 = 2 * in[2];
497    out[0] = ((widelimb) in[0]) * in[0];
498    out[1] = ((widelimb) in[0]) * tmp1;
499    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
501    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
502    out[5] = ((widelimb) in[3]) * tmp2;
503    out[6] = ((widelimb) in[3]) * in[3];
504}
505
506/* Multiply two field elements: out = in1 * in2 */
507static void felem_mul(widefelem out, const felem in1, const felem in2)
508{
509    out[0] = ((widelimb) in1[0]) * in2[0];
510    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
511    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
512             ((widelimb) in1[2]) * in2[0];
513    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
514             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
515    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
516             ((widelimb) in1[3]) * in2[1];
517    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
518    out[6] = ((widelimb) in1[3]) * in2[3];
519}
520
521/*-
522 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
523 * Requires in[i] < 2^126,
524 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525static void felem_reduce(felem out, const widefelem in)
526{
527    static const widelimb two127p15 = (((widelimb) 1) << 127) +
528        (((widelimb) 1) << 15);
529    static const widelimb two127m71 = (((widelimb) 1) << 127) -
530        (((widelimb) 1) << 71);
531    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
533    widelimb output[5];
534
535    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536    output[0] = in[0] + two127p15;
537    output[1] = in[1] + two127m71m55;
538    output[2] = in[2] + two127m71;
539    output[3] = in[3];
540    output[4] = in[4];
541
542    /* Eliminate in[4], in[5], in[6] */
543    output[4] += in[6] >> 16;
544    output[3] += (in[6] & 0xffff) << 40;
545    output[2] -= in[6];
546
547    output[3] += in[5] >> 16;
548    output[2] += (in[5] & 0xffff) << 40;
549    output[1] -= in[5];
550
551    output[2] += output[4] >> 16;
552    output[1] += (output[4] & 0xffff) << 40;
553    output[0] -= output[4];
554
555    /* Carry 2 -> 3 -> 4 */
556    output[3] += output[2] >> 56;
557    output[2] &= 0x00ffffffffffffff;
558
559    output[4] = output[3] >> 56;
560    output[3] &= 0x00ffffffffffffff;
561
562    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
563
564    /* Eliminate output[4] */
565    output[2] += output[4] >> 16;
566    /* output[2] < 2^56 + 2^56 = 2^57 */
567    output[1] += (output[4] & 0xffff) << 40;
568    output[0] -= output[4];
569
570    /* Carry 0 -> 1 -> 2 -> 3 */
571    output[1] += output[0] >> 56;
572    out[0] = output[0] & 0x00ffffffffffffff;
573
574    output[2] += output[1] >> 56;
575    /* output[2] < 2^57 + 2^72 */
576    out[1] = output[1] & 0x00ffffffffffffff;
577    output[3] += output[2] >> 56;
578    /* output[3] <= 2^56 + 2^16 */
579    out[2] = output[2] & 0x00ffffffffffffff;
580
581    /*-
582     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
583     * out[3] <= 2^56 + 2^16 (due to final carry),
584     * so out < 2*p
585     */
586    out[3] = output[3];
587}
588
589static void felem_square_reduce(felem out, const felem in)
590{
591    widefelem tmp;
592    felem_square(tmp, in);
593    felem_reduce(out, tmp);
594}
595
596static void felem_mul_reduce(felem out, const felem in1, const felem in2)
597{
598    widefelem tmp;
599    felem_mul(tmp, in1, in2);
600    felem_reduce(out, tmp);
601}
602
603/*
604 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
605 * call felem_reduce first)
606 */
607static void felem_contract(felem out, const felem in)
608{
609    static const int64_t two56 = ((limb) 1) << 56;
610    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
611    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
612    int64_t tmp[4], a;
613    tmp[0] = in[0];
614    tmp[1] = in[1];
615    tmp[2] = in[2];
616    tmp[3] = in[3];
617    /* Case 1: a = 1 iff in >= 2^224 */
618    a = (in[3] >> 56);
619    tmp[0] -= a;
620    tmp[1] += a << 40;
621    tmp[3] &= 0x00ffffffffffffff;
622    /*
623     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
624     * and the lower part is non-zero
625     */
626    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
627        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
628    a &= 0x00ffffffffffffff;
629    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
630    a = (a - 1) >> 63;
631    /* subtract 2^224 - 2^96 + 1 if a is all-one */
632    tmp[3] &= a ^ 0xffffffffffffffff;
633    tmp[2] &= a ^ 0xffffffffffffffff;
634    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
635    tmp[0] -= 1 & a;
636
637    /*
638     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
639     * non-zero, so we only need one step
640     */
641    a = tmp[0] >> 63;
642    tmp[0] += two56 & a;
643    tmp[1] -= 1 & a;
644
645    /* carry 1 -> 2 -> 3 */
646    tmp[2] += tmp[1] >> 56;
647    tmp[1] &= 0x00ffffffffffffff;
648
649    tmp[3] += tmp[2] >> 56;
650    tmp[2] &= 0x00ffffffffffffff;
651
652    /* Now 0 <= out < p */
653    out[0] = tmp[0];
654    out[1] = tmp[1];
655    out[2] = tmp[2];
656    out[3] = tmp[3];
657}
658
659/*
660 * Get negative value: out = -in
661 * Requires in[i] < 2^63,
662 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
663 */
664static void felem_neg(felem out, const felem in)
665{
666    widefelem tmp = {0};
667    felem_diff_128_64(tmp, in);
668    felem_reduce(out, tmp);
669}
670
671/*
672 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
673 * elements are reduced to in < 2^225, so we only need to check three cases:
674 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
675 */
676static limb felem_is_zero(const felem in)
677{
678    limb zero, two224m96p1, two225m97p2;
679
680    zero = in[0] | in[1] | in[2] | in[3];
681    zero = (((int64_t) (zero) - 1) >> 63) & 1;
682    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
683        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
684    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
685    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
686        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
687    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
688    return (zero | two224m96p1 | two225m97p2);
689}
690
691static int felem_is_zero_int(const void *in)
692{
693    return (int)(felem_is_zero(in) & ((limb) 1));
694}
695
696/* Invert a field element */
697/* Computation chain copied from djb's code */
698static void felem_inv(felem out, const felem in)
699{
700    felem ftmp, ftmp2, ftmp3, ftmp4;
701    widefelem tmp;
702    unsigned i;
703
704    felem_square(tmp, in);
705    felem_reduce(ftmp, tmp);    /* 2 */
706    felem_mul(tmp, in, ftmp);
707    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
708    felem_square(tmp, ftmp);
709    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
710    felem_mul(tmp, in, ftmp);
711    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
712    felem_square(tmp, ftmp);
713    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
714    felem_square(tmp, ftmp2);
715    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
716    felem_square(tmp, ftmp2);
717    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
718    felem_mul(tmp, ftmp2, ftmp);
719    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
720    felem_square(tmp, ftmp);
721    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
722    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
723        felem_square(tmp, ftmp2);
724        felem_reduce(ftmp2, tmp);
725    }
726    felem_mul(tmp, ftmp2, ftmp);
727    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
728    felem_square(tmp, ftmp2);
729    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
730    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
731        felem_square(tmp, ftmp3);
732        felem_reduce(ftmp3, tmp);
733    }
734    felem_mul(tmp, ftmp3, ftmp2);
735    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
736    felem_square(tmp, ftmp2);
737    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
738    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
739        felem_square(tmp, ftmp3);
740        felem_reduce(ftmp3, tmp);
741    }
742    felem_mul(tmp, ftmp3, ftmp2);
743    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
744    felem_square(tmp, ftmp3);
745    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
746    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
747        felem_square(tmp, ftmp4);
748        felem_reduce(ftmp4, tmp);
749    }
750    felem_mul(tmp, ftmp3, ftmp4);
751    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
752    felem_square(tmp, ftmp3);
753    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
754    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
755        felem_square(tmp, ftmp4);
756        felem_reduce(ftmp4, tmp);
757    }
758    felem_mul(tmp, ftmp2, ftmp4);
759    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
760    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
761        felem_square(tmp, ftmp2);
762        felem_reduce(ftmp2, tmp);
763    }
764    felem_mul(tmp, ftmp2, ftmp);
765    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
766    felem_square(tmp, ftmp);
767    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
768    felem_mul(tmp, ftmp, in);
769    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
770    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
771        felem_square(tmp, ftmp);
772        felem_reduce(ftmp, tmp);
773    }
774    felem_mul(tmp, ftmp, ftmp3);
775    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
776}
777
778/*
779 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
780 * out to itself.
781 */
782static void copy_conditional(felem out, const felem in, limb icopy)
783{
784    unsigned i;
785    /*
786     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
787     */
788    const limb copy = -icopy;
789    for (i = 0; i < 4; ++i) {
790        const limb tmp = copy & (in[i] ^ out[i]);
791        out[i] ^= tmp;
792    }
793}
794
795/******************************************************************************/
796/*-
797 *                       ELLIPTIC CURVE POINT OPERATIONS
798 *
799 * Points are represented in Jacobian projective coordinates:
800 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
801 * or to the point at infinity if Z == 0.
802 *
803 */
804
805/*-
806 * Double an elliptic curve point:
807 * (X', Y', Z') = 2 * (X, Y, Z), where
808 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
809 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
810 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
811 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
812 * while x_out == y_in is not (maybe this works, but it's not tested).
813 */
814static void
815point_double(felem x_out, felem y_out, felem z_out,
816             const felem x_in, const felem y_in, const felem z_in)
817{
818    widefelem tmp, tmp2;
819    felem delta, gamma, beta, alpha, ftmp, ftmp2;
820
821    felem_assign(ftmp, x_in);
822    felem_assign(ftmp2, x_in);
823
824    /* delta = z^2 */
825    felem_square(tmp, z_in);
826    felem_reduce(delta, tmp);
827
828    /* gamma = y^2 */
829    felem_square(tmp, y_in);
830    felem_reduce(gamma, tmp);
831
832    /* beta = x*gamma */
833    felem_mul(tmp, x_in, gamma);
834    felem_reduce(beta, tmp);
835
836    /* alpha = 3*(x-delta)*(x+delta) */
837    felem_diff(ftmp, delta);
838    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
839    felem_sum(ftmp2, delta);
840    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
841    felem_scalar(ftmp2, 3);
842    /* ftmp2[i] < 3 * 2^58 < 2^60 */
843    felem_mul(tmp, ftmp, ftmp2);
844    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
845    felem_reduce(alpha, tmp);
846
847    /* x' = alpha^2 - 8*beta */
848    felem_square(tmp, alpha);
849    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
850    felem_assign(ftmp, beta);
851    felem_scalar(ftmp, 8);
852    /* ftmp[i] < 8 * 2^57 = 2^60 */
853    felem_diff_128_64(tmp, ftmp);
854    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
855    felem_reduce(x_out, tmp);
856
857    /* z' = (y + z)^2 - gamma - delta */
858    felem_sum(delta, gamma);
859    /* delta[i] < 2^57 + 2^57 = 2^58 */
860    felem_assign(ftmp, y_in);
861    felem_sum(ftmp, z_in);
862    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
863    felem_square(tmp, ftmp);
864    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
865    felem_diff_128_64(tmp, delta);
866    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
867    felem_reduce(z_out, tmp);
868
869    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
870    felem_scalar(beta, 4);
871    /* beta[i] < 4 * 2^57 = 2^59 */
872    felem_diff(beta, x_out);
873    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
874    felem_mul(tmp, alpha, beta);
875    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
876    felem_square(tmp2, gamma);
877    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
878    widefelem_scalar(tmp2, 8);
879    /* tmp2[i] < 8 * 2^116 = 2^119 */
880    widefelem_diff(tmp, tmp2);
881    /* tmp[i] < 2^119 + 2^120 < 2^121 */
882    felem_reduce(y_out, tmp);
883}
884
885/*-
886 * Add two elliptic curve points:
887 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
888 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
889 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
890 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
891 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
892 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
893 *
894 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
895 */
896
897/*
898 * This function is not entirely constant-time: it includes a branch for
899 * checking whether the two input points are equal, (while not equal to the
900 * point at infinity). This case never happens during single point
901 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
902 */
903static void point_add(felem x3, felem y3, felem z3,
904                      const felem x1, const felem y1, const felem z1,
905                      const int mixed, const felem x2, const felem y2,
906                      const felem z2)
907{
908    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
909    widefelem tmp, tmp2;
910    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
911    limb points_equal;
912
913    if (!mixed) {
914        /* ftmp2 = z2^2 */
915        felem_square(tmp, z2);
916        felem_reduce(ftmp2, tmp);
917
918        /* ftmp4 = z2^3 */
919        felem_mul(tmp, ftmp2, z2);
920        felem_reduce(ftmp4, tmp);
921
922        /* ftmp4 = z2^3*y1 */
923        felem_mul(tmp2, ftmp4, y1);
924        felem_reduce(ftmp4, tmp2);
925
926        /* ftmp2 = z2^2*x1 */
927        felem_mul(tmp2, ftmp2, x1);
928        felem_reduce(ftmp2, tmp2);
929    } else {
930        /*
931         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
932         */
933
934        /* ftmp4 = z2^3*y1 */
935        felem_assign(ftmp4, y1);
936
937        /* ftmp2 = z2^2*x1 */
938        felem_assign(ftmp2, x1);
939    }
940
941    /* ftmp = z1^2 */
942    felem_square(tmp, z1);
943    felem_reduce(ftmp, tmp);
944
945    /* ftmp3 = z1^3 */
946    felem_mul(tmp, ftmp, z1);
947    felem_reduce(ftmp3, tmp);
948
949    /* tmp = z1^3*y2 */
950    felem_mul(tmp, ftmp3, y2);
951    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
952
953    /* ftmp3 = z1^3*y2 - z2^3*y1 */
954    felem_diff_128_64(tmp, ftmp4);
955    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
956    felem_reduce(ftmp3, tmp);
957
958    /* tmp = z1^2*x2 */
959    felem_mul(tmp, ftmp, x2);
960    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
961
962    /* ftmp = z1^2*x2 - z2^2*x1 */
963    felem_diff_128_64(tmp, ftmp2);
964    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
965    felem_reduce(ftmp, tmp);
966
967    /*
968     * The formulae are incorrect if the points are equal, in affine coordinates
969     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
970     * happens.
971     *
972     * We use bitwise operations to avoid potential side-channels introduced by
973     * the short-circuiting behaviour of boolean operators.
974     */
975    x_equal = felem_is_zero(ftmp);
976    y_equal = felem_is_zero(ftmp3);
977    /*
978     * The special case of either point being the point at infinity (z1 and/or
979     * z2 are zero), is handled separately later on in this function, so we
980     * avoid jumping to point_double here in those special cases.
981     */
982    z1_is_zero = felem_is_zero(z1);
983    z2_is_zero = felem_is_zero(z2);
984
985    /*
986     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
987     * specific implementation `felem_is_zero()` returns truth as `0x1`
988     * (rather than `0xff..ff`).
989     *
990     * This implies that `~true` in this implementation becomes
991     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
992     * the if expression, we mask out only the last bit in the next
993     * line.
994     */
995    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
996
997    if (points_equal) {
998        /*
999         * This is obviously not constant-time but, as mentioned before, this
1000         * case never happens during single point multiplication, so there is no
1001         * timing leak for ECDH or ECDSA signing.
1002         */
1003        point_double(x3, y3, z3, x1, y1, z1);
1004        return;
1005    }
1006
1007    /* ftmp5 = z1*z2 */
1008    if (!mixed) {
1009        felem_mul(tmp, z1, z2);
1010        felem_reduce(ftmp5, tmp);
1011    } else {
1012        /* special case z2 = 0 is handled later */
1013        felem_assign(ftmp5, z1);
1014    }
1015
1016    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017    felem_mul(tmp, ftmp, ftmp5);
1018    felem_reduce(z_out, tmp);
1019
1020    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021    felem_assign(ftmp5, ftmp);
1022    felem_square(tmp, ftmp);
1023    felem_reduce(ftmp, tmp);
1024
1025    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026    felem_mul(tmp, ftmp, ftmp5);
1027    felem_reduce(ftmp5, tmp);
1028
1029    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030    felem_mul(tmp, ftmp2, ftmp);
1031    felem_reduce(ftmp2, tmp);
1032
1033    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034    felem_mul(tmp, ftmp4, ftmp5);
1035    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036
1037    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038    felem_square(tmp2, ftmp3);
1039    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040
1041    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042    felem_diff_128_64(tmp2, ftmp5);
1043    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044
1045    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046    felem_assign(ftmp5, ftmp2);
1047    felem_scalar(ftmp5, 2);
1048    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049
1050    /*-
1051     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053     */
1054    felem_diff_128_64(tmp2, ftmp5);
1055    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056    felem_reduce(x_out, tmp2);
1057
1058    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059    felem_diff(ftmp2, x_out);
1060    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061
1062    /*
1063     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064     */
1065    felem_mul(tmp2, ftmp3, ftmp2);
1066    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067
1068    /*-
1069     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071     */
1072    widefelem_diff(tmp2, tmp);
1073    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074    felem_reduce(y_out, tmp2);
1075
1076    /*
1077     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078     * the point at infinity, so we need to check for this separately
1079     */
1080
1081    /*
1082     * if point 1 is at infinity, copy point 2 to output, and vice versa
1083     */
1084    copy_conditional(x_out, x2, z1_is_zero);
1085    copy_conditional(x_out, x1, z2_is_zero);
1086    copy_conditional(y_out, y2, z1_is_zero);
1087    copy_conditional(y_out, y1, z2_is_zero);
1088    copy_conditional(z_out, z2, z1_is_zero);
1089    copy_conditional(z_out, z1, z2_is_zero);
1090    felem_assign(x3, x_out);
1091    felem_assign(y3, y_out);
1092    felem_assign(z3, z_out);
1093}
1094
1095/*
1096 * select_point selects the |idx|th point from a precomputation table and
1097 * copies it to out.
1098 * The pre_comp array argument should be size of |size| argument
1099 */
1100static void select_point(const u64 idx, unsigned int size,
1101                         const felem pre_comp[][3], felem out[3])
1102{
1103    unsigned i, j;
1104    limb *outlimbs = &out[0][0];
1105
1106    memset(out, 0, sizeof(*out) * 3);
1107    for (i = 0; i < size; i++) {
1108        const limb *inlimbs = &pre_comp[i][0][0];
1109        u64 mask = i ^ idx;
1110        mask |= mask >> 4;
1111        mask |= mask >> 2;
1112        mask |= mask >> 1;
1113        mask &= 1;
1114        mask--;
1115        for (j = 0; j < 4 * 3; j++)
1116            outlimbs[j] |= inlimbs[j] & mask;
1117    }
1118}
1119
1120/* get_bit returns the |i|th bit in |in| */
1121static char get_bit(const felem_bytearray in, unsigned i)
1122{
1123    if (i >= 224)
1124        return 0;
1125    return (in[i >> 3] >> (i & 7)) & 1;
1126}
1127
1128/*
1129 * Interleaved point multiplication using precomputed point multiples: The
1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134 */
1135static void batch_mul(felem x_out, felem y_out, felem z_out,
1136                      const felem_bytearray scalars[],
1137                      const unsigned num_points, const u8 *g_scalar,
1138                      const int mixed, const felem pre_comp[][17][3],
1139                      const felem g_pre_comp[2][16][3])
1140{
1141    int i, skip;
1142    unsigned num;
1143    unsigned gen_mul = (g_scalar != NULL);
1144    felem nq[3], tmp[4];
1145    u64 bits;
1146    u8 sign, digit;
1147
1148    /* set nq to the point at infinity */
1149    memset(nq, 0, sizeof(nq));
1150
1151    /*
1152     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153     * of the generator (two in each of the last 28 rounds) and additions of
1154     * other points multiples (every 5th round).
1155     */
1156    skip = 1;                   /* save two point operations in the first
1157                                 * round */
1158    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159        /* double */
1160        if (!skip)
1161            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162
1163        /* add multiples of the generator */
1164        if (gen_mul && (i <= 27)) {
1165            /* first, look 28 bits upwards */
1166            bits = get_bit(g_scalar, i + 196) << 3;
1167            bits |= get_bit(g_scalar, i + 140) << 2;
1168            bits |= get_bit(g_scalar, i + 84) << 1;
1169            bits |= get_bit(g_scalar, i + 28);
1170            /* select the point to add, in constant time */
1171            select_point(bits, 16, g_pre_comp[1], tmp);
1172
1173            if (!skip) {
1174                /* value 1 below is argument for "mixed" */
1175                point_add(nq[0], nq[1], nq[2],
1176                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177            } else {
1178                memcpy(nq, tmp, 3 * sizeof(felem));
1179                skip = 0;
1180            }
1181
1182            /* second, look at the current position */
1183            bits = get_bit(g_scalar, i + 168) << 3;
1184            bits |= get_bit(g_scalar, i + 112) << 2;
1185            bits |= get_bit(g_scalar, i + 56) << 1;
1186            bits |= get_bit(g_scalar, i);
1187            /* select the point to add, in constant time */
1188            select_point(bits, 16, g_pre_comp[0], tmp);
1189            point_add(nq[0], nq[1], nq[2],
1190                      nq[0], nq[1], nq[2],
1191                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192        }
1193
1194        /* do other additions every 5 doublings */
1195        if (num_points && (i % 5 == 0)) {
1196            /* loop over all scalars */
1197            for (num = 0; num < num_points; ++num) {
1198                bits = get_bit(scalars[num], i + 4) << 5;
1199                bits |= get_bit(scalars[num], i + 3) << 4;
1200                bits |= get_bit(scalars[num], i + 2) << 3;
1201                bits |= get_bit(scalars[num], i + 1) << 2;
1202                bits |= get_bit(scalars[num], i) << 1;
1203                bits |= get_bit(scalars[num], i - 1);
1204                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205
1206                /* select the point to add or subtract */
1207                select_point(digit, 17, pre_comp[num], tmp);
1208                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209                                            * point */
1210                copy_conditional(tmp[1], tmp[3], sign);
1211
1212                if (!skip) {
1213                    point_add(nq[0], nq[1], nq[2],
1214                              nq[0], nq[1], nq[2],
1215                              mixed, tmp[0], tmp[1], tmp[2]);
1216                } else {
1217                    memcpy(nq, tmp, 3 * sizeof(felem));
1218                    skip = 0;
1219                }
1220            }
1221        }
1222    }
1223    felem_assign(x_out, nq[0]);
1224    felem_assign(y_out, nq[1]);
1225    felem_assign(z_out, nq[2]);
1226}
1227
1228/******************************************************************************/
1229/*
1230 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231 */
1232
1233static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1234{
1235    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1236
1237    if (!ret) {
1238        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239        return ret;
1240    }
1241
1242    ret->references = 1;
1243
1244    ret->lock = CRYPTO_THREAD_lock_new();
1245    if (ret->lock == NULL) {
1246        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1247        OPENSSL_free(ret);
1248        return NULL;
1249    }
1250    return ret;
1251}
1252
1253NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1254{
1255    int i;
1256    if (p != NULL)
1257        CRYPTO_UP_REF(&p->references, &i, p->lock);
1258    return p;
1259}
1260
1261void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1262{
1263    int i;
1264
1265    if (p == NULL)
1266        return;
1267
1268    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1269    REF_PRINT_COUNT("EC_nistp224", x);
1270    if (i > 0)
1271        return;
1272    REF_ASSERT_ISNT(i < 0);
1273
1274    CRYPTO_THREAD_lock_free(p->lock);
1275    OPENSSL_free(p);
1276}
1277
1278/******************************************************************************/
1279/*
1280 * OPENSSL EC_METHOD FUNCTIONS
1281 */
1282
1283int ec_GFp_nistp224_group_init(EC_GROUP *group)
1284{
1285    int ret;
1286    ret = ec_GFp_simple_group_init(group);
1287    group->a_is_minus3 = 1;
1288    return ret;
1289}
1290
1291int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1292                                    const BIGNUM *a, const BIGNUM *b,
1293                                    BN_CTX *ctx)
1294{
1295    int ret = 0;
1296    BN_CTX *new_ctx = NULL;
1297    BIGNUM *curve_p, *curve_a, *curve_b;
1298
1299    if (ctx == NULL)
1300        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1301            return 0;
1302    BN_CTX_start(ctx);
1303    curve_p = BN_CTX_get(ctx);
1304    curve_a = BN_CTX_get(ctx);
1305    curve_b = BN_CTX_get(ctx);
1306    if (curve_b == NULL)
1307        goto err;
1308    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1309    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1310    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1311    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1312        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1313              EC_R_WRONG_CURVE_PARAMETERS);
1314        goto err;
1315    }
1316    group->field_mod_func = BN_nist_mod_224;
1317    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1318 err:
1319    BN_CTX_end(ctx);
1320    BN_CTX_free(new_ctx);
1321    return ret;
1322}
1323
1324/*
1325 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1326 * (X/Z^2, Y/Z^3)
1327 */
1328int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1329                                                 const EC_POINT *point,
1330                                                 BIGNUM *x, BIGNUM *y,
1331                                                 BN_CTX *ctx)
1332{
1333    felem z1, z2, x_in, y_in, x_out, y_out;
1334    widefelem tmp;
1335
1336    if (EC_POINT_is_at_infinity(group, point)) {
1337        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1338              EC_R_POINT_AT_INFINITY);
1339        return 0;
1340    }
1341    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1342        (!BN_to_felem(z1, point->Z)))
1343        return 0;
1344    felem_inv(z2, z1);
1345    felem_square(tmp, z2);
1346    felem_reduce(z1, tmp);
1347    felem_mul(tmp, x_in, z1);
1348    felem_reduce(x_in, tmp);
1349    felem_contract(x_out, x_in);
1350    if (x != NULL) {
1351        if (!felem_to_BN(x, x_out)) {
1352            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1353                  ERR_R_BN_LIB);
1354            return 0;
1355        }
1356    }
1357    felem_mul(tmp, z1, z2);
1358    felem_reduce(z1, tmp);
1359    felem_mul(tmp, y_in, z1);
1360    felem_reduce(y_in, tmp);
1361    felem_contract(y_out, y_in);
1362    if (y != NULL) {
1363        if (!felem_to_BN(y, y_out)) {
1364            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1365                  ERR_R_BN_LIB);
1366            return 0;
1367        }
1368    }
1369    return 1;
1370}
1371
1372static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373                               felem tmp_felems[ /* num+1 */ ])
1374{
1375    /*
1376     * Runs in constant time, unless an input is the point at infinity (which
1377     * normally shouldn't happen).
1378     */
1379    ec_GFp_nistp_points_make_affine_internal(num,
1380                                             points,
1381                                             sizeof(felem),
1382                                             tmp_felems,
1383                                             (void (*)(void *))felem_one,
1384                                             felem_is_zero_int,
1385                                             (void (*)(void *, const void *))
1386                                             felem_assign,
1387                                             (void (*)(void *, const void *))
1388                                             felem_square_reduce, (void (*)
1389                                                                   (void *,
1390                                                                    const void
1391                                                                    *,
1392                                                                    const void
1393                                                                    *))
1394                                             felem_mul_reduce,
1395                                             (void (*)(void *, const void *))
1396                                             felem_inv,
1397                                             (void (*)(void *, const void *))
1398                                             felem_contract);
1399}
1400
1401/*
1402 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403 * values Result is stored in r (r can equal one of the inputs).
1404 */
1405int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406                               const BIGNUM *scalar, size_t num,
1407                               const EC_POINT *points[],
1408                               const BIGNUM *scalars[], BN_CTX *ctx)
1409{
1410    int ret = 0;
1411    int j;
1412    unsigned i;
1413    int mixed = 0;
1414    BIGNUM *x, *y, *z, *tmp_scalar;
1415    felem_bytearray g_secret;
1416    felem_bytearray *secrets = NULL;
1417    felem (*pre_comp)[17][3] = NULL;
1418    felem *tmp_felems = NULL;
1419    int num_bytes;
1420    int have_pre_comp = 0;
1421    size_t num_points = num;
1422    felem x_in, y_in, z_in, x_out, y_out, z_out;
1423    NISTP224_PRE_COMP *pre = NULL;
1424    const felem(*g_pre_comp)[16][3] = NULL;
1425    EC_POINT *generator = NULL;
1426    const EC_POINT *p = NULL;
1427    const BIGNUM *p_scalar = NULL;
1428
1429    BN_CTX_start(ctx);
1430    x = BN_CTX_get(ctx);
1431    y = BN_CTX_get(ctx);
1432    z = BN_CTX_get(ctx);
1433    tmp_scalar = BN_CTX_get(ctx);
1434    if (tmp_scalar == NULL)
1435        goto err;
1436
1437    if (scalar != NULL) {
1438        pre = group->pre_comp.nistp224;
1439        if (pre)
1440            /* we have precomputation, try to use it */
1441            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442        else
1443            /* try to use the standard precomputation */
1444            g_pre_comp = &gmul[0];
1445        generator = EC_POINT_new(group);
1446        if (generator == NULL)
1447            goto err;
1448        /* get the generator from precomputation */
1449        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1453            goto err;
1454        }
1455        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1456                                                      generator, x, y, z,
1457                                                      ctx))
1458            goto err;
1459        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460            /* precomputation matches generator */
1461            have_pre_comp = 1;
1462        else
1463            /*
1464             * we don't have valid precomputation: treat the generator as a
1465             * random point
1466             */
1467            num_points = num_points + 1;
1468    }
1469
1470    if (num_points > 0) {
1471        if (num_points >= 3) {
1472            /*
1473             * unless we precompute multiples for just one or two points,
1474             * converting those into affine form is time well spent
1475             */
1476            mixed = 1;
1477        }
1478        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480        if (mixed)
1481            tmp_felems =
1482                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483        if ((secrets == NULL) || (pre_comp == NULL)
1484            || (mixed && (tmp_felems == NULL))) {
1485            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1486            goto err;
1487        }
1488
1489        /*
1490         * we treat NULL scalars as 0, and NULL points as points at infinity,
1491         * i.e., they contribute nothing to the linear combination
1492         */
1493        for (i = 0; i < num_points; ++i) {
1494            if (i == num) {
1495                /* the generator */
1496                p = EC_GROUP_get0_generator(group);
1497                p_scalar = scalar;
1498            } else {
1499                /* the i^th point */
1500                p = points[i];
1501                p_scalar = scalars[i];
1502            }
1503            if ((p_scalar != NULL) && (p != NULL)) {
1504                /* reduce scalar to 0 <= scalar < 2^224 */
1505                if ((BN_num_bits(p_scalar) > 224)
1506                    || (BN_is_negative(p_scalar))) {
1507                    /*
1508                     * this is an unusual input, and we don't guarantee
1509                     * constant-timeness
1510                     */
1511                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1512                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1513                        goto err;
1514                    }
1515                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1516                                               secrets[i], sizeof(secrets[i]));
1517                } else {
1518                    num_bytes = BN_bn2lebinpad(p_scalar,
1519                                               secrets[i], sizeof(secrets[i]));
1520                }
1521                if (num_bytes < 0) {
1522                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523                    goto err;
1524                }
1525                /* precompute multiples */
1526                if ((!BN_to_felem(x_out, p->X)) ||
1527                    (!BN_to_felem(y_out, p->Y)) ||
1528                    (!BN_to_felem(z_out, p->Z)))
1529                    goto err;
1530                felem_assign(pre_comp[i][1][0], x_out);
1531                felem_assign(pre_comp[i][1][1], y_out);
1532                felem_assign(pre_comp[i][1][2], z_out);
1533                for (j = 2; j <= 16; ++j) {
1534                    if (j & 1) {
1535                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1536                                  pre_comp[i][j][2], pre_comp[i][1][0],
1537                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1538                                  pre_comp[i][j - 1][0],
1539                                  pre_comp[i][j - 1][1],
1540                                  pre_comp[i][j - 1][2]);
1541                    } else {
1542                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1543                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1544                                     pre_comp[i][j / 2][1],
1545                                     pre_comp[i][j / 2][2]);
1546                    }
1547                }
1548            }
1549        }
1550        if (mixed)
1551            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1552    }
1553
1554    /* the scalar for the generator */
1555    if ((scalar != NULL) && (have_pre_comp)) {
1556        memset(g_secret, 0, sizeof(g_secret));
1557        /* reduce scalar to 0 <= scalar < 2^224 */
1558        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1559            /*
1560             * this is an unusual input, and we don't guarantee
1561             * constant-timeness
1562             */
1563            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1564                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1565                goto err;
1566            }
1567            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1568        } else {
1569            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1570        }
1571        /* do the multiplication with generator precomputation */
1572        batch_mul(x_out, y_out, z_out,
1573                  (const felem_bytearray(*))secrets, num_points,
1574                  g_secret,
1575                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1576    } else {
1577        /* do the multiplication without generator precomputation */
1578        batch_mul(x_out, y_out, z_out,
1579                  (const felem_bytearray(*))secrets, num_points,
1580                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1581    }
1582    /* reduce the output to its unique minimal representation */
1583    felem_contract(x_in, x_out);
1584    felem_contract(y_in, y_out);
1585    felem_contract(z_in, z_out);
1586    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1587        (!felem_to_BN(z, z_in))) {
1588        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1589        goto err;
1590    }
1591    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1592
1593 err:
1594    BN_CTX_end(ctx);
1595    EC_POINT_free(generator);
1596    OPENSSL_free(secrets);
1597    OPENSSL_free(pre_comp);
1598    OPENSSL_free(tmp_felems);
1599    return ret;
1600}
1601
1602int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1603{
1604    int ret = 0;
1605    NISTP224_PRE_COMP *pre = NULL;
1606    int i, j;
1607    BN_CTX *new_ctx = NULL;
1608    BIGNUM *x, *y;
1609    EC_POINT *generator = NULL;
1610    felem tmp_felems[32];
1611
1612    /* throw away old precomputation */
1613    EC_pre_comp_free(group);
1614    if (ctx == NULL)
1615        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1616            return 0;
1617    BN_CTX_start(ctx);
1618    x = BN_CTX_get(ctx);
1619    y = BN_CTX_get(ctx);
1620    if (y == NULL)
1621        goto err;
1622    /* get the generator */
1623    if (group->generator == NULL)
1624        goto err;
1625    generator = EC_POINT_new(group);
1626    if (generator == NULL)
1627        goto err;
1628    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1629    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1630    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1631        goto err;
1632    if ((pre = nistp224_pre_comp_new()) == NULL)
1633        goto err;
1634    /*
1635     * if the generator is the standard one, use built-in precomputation
1636     */
1637    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1638        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1639        goto done;
1640    }
1641    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1642        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1643        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1644        goto err;
1645    /*
1646     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1647     * 2^140*G, 2^196*G for the second one
1648     */
1649    for (i = 1; i <= 8; i <<= 1) {
1650        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1651                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1652                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1653        for (j = 0; j < 27; ++j) {
1654            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1655                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1656                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1657        }
1658        if (i == 8)
1659            break;
1660        point_double(pre->g_pre_comp[0][2 * i][0],
1661                     pre->g_pre_comp[0][2 * i][1],
1662                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1663                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1664        for (j = 0; j < 27; ++j) {
1665            point_double(pre->g_pre_comp[0][2 * i][0],
1666                         pre->g_pre_comp[0][2 * i][1],
1667                         pre->g_pre_comp[0][2 * i][2],
1668                         pre->g_pre_comp[0][2 * i][0],
1669                         pre->g_pre_comp[0][2 * i][1],
1670                         pre->g_pre_comp[0][2 * i][2]);
1671        }
1672    }
1673    for (i = 0; i < 2; i++) {
1674        /* g_pre_comp[i][0] is the point at infinity */
1675        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1676        /* the remaining multiples */
1677        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1678        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1679                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1680                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1681                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1682                  pre->g_pre_comp[i][2][2]);
1683        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1684        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1685                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1686                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1687                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688                  pre->g_pre_comp[i][2][2]);
1689        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1690        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1691                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1692                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1694                  pre->g_pre_comp[i][4][2]);
1695        /*
1696         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1697         */
1698        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1699                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1700                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1701                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702                  pre->g_pre_comp[i][2][2]);
1703        for (j = 1; j < 8; ++j) {
1704            /* odd multiples: add G resp. 2^28*G */
1705            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1706                      pre->g_pre_comp[i][2 * j + 1][1],
1707                      pre->g_pre_comp[i][2 * j + 1][2],
1708                      pre->g_pre_comp[i][2 * j][0],
1709                      pre->g_pre_comp[i][2 * j][1],
1710                      pre->g_pre_comp[i][2 * j][2], 0,
1711                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1712                      pre->g_pre_comp[i][1][2]);
1713        }
1714    }
1715    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1716
1717 done:
1718    SETPRECOMP(group, nistp224, pre);
1719    pre = NULL;
1720    ret = 1;
1721 err:
1722    BN_CTX_end(ctx);
1723    EC_POINT_free(generator);
1724    BN_CTX_free(new_ctx);
1725    EC_nistp224_pre_comp_free(pre);
1726    return ret;
1727}
1728
1729int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1730{
1731    return HAVEPRECOMP(group, nistp224);
1732}
1733
1734#endif
1735