ecp_nistp224.c revision 352193
1/* crypto/ec/ecp_nistp224.c */
2/*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *  Unless required by applicable law or agreed to in writing, software
15 *  distributed under the License is distributed on an "AS IS" BASIS,
16 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 *  See the License for the specific language governing permissions and
18 *  limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31# ifndef OPENSSL_SYS_VMS
32#  include <stdint.h>
33# else
34#  include <inttypes.h>
35# endif
36
37# include <string.h>
38# include <openssl/err.h>
39# include "ec_lcl.h"
40# include "bn_int.h" /* bn_bn2lebinpad, bn_lebin2bn */
41
42# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43  /* even with gcc, the typedef won't work for 32-bit platforms */
44typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
45                                 * platforms */
46# else
47#  error "Need GCC 3.1 or later to define type uint128_t"
48# endif
49
50typedef uint8_t u8;
51typedef uint64_t u64;
52
53/******************************************************************************/
54/*-
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56 *
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
70 * computation.
71 */
72
73typedef uint64_t limb;
74typedef uint128_t widelimb;
75
76typedef limb felem[4];
77typedef widelimb widefelem[7];
78
79/*
80 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81 * group order size for the elliptic curve, and we also use this type for
82 * scalars for point multiplication.
83 */
84typedef u8 felem_bytearray[28];
85
86static const felem_bytearray nistp224_curve_params[5] = {
87    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102};
103
104/*-
105 * Precomputed multiples of the standard generator
106 * Points are given in coordinates (X, Y, Z) where Z normally is 1
107 * (0 for the point at infinity).
108 * For each field element, slice a_0 is word 0, etc.
109 *
110 * The table has 2 * 16 elements, starting with the following:
111 * index | bits    | point
112 * ------+---------+------------------------------
113 *     0 | 0 0 0 0 | 0G
114 *     1 | 0 0 0 1 | 1G
115 *     2 | 0 0 1 0 | 2^56G
116 *     3 | 0 0 1 1 | (2^56 + 1)G
117 *     4 | 0 1 0 0 | 2^112G
118 *     5 | 0 1 0 1 | (2^112 + 1)G
119 *     6 | 0 1 1 0 | (2^112 + 2^56)G
120 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121 *     8 | 1 0 0 0 | 2^168G
122 *     9 | 1 0 0 1 | (2^168 + 1)G
123 *    10 | 1 0 1 0 | (2^168 + 2^56)G
124 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125 *    12 | 1 1 0 0 | (2^168 + 2^112)G
126 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129 * followed by a copy of this with each element multiplied by 2^28.
130 *
131 * The reason for this is so that we can clock bits into four different
132 * locations when doing simple scalar multiplies against the base point,
133 * and then another four locations using the second 16 elements.
134 */
135static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136                                        {0, 0, 0, 0},
137                                        {0, 0, 0, 0}},
138                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140                                        {0xd5819985007e34, 0x75a05a07476444,
141                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142                                        {1, 0, 0, 0}},
143                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145                                        {0x29e0b892dc9c43, 0xece8608436e662,
146                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147                                        {1, 0, 0, 0}},
148                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151                                         0x313865468fafec, 0x5cb379ba910a17},
152                                        {1, 0, 0, 0}},
153                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155                                        {0x7510407766af5d, 0x84d929610d5450,
156                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157                                        {1, 0, 0, 0}},
158                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162                                        {1, 0, 0, 0}},
163                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164                                         0x363f37347c1ba4, 0xc391b45c84725c},
165                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166                                         0xc393da7e222a7f, 0x1efb7890ede244},
167                                        {1, 0, 0, 0}},
168                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171                                         0x966c079b753c89, 0xfe67e4e820b112},
172                                        {1, 0, 0, 0}},
173                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176                                         0x641fab2116fd56, 0x72855882b08394},
177                                        {1, 0, 0, 0}},
178                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182                                        {1, 0, 0, 0}},
183                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186                                         0x9281d688bcbe50, 0xc40331df893881},
187                                        {1, 0, 0, 0}},
188                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192                                        {1, 0, 0, 0}},
193                                       {{0x571e509fb5efb3, 0xade88696410552,
194                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197                                        {1, 0, 0, 0}},
198                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202                                        {1, 0, 0, 0}},
203                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205                                        {0x2891425503b11f, 0x08781030579fea,
206                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207                                        {1, 0, 0, 0}},
208                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209                                         0xff17036691a973, 0xf1aef351497c58},
210                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211                                         0x74a684435bd693, 0xeea7471f962558},
212                                        {1, 0, 0, 0}}},
213{{{0, 0, 0, 0},
214  {0, 0, 0, 0},
215  {0, 0, 0, 0}},
216 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218  {1, 0, 0, 0}},
219 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221  {1, 0, 0, 0}},
222 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224  {1, 0, 0, 0}},
225 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227  {1, 0, 0, 0}},
228 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230  {1, 0, 0, 0}},
231 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233  {1, 0, 0, 0}},
234 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236  {1, 0, 0, 0}},
237 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239  {1, 0, 0, 0}},
240 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242  {1, 0, 0, 0}},
243 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245  {1, 0, 0, 0}},
246 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248  {1, 0, 0, 0}},
249 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251  {1, 0, 0, 0}},
252 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254  {1, 0, 0, 0}},
255 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257  {1, 0, 0, 0}},
258 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260  {1, 0, 0, 0}}}
261};
262
263/* Precomputation for the group generator. */
264typedef struct {
265    felem g_pre_comp[2][16][3];
266    int references;
267} NISTP224_PRE_COMP;
268
269const EC_METHOD *EC_GFp_nistp224_method(void)
270{
271    static const EC_METHOD ret = {
272        EC_FLAGS_DEFAULT_OCT,
273        NID_X9_62_prime_field,
274        ec_GFp_nistp224_group_init,
275        ec_GFp_simple_group_finish,
276        ec_GFp_simple_group_clear_finish,
277        ec_GFp_nist_group_copy,
278        ec_GFp_nistp224_group_set_curve,
279        ec_GFp_simple_group_get_curve,
280        ec_GFp_simple_group_get_degree,
281        ec_GFp_simple_group_check_discriminant,
282        ec_GFp_simple_point_init,
283        ec_GFp_simple_point_finish,
284        ec_GFp_simple_point_clear_finish,
285        ec_GFp_simple_point_copy,
286        ec_GFp_simple_point_set_to_infinity,
287        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289        ec_GFp_simple_point_set_affine_coordinates,
290        ec_GFp_nistp224_point_get_affine_coordinates,
291        0 /* point_set_compressed_coordinates */ ,
292        0 /* point2oct */ ,
293        0 /* oct2point */ ,
294        ec_GFp_simple_add,
295        ec_GFp_simple_dbl,
296        ec_GFp_simple_invert,
297        ec_GFp_simple_is_at_infinity,
298        ec_GFp_simple_is_on_curve,
299        ec_GFp_simple_cmp,
300        ec_GFp_simple_make_affine,
301        ec_GFp_simple_points_make_affine,
302        ec_GFp_nistp224_points_mul,
303        ec_GFp_nistp224_precompute_mult,
304        ec_GFp_nistp224_have_precompute_mult,
305        ec_GFp_nist_field_mul,
306        ec_GFp_nist_field_sqr,
307        0 /* field_div */ ,
308        0 /* field_encode */ ,
309        0 /* field_decode */ ,
310        0                       /* field_set_to_one */
311    };
312
313    return &ret;
314}
315
316/*
317 * Helper functions to convert field elements to/from internal representation
318 */
319static void bin28_to_felem(felem out, const u8 in[28])
320{
321    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325}
326
327static void felem_to_bin28(u8 out[28], const felem in)
328{
329    unsigned i;
330    for (i = 0; i < 7; ++i) {
331        out[i] = in[0] >> (8 * i);
332        out[i + 7] = in[1] >> (8 * i);
333        out[i + 14] = in[2] >> (8 * i);
334        out[i + 21] = in[3] >> (8 * i);
335    }
336}
337
338/* From OpenSSL BIGNUM to internal representation */
339static int BN_to_felem(felem out, const BIGNUM *bn)
340{
341    felem_bytearray b_out;
342    int num_bytes;
343
344    if (BN_is_negative(bn)) {
345        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
346        return 0;
347    }
348    num_bytes = bn_bn2lebinpad(bn, b_out, sizeof(b_out));
349    if (num_bytes < 0) {
350        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
351        return 0;
352    }
353    bin28_to_felem(out, b_out);
354    return 1;
355}
356
357/* From internal representation to OpenSSL BIGNUM */
358static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
359{
360    felem_bytearray b_out;
361    felem_to_bin28(b_out, in);
362    return bn_lebin2bn(b_out, sizeof(b_out), out);
363}
364
365/******************************************************************************/
366/*-
367 *                              FIELD OPERATIONS
368 *
369 * Field operations, using the internal representation of field elements.
370 * NB! These operations are specific to our point multiplication and cannot be
371 * expected to be correct in general - e.g., multiplication with a large scalar
372 * will cause an overflow.
373 *
374 */
375
376static void felem_one(felem out)
377{
378    out[0] = 1;
379    out[1] = 0;
380    out[2] = 0;
381    out[3] = 0;
382}
383
384static void felem_assign(felem out, const felem in)
385{
386    out[0] = in[0];
387    out[1] = in[1];
388    out[2] = in[2];
389    out[3] = in[3];
390}
391
392/* Sum two field elements: out += in */
393static void felem_sum(felem out, const felem in)
394{
395    out[0] += in[0];
396    out[1] += in[1];
397    out[2] += in[2];
398    out[3] += in[3];
399}
400
401/* Get negative value: out = -in */
402/* Assumes in[i] < 2^57 */
403static void felem_neg(felem out, const felem in)
404{
405    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
406    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
407    static const limb two58m42m2 = (((limb) 1) << 58) -
408        (((limb) 1) << 42) - (((limb) 1) << 2);
409
410    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
411    out[0] = two58p2 - in[0];
412    out[1] = two58m42m2 - in[1];
413    out[2] = two58m2 - in[2];
414    out[3] = two58m2 - in[3];
415}
416
417/* Subtract field elements: out -= in */
418/* Assumes in[i] < 2^57 */
419static void felem_diff(felem out, const felem in)
420{
421    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
422    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
423    static const limb two58m42m2 = (((limb) 1) << 58) -
424        (((limb) 1) << 42) - (((limb) 1) << 2);
425
426    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
427    out[0] += two58p2;
428    out[1] += two58m42m2;
429    out[2] += two58m2;
430    out[3] += two58m2;
431
432    out[0] -= in[0];
433    out[1] -= in[1];
434    out[2] -= in[2];
435    out[3] -= in[3];
436}
437
438/* Subtract in unreduced 128-bit mode: out -= in */
439/* Assumes in[i] < 2^119 */
440static void widefelem_diff(widefelem out, const widefelem in)
441{
442    static const widelimb two120 = ((widelimb) 1) << 120;
443    static const widelimb two120m64 = (((widelimb) 1) << 120) -
444        (((widelimb) 1) << 64);
445    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
446        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
447
448    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
449    out[0] += two120;
450    out[1] += two120m64;
451    out[2] += two120m64;
452    out[3] += two120;
453    out[4] += two120m104m64;
454    out[5] += two120m64;
455    out[6] += two120m64;
456
457    out[0] -= in[0];
458    out[1] -= in[1];
459    out[2] -= in[2];
460    out[3] -= in[3];
461    out[4] -= in[4];
462    out[5] -= in[5];
463    out[6] -= in[6];
464}
465
466/* Subtract in mixed mode: out128 -= in64 */
467/* in[i] < 2^63 */
468static void felem_diff_128_64(widefelem out, const felem in)
469{
470    static const widelimb two64p8 = (((widelimb) 1) << 64) +
471        (((widelimb) 1) << 8);
472    static const widelimb two64m8 = (((widelimb) 1) << 64) -
473        (((widelimb) 1) << 8);
474    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
475        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
476
477    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
478    out[0] += two64p8;
479    out[1] += two64m48m8;
480    out[2] += two64m8;
481    out[3] += two64m8;
482
483    out[0] -= in[0];
484    out[1] -= in[1];
485    out[2] -= in[2];
486    out[3] -= in[3];
487}
488
489/*
490 * Multiply a field element by a scalar: out = out * scalar The scalars we
491 * actually use are small, so results fit without overflow
492 */
493static void felem_scalar(felem out, const limb scalar)
494{
495    out[0] *= scalar;
496    out[1] *= scalar;
497    out[2] *= scalar;
498    out[3] *= scalar;
499}
500
501/*
502 * Multiply an unreduced field element by a scalar: out = out * scalar The
503 * scalars we actually use are small, so results fit without overflow
504 */
505static void widefelem_scalar(widefelem out, const widelimb scalar)
506{
507    out[0] *= scalar;
508    out[1] *= scalar;
509    out[2] *= scalar;
510    out[3] *= scalar;
511    out[4] *= scalar;
512    out[5] *= scalar;
513    out[6] *= scalar;
514}
515
516/* Square a field element: out = in^2 */
517static void felem_square(widefelem out, const felem in)
518{
519    limb tmp0, tmp1, tmp2;
520    tmp0 = 2 * in[0];
521    tmp1 = 2 * in[1];
522    tmp2 = 2 * in[2];
523    out[0] = ((widelimb) in[0]) * in[0];
524    out[1] = ((widelimb) in[0]) * tmp1;
525    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
526    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
527    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
528    out[5] = ((widelimb) in[3]) * tmp2;
529    out[6] = ((widelimb) in[3]) * in[3];
530}
531
532/* Multiply two field elements: out = in1 * in2 */
533static void felem_mul(widefelem out, const felem in1, const felem in2)
534{
535    out[0] = ((widelimb) in1[0]) * in2[0];
536    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
537    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
538        ((widelimb) in1[2]) * in2[0];
539    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
540        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
541    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
542        ((widelimb) in1[3]) * in2[1];
543    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
544    out[6] = ((widelimb) in1[3]) * in2[3];
545}
546
547/*-
548 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
549 * Requires in[i] < 2^126,
550 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
551static void felem_reduce(felem out, const widefelem in)
552{
553    static const widelimb two127p15 = (((widelimb) 1) << 127) +
554        (((widelimb) 1) << 15);
555    static const widelimb two127m71 = (((widelimb) 1) << 127) -
556        (((widelimb) 1) << 71);
557    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
558        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
559    widelimb output[5];
560
561    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
562    output[0] = in[0] + two127p15;
563    output[1] = in[1] + two127m71m55;
564    output[2] = in[2] + two127m71;
565    output[3] = in[3];
566    output[4] = in[4];
567
568    /* Eliminate in[4], in[5], in[6] */
569    output[4] += in[6] >> 16;
570    output[3] += (in[6] & 0xffff) << 40;
571    output[2] -= in[6];
572
573    output[3] += in[5] >> 16;
574    output[2] += (in[5] & 0xffff) << 40;
575    output[1] -= in[5];
576
577    output[2] += output[4] >> 16;
578    output[1] += (output[4] & 0xffff) << 40;
579    output[0] -= output[4];
580
581    /* Carry 2 -> 3 -> 4 */
582    output[3] += output[2] >> 56;
583    output[2] &= 0x00ffffffffffffff;
584
585    output[4] = output[3] >> 56;
586    output[3] &= 0x00ffffffffffffff;
587
588    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
589
590    /* Eliminate output[4] */
591    output[2] += output[4] >> 16;
592    /* output[2] < 2^56 + 2^56 = 2^57 */
593    output[1] += (output[4] & 0xffff) << 40;
594    output[0] -= output[4];
595
596    /* Carry 0 -> 1 -> 2 -> 3 */
597    output[1] += output[0] >> 56;
598    out[0] = output[0] & 0x00ffffffffffffff;
599
600    output[2] += output[1] >> 56;
601    /* output[2] < 2^57 + 2^72 */
602    out[1] = output[1] & 0x00ffffffffffffff;
603    output[3] += output[2] >> 56;
604    /* output[3] <= 2^56 + 2^16 */
605    out[2] = output[2] & 0x00ffffffffffffff;
606
607    /*-
608     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
609     * out[3] <= 2^56 + 2^16 (due to final carry),
610     * so out < 2*p
611     */
612    out[3] = output[3];
613}
614
615static void felem_square_reduce(felem out, const felem in)
616{
617    widefelem tmp;
618    felem_square(tmp, in);
619    felem_reduce(out, tmp);
620}
621
622static void felem_mul_reduce(felem out, const felem in1, const felem in2)
623{
624    widefelem tmp;
625    felem_mul(tmp, in1, in2);
626    felem_reduce(out, tmp);
627}
628
629/*
630 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
631 * call felem_reduce first)
632 */
633static void felem_contract(felem out, const felem in)
634{
635    static const int64_t two56 = ((limb) 1) << 56;
636    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
637    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
638    int64_t tmp[4], a;
639    tmp[0] = in[0];
640    tmp[1] = in[1];
641    tmp[2] = in[2];
642    tmp[3] = in[3];
643    /* Case 1: a = 1 iff in >= 2^224 */
644    a = (in[3] >> 56);
645    tmp[0] -= a;
646    tmp[1] += a << 40;
647    tmp[3] &= 0x00ffffffffffffff;
648    /*
649     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
650     * and the lower part is non-zero
651     */
652    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
653        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
654    a &= 0x00ffffffffffffff;
655    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
656    a = (a - 1) >> 63;
657    /* subtract 2^224 - 2^96 + 1 if a is all-one */
658    tmp[3] &= a ^ 0xffffffffffffffff;
659    tmp[2] &= a ^ 0xffffffffffffffff;
660    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
661    tmp[0] -= 1 & a;
662
663    /*
664     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
665     * non-zero, so we only need one step
666     */
667    a = tmp[0] >> 63;
668    tmp[0] += two56 & a;
669    tmp[1] -= 1 & a;
670
671    /* carry 1 -> 2 -> 3 */
672    tmp[2] += tmp[1] >> 56;
673    tmp[1] &= 0x00ffffffffffffff;
674
675    tmp[3] += tmp[2] >> 56;
676    tmp[2] &= 0x00ffffffffffffff;
677
678    /* Now 0 <= out < p */
679    out[0] = tmp[0];
680    out[1] = tmp[1];
681    out[2] = tmp[2];
682    out[3] = tmp[3];
683}
684
685/*
686 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
687 * elements are reduced to in < 2^225, so we only need to check three cases:
688 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
689 */
690static limb felem_is_zero(const felem in)
691{
692    limb zero, two224m96p1, two225m97p2;
693
694    zero = in[0] | in[1] | in[2] | in[3];
695    zero = (((int64_t) (zero) - 1) >> 63) & 1;
696    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
697        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
698    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
699    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
700        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
701    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
702    return (zero | two224m96p1 | two225m97p2);
703}
704
705static int felem_is_zero_int(const void *in)
706{
707    return (int)(felem_is_zero(in) & ((limb) 1));
708}
709
710/* Invert a field element */
711/* Computation chain copied from djb's code */
712static void felem_inv(felem out, const felem in)
713{
714    felem ftmp, ftmp2, ftmp3, ftmp4;
715    widefelem tmp;
716    unsigned i;
717
718    felem_square(tmp, in);
719    felem_reduce(ftmp, tmp);    /* 2 */
720    felem_mul(tmp, in, ftmp);
721    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
722    felem_square(tmp, ftmp);
723    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
724    felem_mul(tmp, in, ftmp);
725    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
726    felem_square(tmp, ftmp);
727    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
728    felem_square(tmp, ftmp2);
729    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
730    felem_square(tmp, ftmp2);
731    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
732    felem_mul(tmp, ftmp2, ftmp);
733    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
734    felem_square(tmp, ftmp);
735    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
736    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
737        felem_square(tmp, ftmp2);
738        felem_reduce(ftmp2, tmp);
739    }
740    felem_mul(tmp, ftmp2, ftmp);
741    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
742    felem_square(tmp, ftmp2);
743    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
744    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
745        felem_square(tmp, ftmp3);
746        felem_reduce(ftmp3, tmp);
747    }
748    felem_mul(tmp, ftmp3, ftmp2);
749    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
750    felem_square(tmp, ftmp2);
751    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
752    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
753        felem_square(tmp, ftmp3);
754        felem_reduce(ftmp3, tmp);
755    }
756    felem_mul(tmp, ftmp3, ftmp2);
757    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
758    felem_square(tmp, ftmp3);
759    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
760    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
761        felem_square(tmp, ftmp4);
762        felem_reduce(ftmp4, tmp);
763    }
764    felem_mul(tmp, ftmp3, ftmp4);
765    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
766    felem_square(tmp, ftmp3);
767    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
768    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
769        felem_square(tmp, ftmp4);
770        felem_reduce(ftmp4, tmp);
771    }
772    felem_mul(tmp, ftmp2, ftmp4);
773    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
774    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
775        felem_square(tmp, ftmp2);
776        felem_reduce(ftmp2, tmp);
777    }
778    felem_mul(tmp, ftmp2, ftmp);
779    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
780    felem_square(tmp, ftmp);
781    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
782    felem_mul(tmp, ftmp, in);
783    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
784    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
785        felem_square(tmp, ftmp);
786        felem_reduce(ftmp, tmp);
787    }
788    felem_mul(tmp, ftmp, ftmp3);
789    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
790}
791
792/*
793 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
794 * out to itself.
795 */
796static void copy_conditional(felem out, const felem in, limb icopy)
797{
798    unsigned i;
799    /*
800     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
801     */
802    const limb copy = -icopy;
803    for (i = 0; i < 4; ++i) {
804        const limb tmp = copy & (in[i] ^ out[i]);
805        out[i] ^= tmp;
806    }
807}
808
809/******************************************************************************/
810/*-
811 *                       ELLIPTIC CURVE POINT OPERATIONS
812 *
813 * Points are represented in Jacobian projective coordinates:
814 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
815 * or to the point at infinity if Z == 0.
816 *
817 */
818
819/*-
820 * Double an elliptic curve point:
821 * (X', Y', Z') = 2 * (X, Y, Z), where
822 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
823 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
824 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
825 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
826 * while x_out == y_in is not (maybe this works, but it's not tested).
827 */
828static void
829point_double(felem x_out, felem y_out, felem z_out,
830             const felem x_in, const felem y_in, const felem z_in)
831{
832    widefelem tmp, tmp2;
833    felem delta, gamma, beta, alpha, ftmp, ftmp2;
834
835    felem_assign(ftmp, x_in);
836    felem_assign(ftmp2, x_in);
837
838    /* delta = z^2 */
839    felem_square(tmp, z_in);
840    felem_reduce(delta, tmp);
841
842    /* gamma = y^2 */
843    felem_square(tmp, y_in);
844    felem_reduce(gamma, tmp);
845
846    /* beta = x*gamma */
847    felem_mul(tmp, x_in, gamma);
848    felem_reduce(beta, tmp);
849
850    /* alpha = 3*(x-delta)*(x+delta) */
851    felem_diff(ftmp, delta);
852    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
853    felem_sum(ftmp2, delta);
854    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
855    felem_scalar(ftmp2, 3);
856    /* ftmp2[i] < 3 * 2^58 < 2^60 */
857    felem_mul(tmp, ftmp, ftmp2);
858    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
859    felem_reduce(alpha, tmp);
860
861    /* x' = alpha^2 - 8*beta */
862    felem_square(tmp, alpha);
863    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
864    felem_assign(ftmp, beta);
865    felem_scalar(ftmp, 8);
866    /* ftmp[i] < 8 * 2^57 = 2^60 */
867    felem_diff_128_64(tmp, ftmp);
868    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
869    felem_reduce(x_out, tmp);
870
871    /* z' = (y + z)^2 - gamma - delta */
872    felem_sum(delta, gamma);
873    /* delta[i] < 2^57 + 2^57 = 2^58 */
874    felem_assign(ftmp, y_in);
875    felem_sum(ftmp, z_in);
876    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
877    felem_square(tmp, ftmp);
878    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
879    felem_diff_128_64(tmp, delta);
880    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
881    felem_reduce(z_out, tmp);
882
883    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
884    felem_scalar(beta, 4);
885    /* beta[i] < 4 * 2^57 = 2^59 */
886    felem_diff(beta, x_out);
887    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
888    felem_mul(tmp, alpha, beta);
889    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
890    felem_square(tmp2, gamma);
891    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
892    widefelem_scalar(tmp2, 8);
893    /* tmp2[i] < 8 * 2^116 = 2^119 */
894    widefelem_diff(tmp, tmp2);
895    /* tmp[i] < 2^119 + 2^120 < 2^121 */
896    felem_reduce(y_out, tmp);
897}
898
899/*-
900 * Add two elliptic curve points:
901 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
902 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
903 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
904 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
905 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
906 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
907 *
908 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
909 */
910
911/*
912 * This function is not entirely constant-time: it includes a branch for
913 * checking whether the two input points are equal, (while not equal to the
914 * point at infinity). This case never happens during single point
915 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
916 */
917static void point_add(felem x3, felem y3, felem z3,
918                      const felem x1, const felem y1, const felem z1,
919                      const int mixed, const felem x2, const felem y2,
920                      const felem z2)
921{
922    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
923    widefelem tmp, tmp2;
924    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
925
926    if (!mixed) {
927        /* ftmp2 = z2^2 */
928        felem_square(tmp, z2);
929        felem_reduce(ftmp2, tmp);
930
931        /* ftmp4 = z2^3 */
932        felem_mul(tmp, ftmp2, z2);
933        felem_reduce(ftmp4, tmp);
934
935        /* ftmp4 = z2^3*y1 */
936        felem_mul(tmp2, ftmp4, y1);
937        felem_reduce(ftmp4, tmp2);
938
939        /* ftmp2 = z2^2*x1 */
940        felem_mul(tmp2, ftmp2, x1);
941        felem_reduce(ftmp2, tmp2);
942    } else {
943        /*
944         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
945         */
946
947        /* ftmp4 = z2^3*y1 */
948        felem_assign(ftmp4, y1);
949
950        /* ftmp2 = z2^2*x1 */
951        felem_assign(ftmp2, x1);
952    }
953
954    /* ftmp = z1^2 */
955    felem_square(tmp, z1);
956    felem_reduce(ftmp, tmp);
957
958    /* ftmp3 = z1^3 */
959    felem_mul(tmp, ftmp, z1);
960    felem_reduce(ftmp3, tmp);
961
962    /* tmp = z1^3*y2 */
963    felem_mul(tmp, ftmp3, y2);
964    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966    /* ftmp3 = z1^3*y2 - z2^3*y1 */
967    felem_diff_128_64(tmp, ftmp4);
968    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969    felem_reduce(ftmp3, tmp);
970
971    /* tmp = z1^2*x2 */
972    felem_mul(tmp, ftmp, x2);
973    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
974
975    /* ftmp = z1^2*x2 - z2^2*x1 */
976    felem_diff_128_64(tmp, ftmp2);
977    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
978    felem_reduce(ftmp, tmp);
979
980    /*
981     * the formulae are incorrect if the points are equal so we check for
982     * this and do doubling if this happens
983     */
984    x_equal = felem_is_zero(ftmp);
985    y_equal = felem_is_zero(ftmp3);
986    z1_is_zero = felem_is_zero(z1);
987    z2_is_zero = felem_is_zero(z2);
988    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
989    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
990        point_double(x3, y3, z3, x1, y1, z1);
991        return;
992    }
993
994    /* ftmp5 = z1*z2 */
995    if (!mixed) {
996        felem_mul(tmp, z1, z2);
997        felem_reduce(ftmp5, tmp);
998    } else {
999        /* special case z2 = 0 is handled later */
1000        felem_assign(ftmp5, z1);
1001    }
1002
1003    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1004    felem_mul(tmp, ftmp, ftmp5);
1005    felem_reduce(z_out, tmp);
1006
1007    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1008    felem_assign(ftmp5, ftmp);
1009    felem_square(tmp, ftmp);
1010    felem_reduce(ftmp, tmp);
1011
1012    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1013    felem_mul(tmp, ftmp, ftmp5);
1014    felem_reduce(ftmp5, tmp);
1015
1016    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1017    felem_mul(tmp, ftmp2, ftmp);
1018    felem_reduce(ftmp2, tmp);
1019
1020    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1021    felem_mul(tmp, ftmp4, ftmp5);
1022    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1023
1024    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1025    felem_square(tmp2, ftmp3);
1026    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1027
1028    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1029    felem_diff_128_64(tmp2, ftmp5);
1030    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1031
1032    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033    felem_assign(ftmp5, ftmp2);
1034    felem_scalar(ftmp5, 2);
1035    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1036
1037    /*-
1038     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1039     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1040     */
1041    felem_diff_128_64(tmp2, ftmp5);
1042    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1043    felem_reduce(x_out, tmp2);
1044
1045    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1046    felem_diff(ftmp2, x_out);
1047    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1048
1049    /*
1050     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1051     */
1052    felem_mul(tmp2, ftmp3, ftmp2);
1053    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1054
1055    /*-
1056     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1057     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1058     */
1059    widefelem_diff(tmp2, tmp);
1060    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1061    felem_reduce(y_out, tmp2);
1062
1063    /*
1064     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1065     * the point at infinity, so we need to check for this separately
1066     */
1067
1068    /*
1069     * if point 1 is at infinity, copy point 2 to output, and vice versa
1070     */
1071    copy_conditional(x_out, x2, z1_is_zero);
1072    copy_conditional(x_out, x1, z2_is_zero);
1073    copy_conditional(y_out, y2, z1_is_zero);
1074    copy_conditional(y_out, y1, z2_is_zero);
1075    copy_conditional(z_out, z2, z1_is_zero);
1076    copy_conditional(z_out, z1, z2_is_zero);
1077    felem_assign(x3, x_out);
1078    felem_assign(y3, y_out);
1079    felem_assign(z3, z_out);
1080}
1081
1082/*
1083 * select_point selects the |idx|th point from a precomputation table and
1084 * copies it to out.
1085 * The pre_comp array argument should be size of |size| argument
1086 */
1087static void select_point(const u64 idx, unsigned int size,
1088                         const felem pre_comp[][3], felem out[3])
1089{
1090    unsigned i, j;
1091    limb *outlimbs = &out[0][0];
1092    memset(outlimbs, 0, 3 * sizeof(felem));
1093
1094    for (i = 0; i < size; i++) {
1095        const limb *inlimbs = &pre_comp[i][0][0];
1096        u64 mask = i ^ idx;
1097        mask |= mask >> 4;
1098        mask |= mask >> 2;
1099        mask |= mask >> 1;
1100        mask &= 1;
1101        mask--;
1102        for (j = 0; j < 4 * 3; j++)
1103            outlimbs[j] |= inlimbs[j] & mask;
1104    }
1105}
1106
1107/* get_bit returns the |i|th bit in |in| */
1108static char get_bit(const felem_bytearray in, unsigned i)
1109{
1110    if (i >= 224)
1111        return 0;
1112    return (in[i >> 3] >> (i & 7)) & 1;
1113}
1114
1115/*
1116 * Interleaved point multiplication using precomputed point multiples: The
1117 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1118 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1119 * generator, using certain (large) precomputed multiples in g_pre_comp.
1120 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1121 */
1122static void batch_mul(felem x_out, felem y_out, felem z_out,
1123                      const felem_bytearray scalars[],
1124                      const unsigned num_points, const u8 *g_scalar,
1125                      const int mixed, const felem pre_comp[][17][3],
1126                      const felem g_pre_comp[2][16][3])
1127{
1128    int i, skip;
1129    unsigned num;
1130    unsigned gen_mul = (g_scalar != NULL);
1131    felem nq[3], tmp[4];
1132    u64 bits;
1133    u8 sign, digit;
1134
1135    /* set nq to the point at infinity */
1136    memset(nq, 0, 3 * sizeof(felem));
1137
1138    /*
1139     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1140     * of the generator (two in each of the last 28 rounds) and additions of
1141     * other points multiples (every 5th round).
1142     */
1143    skip = 1;                   /* save two point operations in the first
1144                                 * round */
1145    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1146        /* double */
1147        if (!skip)
1148            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1149
1150        /* add multiples of the generator */
1151        if (gen_mul && (i <= 27)) {
1152            /* first, look 28 bits upwards */
1153            bits = get_bit(g_scalar, i + 196) << 3;
1154            bits |= get_bit(g_scalar, i + 140) << 2;
1155            bits |= get_bit(g_scalar, i + 84) << 1;
1156            bits |= get_bit(g_scalar, i + 28);
1157            /* select the point to add, in constant time */
1158            select_point(bits, 16, g_pre_comp[1], tmp);
1159
1160            if (!skip) {
1161                /* value 1 below is argument for "mixed" */
1162                point_add(nq[0], nq[1], nq[2],
1163                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1164            } else {
1165                memcpy(nq, tmp, 3 * sizeof(felem));
1166                skip = 0;
1167            }
1168
1169            /* second, look at the current position */
1170            bits = get_bit(g_scalar, i + 168) << 3;
1171            bits |= get_bit(g_scalar, i + 112) << 2;
1172            bits |= get_bit(g_scalar, i + 56) << 1;
1173            bits |= get_bit(g_scalar, i);
1174            /* select the point to add, in constant time */
1175            select_point(bits, 16, g_pre_comp[0], tmp);
1176            point_add(nq[0], nq[1], nq[2],
1177                      nq[0], nq[1], nq[2],
1178                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1179        }
1180
1181        /* do other additions every 5 doublings */
1182        if (num_points && (i % 5 == 0)) {
1183            /* loop over all scalars */
1184            for (num = 0; num < num_points; ++num) {
1185                bits = get_bit(scalars[num], i + 4) << 5;
1186                bits |= get_bit(scalars[num], i + 3) << 4;
1187                bits |= get_bit(scalars[num], i + 2) << 3;
1188                bits |= get_bit(scalars[num], i + 1) << 2;
1189                bits |= get_bit(scalars[num], i) << 1;
1190                bits |= get_bit(scalars[num], i - 1);
1191                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1192
1193                /* select the point to add or subtract */
1194                select_point(digit, 17, pre_comp[num], tmp);
1195                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1196                                            * point */
1197                copy_conditional(tmp[1], tmp[3], sign);
1198
1199                if (!skip) {
1200                    point_add(nq[0], nq[1], nq[2],
1201                              nq[0], nq[1], nq[2],
1202                              mixed, tmp[0], tmp[1], tmp[2]);
1203                } else {
1204                    memcpy(nq, tmp, 3 * sizeof(felem));
1205                    skip = 0;
1206                }
1207            }
1208        }
1209    }
1210    felem_assign(x_out, nq[0]);
1211    felem_assign(y_out, nq[1]);
1212    felem_assign(z_out, nq[2]);
1213}
1214
1215/******************************************************************************/
1216/*
1217 * FUNCTIONS TO MANAGE PRECOMPUTATION
1218 */
1219
1220static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1221{
1222    NISTP224_PRE_COMP *ret = NULL;
1223    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1224    if (!ret) {
1225        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1226        return ret;
1227    }
1228    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1229    ret->references = 1;
1230    return ret;
1231}
1232
1233static void *nistp224_pre_comp_dup(void *src_)
1234{
1235    NISTP224_PRE_COMP *src = src_;
1236
1237    /* no need to actually copy, these objects never change! */
1238    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1239
1240    return src_;
1241}
1242
1243static void nistp224_pre_comp_free(void *pre_)
1244{
1245    int i;
1246    NISTP224_PRE_COMP *pre = pre_;
1247
1248    if (!pre)
1249        return;
1250
1251    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1252    if (i > 0)
1253        return;
1254
1255    OPENSSL_free(pre);
1256}
1257
1258static void nistp224_pre_comp_clear_free(void *pre_)
1259{
1260    int i;
1261    NISTP224_PRE_COMP *pre = pre_;
1262
1263    if (!pre)
1264        return;
1265
1266    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1267    if (i > 0)
1268        return;
1269
1270    OPENSSL_cleanse(pre, sizeof(*pre));
1271    OPENSSL_free(pre);
1272}
1273
1274/******************************************************************************/
1275/*
1276 * OPENSSL EC_METHOD FUNCTIONS
1277 */
1278
1279int ec_GFp_nistp224_group_init(EC_GROUP *group)
1280{
1281    int ret;
1282    ret = ec_GFp_simple_group_init(group);
1283    group->a_is_minus3 = 1;
1284    return ret;
1285}
1286
1287int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1288                                    const BIGNUM *a, const BIGNUM *b,
1289                                    BN_CTX *ctx)
1290{
1291    int ret = 0;
1292    BN_CTX *new_ctx = NULL;
1293    BIGNUM *curve_p, *curve_a, *curve_b;
1294
1295    if (ctx == NULL)
1296        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1297            return 0;
1298    BN_CTX_start(ctx);
1299    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1300        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1301        ((curve_b = BN_CTX_get(ctx)) == NULL))
1302        goto err;
1303    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1304    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1305    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1306    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1307        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1308              EC_R_WRONG_CURVE_PARAMETERS);
1309        goto err;
1310    }
1311    group->field_mod_func = BN_nist_mod_224;
1312    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1313 err:
1314    BN_CTX_end(ctx);
1315    if (new_ctx != NULL)
1316        BN_CTX_free(new_ctx);
1317    return ret;
1318}
1319
1320/*
1321 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1322 * (X/Z^2, Y/Z^3)
1323 */
1324int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1325                                                 const EC_POINT *point,
1326                                                 BIGNUM *x, BIGNUM *y,
1327                                                 BN_CTX *ctx)
1328{
1329    felem z1, z2, x_in, y_in, x_out, y_out;
1330    widefelem tmp;
1331
1332    if (EC_POINT_is_at_infinity(group, point)) {
1333        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1334              EC_R_POINT_AT_INFINITY);
1335        return 0;
1336    }
1337    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1338        (!BN_to_felem(z1, &point->Z)))
1339        return 0;
1340    felem_inv(z2, z1);
1341    felem_square(tmp, z2);
1342    felem_reduce(z1, tmp);
1343    felem_mul(tmp, x_in, z1);
1344    felem_reduce(x_in, tmp);
1345    felem_contract(x_out, x_in);
1346    if (x != NULL) {
1347        if (!felem_to_BN(x, x_out)) {
1348            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1349                  ERR_R_BN_LIB);
1350            return 0;
1351        }
1352    }
1353    felem_mul(tmp, z1, z2);
1354    felem_reduce(z1, tmp);
1355    felem_mul(tmp, y_in, z1);
1356    felem_reduce(y_in, tmp);
1357    felem_contract(y_out, y_in);
1358    if (y != NULL) {
1359        if (!felem_to_BN(y, y_out)) {
1360            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1361                  ERR_R_BN_LIB);
1362            return 0;
1363        }
1364    }
1365    return 1;
1366}
1367
1368static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1369                               felem tmp_felems[ /* num+1 */ ])
1370{
1371    /*
1372     * Runs in constant time, unless an input is the point at infinity (which
1373     * normally shouldn't happen).
1374     */
1375    ec_GFp_nistp_points_make_affine_internal(num,
1376                                             points,
1377                                             sizeof(felem),
1378                                             tmp_felems,
1379                                             (void (*)(void *))felem_one,
1380                                             felem_is_zero_int,
1381                                             (void (*)(void *, const void *))
1382                                             felem_assign,
1383                                             (void (*)(void *, const void *))
1384                                             felem_square_reduce, (void (*)
1385                                                                   (void *,
1386                                                                    const void
1387                                                                    *,
1388                                                                    const void
1389                                                                    *))
1390                                             felem_mul_reduce,
1391                                             (void (*)(void *, const void *))
1392                                             felem_inv,
1393                                             (void (*)(void *, const void *))
1394                                             felem_contract);
1395}
1396
1397/*
1398 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1399 * values Result is stored in r (r can equal one of the inputs).
1400 */
1401int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1402                               const BIGNUM *scalar, size_t num,
1403                               const EC_POINT *points[],
1404                               const BIGNUM *scalars[], BN_CTX *ctx)
1405{
1406    int ret = 0;
1407    int j;
1408    unsigned i;
1409    int mixed = 0;
1410    BN_CTX *new_ctx = NULL;
1411    BIGNUM *x, *y, *z, *tmp_scalar;
1412    felem_bytearray g_secret;
1413    felem_bytearray *secrets = NULL;
1414    felem(*pre_comp)[17][3] = NULL;
1415    felem *tmp_felems = NULL;
1416    int num_bytes;
1417    int have_pre_comp = 0;
1418    size_t num_points = num;
1419    felem x_in, y_in, z_in, x_out, y_out, z_out;
1420    NISTP224_PRE_COMP *pre = NULL;
1421    const felem(*g_pre_comp)[16][3] = NULL;
1422    EC_POINT *generator = NULL;
1423    const EC_POINT *p = NULL;
1424    const BIGNUM *p_scalar = NULL;
1425
1426    if (ctx == NULL)
1427        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1428            return 0;
1429    BN_CTX_start(ctx);
1430    if (((x = BN_CTX_get(ctx)) == NULL) ||
1431        ((y = BN_CTX_get(ctx)) == NULL) ||
1432        ((z = BN_CTX_get(ctx)) == NULL) ||
1433        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1434        goto err;
1435
1436    if (scalar != NULL) {
1437        pre = EC_EX_DATA_get_data(group->extra_data,
1438                                  nistp224_pre_comp_dup,
1439                                  nistp224_pre_comp_free,
1440                                  nistp224_pre_comp_clear_free);
1441        if (pre)
1442            /* we have precomputation, try to use it */
1443            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1444        else
1445            /* try to use the standard precomputation */
1446            g_pre_comp = &gmul[0];
1447        generator = EC_POINT_new(group);
1448        if (generator == NULL)
1449            goto err;
1450        /* get the generator from precomputation */
1451        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1452            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1453            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1454            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1455            goto err;
1456        }
1457        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1458                                                      generator, x, y, z,
1459                                                      ctx))
1460            goto err;
1461        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1462            /* precomputation matches generator */
1463            have_pre_comp = 1;
1464        else
1465            /*
1466             * we don't have valid precomputation: treat the generator as a
1467             * random point
1468             */
1469            num_points = num_points + 1;
1470    }
1471
1472    if (num_points > 0) {
1473        if (num_points >= 3) {
1474            /*
1475             * unless we precompute multiples for just one or two points,
1476             * converting those into affine form is time well spent
1477             */
1478            mixed = 1;
1479        }
1480        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1481        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1482        if (mixed)
1483            tmp_felems =
1484                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1485        if ((secrets == NULL) || (pre_comp == NULL)
1486            || (mixed && (tmp_felems == NULL))) {
1487            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1488            goto err;
1489        }
1490
1491        /*
1492         * we treat NULL scalars as 0, and NULL points as points at infinity,
1493         * i.e., they contribute nothing to the linear combination
1494         */
1495        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1496        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1497        for (i = 0; i < num_points; ++i) {
1498            if (i == num) {
1499                /* the generator */
1500                p = EC_GROUP_get0_generator(group);
1501                p_scalar = scalar;
1502            } else {
1503                /* the i^th point */
1504                p = points[i];
1505                p_scalar = scalars[i];
1506            }
1507            if ((p_scalar != NULL) && (p != NULL)) {
1508                /* reduce scalar to 0 <= scalar < 2^224 */
1509                if ((BN_num_bits(p_scalar) > 224)
1510                    || (BN_is_negative(p_scalar))) {
1511                    /*
1512                     * this is an unusual input, and we don't guarantee
1513                     * constant-timeness
1514                     */
1515                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1516                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1517                        goto err;
1518                    }
1519                    num_bytes = bn_bn2lebinpad(tmp_scalar,
1520                                               secrets[i], sizeof(secrets[i]));
1521                } else {
1522                    num_bytes = bn_bn2lebinpad(p_scalar,
1523                                               secrets[i], sizeof(secrets[i]));
1524                }
1525                if (num_bytes < 0) {
1526                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1527                    goto err;
1528                }
1529                /* precompute multiples */
1530                if ((!BN_to_felem(x_out, &p->X)) ||
1531                    (!BN_to_felem(y_out, &p->Y)) ||
1532                    (!BN_to_felem(z_out, &p->Z)))
1533                    goto err;
1534                felem_assign(pre_comp[i][1][0], x_out);
1535                felem_assign(pre_comp[i][1][1], y_out);
1536                felem_assign(pre_comp[i][1][2], z_out);
1537                for (j = 2; j <= 16; ++j) {
1538                    if (j & 1) {
1539                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1540                                  pre_comp[i][j][2], pre_comp[i][1][0],
1541                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1542                                  pre_comp[i][j - 1][0],
1543                                  pre_comp[i][j - 1][1],
1544                                  pre_comp[i][j - 1][2]);
1545                    } else {
1546                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1547                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1548                                     pre_comp[i][j / 2][1],
1549                                     pre_comp[i][j / 2][2]);
1550                    }
1551                }
1552            }
1553        }
1554        if (mixed)
1555            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1556    }
1557
1558    /* the scalar for the generator */
1559    if ((scalar != NULL) && (have_pre_comp)) {
1560        memset(g_secret, 0, sizeof(g_secret));
1561        /* reduce scalar to 0 <= scalar < 2^224 */
1562        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1563            /*
1564             * this is an unusual input, and we don't guarantee
1565             * constant-timeness
1566             */
1567            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1568                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1569                goto err;
1570            }
1571            num_bytes = bn_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1572        } else {
1573            num_bytes = bn_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1574        }
1575        /* do the multiplication with generator precomputation */
1576        batch_mul(x_out, y_out, z_out,
1577                  (const felem_bytearray(*))secrets, num_points,
1578                  g_secret,
1579                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1580    } else {
1581        /* do the multiplication without generator precomputation */
1582        batch_mul(x_out, y_out, z_out,
1583                  (const felem_bytearray(*))secrets, num_points,
1584                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1585    }
1586    /* reduce the output to its unique minimal representation */
1587    felem_contract(x_in, x_out);
1588    felem_contract(y_in, y_out);
1589    felem_contract(z_in, z_out);
1590    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1591        (!felem_to_BN(z, z_in))) {
1592        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1593        goto err;
1594    }
1595    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1596
1597 err:
1598    BN_CTX_end(ctx);
1599    if (generator != NULL)
1600        EC_POINT_free(generator);
1601    if (new_ctx != NULL)
1602        BN_CTX_free(new_ctx);
1603    if (secrets != NULL)
1604        OPENSSL_free(secrets);
1605    if (pre_comp != NULL)
1606        OPENSSL_free(pre_comp);
1607    if (tmp_felems != NULL)
1608        OPENSSL_free(tmp_felems);
1609    return ret;
1610}
1611
1612int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1613{
1614    int ret = 0;
1615    NISTP224_PRE_COMP *pre = NULL;
1616    int i, j;
1617    BN_CTX *new_ctx = NULL;
1618    BIGNUM *x, *y;
1619    EC_POINT *generator = NULL;
1620    felem tmp_felems[32];
1621
1622    /* throw away old precomputation */
1623    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1624                         nistp224_pre_comp_free,
1625                         nistp224_pre_comp_clear_free);
1626    if (ctx == NULL)
1627        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1628            return 0;
1629    BN_CTX_start(ctx);
1630    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1631        goto err;
1632    /* get the generator */
1633    if (group->generator == NULL)
1634        goto err;
1635    generator = EC_POINT_new(group);
1636    if (generator == NULL)
1637        goto err;
1638    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1639    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1640    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1641        goto err;
1642    if ((pre = nistp224_pre_comp_new()) == NULL)
1643        goto err;
1644    /*
1645     * if the generator is the standard one, use built-in precomputation
1646     */
1647    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1648        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1649        goto done;
1650    }
1651    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1652        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1653        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1654        goto err;
1655    /*
1656     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1657     * 2^140*G, 2^196*G for the second one
1658     */
1659    for (i = 1; i <= 8; i <<= 1) {
1660        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1662                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1663        for (j = 0; j < 27; ++j) {
1664            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1666                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1667        }
1668        if (i == 8)
1669            break;
1670        point_double(pre->g_pre_comp[0][2 * i][0],
1671                     pre->g_pre_comp[0][2 * i][1],
1672                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1673                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1674        for (j = 0; j < 27; ++j) {
1675            point_double(pre->g_pre_comp[0][2 * i][0],
1676                         pre->g_pre_comp[0][2 * i][1],
1677                         pre->g_pre_comp[0][2 * i][2],
1678                         pre->g_pre_comp[0][2 * i][0],
1679                         pre->g_pre_comp[0][2 * i][1],
1680                         pre->g_pre_comp[0][2 * i][2]);
1681        }
1682    }
1683    for (i = 0; i < 2; i++) {
1684        /* g_pre_comp[i][0] is the point at infinity */
1685        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1686        /* the remaining multiples */
1687        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1688        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1689                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1690                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1691                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1692                  pre->g_pre_comp[i][2][2]);
1693        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1694        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1695                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1696                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1697                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1698                  pre->g_pre_comp[i][2][2]);
1699        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1700        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1701                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1702                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1703                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1704                  pre->g_pre_comp[i][4][2]);
1705        /*
1706         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1707         */
1708        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1709                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1710                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1711                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1712                  pre->g_pre_comp[i][2][2]);
1713        for (j = 1; j < 8; ++j) {
1714            /* odd multiples: add G resp. 2^28*G */
1715            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1716                      pre->g_pre_comp[i][2 * j + 1][1],
1717                      pre->g_pre_comp[i][2 * j + 1][2],
1718                      pre->g_pre_comp[i][2 * j][0],
1719                      pre->g_pre_comp[i][2 * j][1],
1720                      pre->g_pre_comp[i][2 * j][2], 0,
1721                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1722                      pre->g_pre_comp[i][1][2]);
1723        }
1724    }
1725    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1726
1727 done:
1728    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1729                             nistp224_pre_comp_free,
1730                             nistp224_pre_comp_clear_free))
1731        goto err;
1732    ret = 1;
1733    pre = NULL;
1734 err:
1735    BN_CTX_end(ctx);
1736    if (generator != NULL)
1737        EC_POINT_free(generator);
1738    if (new_ctx != NULL)
1739        BN_CTX_free(new_ctx);
1740    if (pre)
1741        nistp224_pre_comp_free(pre);
1742    return ret;
1743}
1744
1745int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746{
1747    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1748                            nistp224_pre_comp_free,
1749                            nistp224_pre_comp_clear_free)
1750        != NULL)
1751        return 1;
1752    else
1753        return 0;
1754}
1755
1756#else
1757static void *dummy = &dummy;
1758#endif
1759