ecp_nistp224.c revision 280304
1/* crypto/ec/ecp_nistp224.c */
2/*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *  Unless required by applicable law or agreed to in writing, software
15 *  distributed under the License is distributed on an "AS IS" BASIS,
16 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 *  See the License for the specific language governing permissions and
18 *  limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31# ifndef OPENSSL_SYS_VMS
32#  include <stdint.h>
33# else
34#  include <inttypes.h>
35# endif
36
37# include <string.h>
38# include <openssl/err.h>
39# include "ec_lcl.h"
40
41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42  /* even with gcc, the typedef won't work for 32-bit platforms */
43typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44                                 * platforms */
45# else
46#  error "Need GCC 3.1 or later to define type uint128_t"
47# endif
48
49typedef uint8_t u8;
50typedef uint64_t u64;
51typedef int64_t s64;
52
53/******************************************************************************/
54/*-
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56 *
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
70 * computation.
71 */
72
73typedef uint64_t limb;
74typedef uint128_t widelimb;
75
76typedef limb felem[4];
77typedef widelimb widefelem[7];
78
79/*
80 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81 * group order size for the elliptic curve, and we also use this type for
82 * scalars for point multiplication.
83 */
84typedef u8 felem_bytearray[28];
85
86static const felem_bytearray nistp224_curve_params[5] = {
87    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102};
103
104/*-
105 * Precomputed multiples of the standard generator
106 * Points are given in coordinates (X, Y, Z) where Z normally is 1
107 * (0 for the point at infinity).
108 * For each field element, slice a_0 is word 0, etc.
109 *
110 * The table has 2 * 16 elements, starting with the following:
111 * index | bits    | point
112 * ------+---------+------------------------------
113 *     0 | 0 0 0 0 | 0G
114 *     1 | 0 0 0 1 | 1G
115 *     2 | 0 0 1 0 | 2^56G
116 *     3 | 0 0 1 1 | (2^56 + 1)G
117 *     4 | 0 1 0 0 | 2^112G
118 *     5 | 0 1 0 1 | (2^112 + 1)G
119 *     6 | 0 1 1 0 | (2^112 + 2^56)G
120 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121 *     8 | 1 0 0 0 | 2^168G
122 *     9 | 1 0 0 1 | (2^168 + 1)G
123 *    10 | 1 0 1 0 | (2^168 + 2^56)G
124 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125 *    12 | 1 1 0 0 | (2^168 + 2^112)G
126 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129 * followed by a copy of this with each element multiplied by 2^28.
130 *
131 * The reason for this is so that we can clock bits into four different
132 * locations when doing simple scalar multiplies against the base point,
133 * and then another four locations using the second 16 elements.
134 */
135static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136                                        {0, 0, 0, 0},
137                                        {0, 0, 0, 0}},
138                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140                                        {0xd5819985007e34, 0x75a05a07476444,
141                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142                                        {1, 0, 0, 0}},
143                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145                                        {0x29e0b892dc9c43, 0xece8608436e662,
146                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147                                        {1, 0, 0, 0}},
148                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151                                         0x313865468fafec, 0x5cb379ba910a17},
152                                        {1, 0, 0, 0}},
153                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155                                        {0x7510407766af5d, 0x84d929610d5450,
156                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157                                        {1, 0, 0, 0}},
158                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162                                        {1, 0, 0, 0}},
163                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164                                         0x363f37347c1ba4, 0xc391b45c84725c},
165                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166                                         0xc393da7e222a7f, 0x1efb7890ede244},
167                                        {1, 0, 0, 0}},
168                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171                                         0x966c079b753c89, 0xfe67e4e820b112},
172                                        {1, 0, 0, 0}},
173                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176                                         0x641fab2116fd56, 0x72855882b08394},
177                                        {1, 0, 0, 0}},
178                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182                                        {1, 0, 0, 0}},
183                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186                                         0x9281d688bcbe50, 0xc40331df893881},
187                                        {1, 0, 0, 0}},
188                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192                                        {1, 0, 0, 0}},
193                                       {{0x571e509fb5efb3, 0xade88696410552,
194                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197                                        {1, 0, 0, 0}},
198                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202                                        {1, 0, 0, 0}},
203                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205                                        {0x2891425503b11f, 0x08781030579fea,
206                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207                                        {1, 0, 0, 0}},
208                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209                                         0xff17036691a973, 0xf1aef351497c58},
210                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211                                         0x74a684435bd693, 0xeea7471f962558},
212                                        {1, 0, 0, 0}}},
213{{{0, 0, 0, 0},
214  {0, 0, 0, 0},
215  {0, 0, 0, 0}},
216 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218  {1, 0, 0, 0}},
219 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221  {1, 0, 0, 0}},
222 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224  {1, 0, 0, 0}},
225 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227  {1, 0, 0, 0}},
228 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230  {1, 0, 0, 0}},
231 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233  {1, 0, 0, 0}},
234 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236  {1, 0, 0, 0}},
237 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239  {1, 0, 0, 0}},
240 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242  {1, 0, 0, 0}},
243 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245  {1, 0, 0, 0}},
246 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248  {1, 0, 0, 0}},
249 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251  {1, 0, 0, 0}},
252 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254  {1, 0, 0, 0}},
255 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257  {1, 0, 0, 0}},
258 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260  {1, 0, 0, 0}}}
261};
262
263/* Precomputation for the group generator. */
264typedef struct {
265    felem g_pre_comp[2][16][3];
266    int references;
267} NISTP224_PRE_COMP;
268
269const EC_METHOD *EC_GFp_nistp224_method(void)
270{
271    static const EC_METHOD ret = {
272        EC_FLAGS_DEFAULT_OCT,
273        NID_X9_62_prime_field,
274        ec_GFp_nistp224_group_init,
275        ec_GFp_simple_group_finish,
276        ec_GFp_simple_group_clear_finish,
277        ec_GFp_nist_group_copy,
278        ec_GFp_nistp224_group_set_curve,
279        ec_GFp_simple_group_get_curve,
280        ec_GFp_simple_group_get_degree,
281        ec_GFp_simple_group_check_discriminant,
282        ec_GFp_simple_point_init,
283        ec_GFp_simple_point_finish,
284        ec_GFp_simple_point_clear_finish,
285        ec_GFp_simple_point_copy,
286        ec_GFp_simple_point_set_to_infinity,
287        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289        ec_GFp_simple_point_set_affine_coordinates,
290        ec_GFp_nistp224_point_get_affine_coordinates,
291        0 /* point_set_compressed_coordinates */ ,
292        0 /* point2oct */ ,
293        0 /* oct2point */ ,
294        ec_GFp_simple_add,
295        ec_GFp_simple_dbl,
296        ec_GFp_simple_invert,
297        ec_GFp_simple_is_at_infinity,
298        ec_GFp_simple_is_on_curve,
299        ec_GFp_simple_cmp,
300        ec_GFp_simple_make_affine,
301        ec_GFp_simple_points_make_affine,
302        ec_GFp_nistp224_points_mul,
303        ec_GFp_nistp224_precompute_mult,
304        ec_GFp_nistp224_have_precompute_mult,
305        ec_GFp_nist_field_mul,
306        ec_GFp_nist_field_sqr,
307        0 /* field_div */ ,
308        0 /* field_encode */ ,
309        0 /* field_decode */ ,
310        0                       /* field_set_to_one */
311    };
312
313    return &ret;
314}
315
316/*
317 * Helper functions to convert field elements to/from internal representation
318 */
319static void bin28_to_felem(felem out, const u8 in[28])
320{
321    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325}
326
327static void felem_to_bin28(u8 out[28], const felem in)
328{
329    unsigned i;
330    for (i = 0; i < 7; ++i) {
331        out[i] = in[0] >> (8 * i);
332        out[i + 7] = in[1] >> (8 * i);
333        out[i + 14] = in[2] >> (8 * i);
334        out[i + 21] = in[3] >> (8 * i);
335    }
336}
337
338/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339static void flip_endian(u8 *out, const u8 *in, unsigned len)
340{
341    unsigned i;
342    for (i = 0; i < len; ++i)
343        out[i] = in[len - 1 - i];
344}
345
346/* From OpenSSL BIGNUM to internal representation */
347static int BN_to_felem(felem out, const BIGNUM *bn)
348{
349    felem_bytearray b_in;
350    felem_bytearray b_out;
351    unsigned num_bytes;
352
353    /* BN_bn2bin eats leading zeroes */
354    memset(b_out, 0, sizeof b_out);
355    num_bytes = BN_num_bytes(bn);
356    if (num_bytes > sizeof b_out) {
357        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358        return 0;
359    }
360    if (BN_is_negative(bn)) {
361        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362        return 0;
363    }
364    num_bytes = BN_bn2bin(bn, b_in);
365    flip_endian(b_out, b_in, num_bytes);
366    bin28_to_felem(out, b_out);
367    return 1;
368}
369
370/* From internal representation to OpenSSL BIGNUM */
371static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372{
373    felem_bytearray b_in, b_out;
374    felem_to_bin28(b_in, in);
375    flip_endian(b_out, b_in, sizeof b_out);
376    return BN_bin2bn(b_out, sizeof b_out, out);
377}
378
379/******************************************************************************/
380/*-
381 *                              FIELD OPERATIONS
382 *
383 * Field operations, using the internal representation of field elements.
384 * NB! These operations are specific to our point multiplication and cannot be
385 * expected to be correct in general - e.g., multiplication with a large scalar
386 * will cause an overflow.
387 *
388 */
389
390static void felem_one(felem out)
391{
392    out[0] = 1;
393    out[1] = 0;
394    out[2] = 0;
395    out[3] = 0;
396}
397
398static void felem_assign(felem out, const felem in)
399{
400    out[0] = in[0];
401    out[1] = in[1];
402    out[2] = in[2];
403    out[3] = in[3];
404}
405
406/* Sum two field elements: out += in */
407static void felem_sum(felem out, const felem in)
408{
409    out[0] += in[0];
410    out[1] += in[1];
411    out[2] += in[2];
412    out[3] += in[3];
413}
414
415/* Get negative value: out = -in */
416/* Assumes in[i] < 2^57 */
417static void felem_neg(felem out, const felem in)
418{
419    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421    static const limb two58m42m2 = (((limb) 1) << 58) -
422        (((limb) 1) << 42) - (((limb) 1) << 2);
423
424    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425    out[0] = two58p2 - in[0];
426    out[1] = two58m42m2 - in[1];
427    out[2] = two58m2 - in[2];
428    out[3] = two58m2 - in[3];
429}
430
431/* Subtract field elements: out -= in */
432/* Assumes in[i] < 2^57 */
433static void felem_diff(felem out, const felem in)
434{
435    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437    static const limb two58m42m2 = (((limb) 1) << 58) -
438        (((limb) 1) << 42) - (((limb) 1) << 2);
439
440    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441    out[0] += two58p2;
442    out[1] += two58m42m2;
443    out[2] += two58m2;
444    out[3] += two58m2;
445
446    out[0] -= in[0];
447    out[1] -= in[1];
448    out[2] -= in[2];
449    out[3] -= in[3];
450}
451
452/* Subtract in unreduced 128-bit mode: out -= in */
453/* Assumes in[i] < 2^119 */
454static void widefelem_diff(widefelem out, const widefelem in)
455{
456    static const widelimb two120 = ((widelimb) 1) << 120;
457    static const widelimb two120m64 = (((widelimb) 1) << 120) -
458        (((widelimb) 1) << 64);
459    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461
462    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463    out[0] += two120;
464    out[1] += two120m64;
465    out[2] += two120m64;
466    out[3] += two120;
467    out[4] += two120m104m64;
468    out[5] += two120m64;
469    out[6] += two120m64;
470
471    out[0] -= in[0];
472    out[1] -= in[1];
473    out[2] -= in[2];
474    out[3] -= in[3];
475    out[4] -= in[4];
476    out[5] -= in[5];
477    out[6] -= in[6];
478}
479
480/* Subtract in mixed mode: out128 -= in64 */
481/* in[i] < 2^63 */
482static void felem_diff_128_64(widefelem out, const felem in)
483{
484    static const widelimb two64p8 = (((widelimb) 1) << 64) +
485        (((widelimb) 1) << 8);
486    static const widelimb two64m8 = (((widelimb) 1) << 64) -
487        (((widelimb) 1) << 8);
488    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490
491    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492    out[0] += two64p8;
493    out[1] += two64m48m8;
494    out[2] += two64m8;
495    out[3] += two64m8;
496
497    out[0] -= in[0];
498    out[1] -= in[1];
499    out[2] -= in[2];
500    out[3] -= in[3];
501}
502
503/*
504 * Multiply a field element by a scalar: out = out * scalar The scalars we
505 * actually use are small, so results fit without overflow
506 */
507static void felem_scalar(felem out, const limb scalar)
508{
509    out[0] *= scalar;
510    out[1] *= scalar;
511    out[2] *= scalar;
512    out[3] *= scalar;
513}
514
515/*
516 * Multiply an unreduced field element by a scalar: out = out * scalar The
517 * scalars we actually use are small, so results fit without overflow
518 */
519static void widefelem_scalar(widefelem out, const widelimb scalar)
520{
521    out[0] *= scalar;
522    out[1] *= scalar;
523    out[2] *= scalar;
524    out[3] *= scalar;
525    out[4] *= scalar;
526    out[5] *= scalar;
527    out[6] *= scalar;
528}
529
530/* Square a field element: out = in^2 */
531static void felem_square(widefelem out, const felem in)
532{
533    limb tmp0, tmp1, tmp2;
534    tmp0 = 2 * in[0];
535    tmp1 = 2 * in[1];
536    tmp2 = 2 * in[2];
537    out[0] = ((widelimb) in[0]) * in[0];
538    out[1] = ((widelimb) in[0]) * tmp1;
539    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542    out[5] = ((widelimb) in[3]) * tmp2;
543    out[6] = ((widelimb) in[3]) * in[3];
544}
545
546/* Multiply two field elements: out = in1 * in2 */
547static void felem_mul(widefelem out, const felem in1, const felem in2)
548{
549    out[0] = ((widelimb) in1[0]) * in2[0];
550    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552        ((widelimb) in1[2]) * in2[0];
553    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556        ((widelimb) in1[3]) * in2[1];
557    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558    out[6] = ((widelimb) in1[3]) * in2[3];
559}
560
561/*-
562 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563 * Requires in[i] < 2^126,
564 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565static void felem_reduce(felem out, const widefelem in)
566{
567    static const widelimb two127p15 = (((widelimb) 1) << 127) +
568        (((widelimb) 1) << 15);
569    static const widelimb two127m71 = (((widelimb) 1) << 127) -
570        (((widelimb) 1) << 71);
571    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573    widelimb output[5];
574
575    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576    output[0] = in[0] + two127p15;
577    output[1] = in[1] + two127m71m55;
578    output[2] = in[2] + two127m71;
579    output[3] = in[3];
580    output[4] = in[4];
581
582    /* Eliminate in[4], in[5], in[6] */
583    output[4] += in[6] >> 16;
584    output[3] += (in[6] & 0xffff) << 40;
585    output[2] -= in[6];
586
587    output[3] += in[5] >> 16;
588    output[2] += (in[5] & 0xffff) << 40;
589    output[1] -= in[5];
590
591    output[2] += output[4] >> 16;
592    output[1] += (output[4] & 0xffff) << 40;
593    output[0] -= output[4];
594
595    /* Carry 2 -> 3 -> 4 */
596    output[3] += output[2] >> 56;
597    output[2] &= 0x00ffffffffffffff;
598
599    output[4] = output[3] >> 56;
600    output[3] &= 0x00ffffffffffffff;
601
602    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603
604    /* Eliminate output[4] */
605    output[2] += output[4] >> 16;
606    /* output[2] < 2^56 + 2^56 = 2^57 */
607    output[1] += (output[4] & 0xffff) << 40;
608    output[0] -= output[4];
609
610    /* Carry 0 -> 1 -> 2 -> 3 */
611    output[1] += output[0] >> 56;
612    out[0] = output[0] & 0x00ffffffffffffff;
613
614    output[2] += output[1] >> 56;
615    /* output[2] < 2^57 + 2^72 */
616    out[1] = output[1] & 0x00ffffffffffffff;
617    output[3] += output[2] >> 56;
618    /* output[3] <= 2^56 + 2^16 */
619    out[2] = output[2] & 0x00ffffffffffffff;
620
621    /*-
622     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623     * out[3] <= 2^56 + 2^16 (due to final carry),
624     * so out < 2*p
625     */
626    out[3] = output[3];
627}
628
629static void felem_square_reduce(felem out, const felem in)
630{
631    widefelem tmp;
632    felem_square(tmp, in);
633    felem_reduce(out, tmp);
634}
635
636static void felem_mul_reduce(felem out, const felem in1, const felem in2)
637{
638    widefelem tmp;
639    felem_mul(tmp, in1, in2);
640    felem_reduce(out, tmp);
641}
642
643/*
644 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645 * call felem_reduce first)
646 */
647static void felem_contract(felem out, const felem in)
648{
649    static const int64_t two56 = ((limb) 1) << 56;
650    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652    int64_t tmp[4], a;
653    tmp[0] = in[0];
654    tmp[1] = in[1];
655    tmp[2] = in[2];
656    tmp[3] = in[3];
657    /* Case 1: a = 1 iff in >= 2^224 */
658    a = (in[3] >> 56);
659    tmp[0] -= a;
660    tmp[1] += a << 40;
661    tmp[3] &= 0x00ffffffffffffff;
662    /*
663     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664     * and the lower part is non-zero
665     */
666    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668    a &= 0x00ffffffffffffff;
669    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670    a = (a - 1) >> 63;
671    /* subtract 2^224 - 2^96 + 1 if a is all-one */
672    tmp[3] &= a ^ 0xffffffffffffffff;
673    tmp[2] &= a ^ 0xffffffffffffffff;
674    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675    tmp[0] -= 1 & a;
676
677    /*
678     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679     * non-zero, so we only need one step
680     */
681    a = tmp[0] >> 63;
682    tmp[0] += two56 & a;
683    tmp[1] -= 1 & a;
684
685    /* carry 1 -> 2 -> 3 */
686    tmp[2] += tmp[1] >> 56;
687    tmp[1] &= 0x00ffffffffffffff;
688
689    tmp[3] += tmp[2] >> 56;
690    tmp[2] &= 0x00ffffffffffffff;
691
692    /* Now 0 <= out < p */
693    out[0] = tmp[0];
694    out[1] = tmp[1];
695    out[2] = tmp[2];
696    out[3] = tmp[3];
697}
698
699/*
700 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701 * elements are reduced to in < 2^225, so we only need to check three cases:
702 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703 */
704static limb felem_is_zero(const felem in)
705{
706    limb zero, two224m96p1, two225m97p2;
707
708    zero = in[0] | in[1] | in[2] | in[3];
709    zero = (((int64_t) (zero) - 1) >> 63) & 1;
710    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716    return (zero | two224m96p1 | two225m97p2);
717}
718
719static limb felem_is_zero_int(const felem in)
720{
721    return (int)(felem_is_zero(in) & ((limb) 1));
722}
723
724/* Invert a field element */
725/* Computation chain copied from djb's code */
726static void felem_inv(felem out, const felem in)
727{
728    felem ftmp, ftmp2, ftmp3, ftmp4;
729    widefelem tmp;
730    unsigned i;
731
732    felem_square(tmp, in);
733    felem_reduce(ftmp, tmp);    /* 2 */
734    felem_mul(tmp, in, ftmp);
735    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
736    felem_square(tmp, ftmp);
737    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
738    felem_mul(tmp, in, ftmp);
739    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
740    felem_square(tmp, ftmp);
741    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
742    felem_square(tmp, ftmp2);
743    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
744    felem_square(tmp, ftmp2);
745    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
746    felem_mul(tmp, ftmp2, ftmp);
747    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
748    felem_square(tmp, ftmp);
749    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
750    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
751        felem_square(tmp, ftmp2);
752        felem_reduce(ftmp2, tmp);
753    }
754    felem_mul(tmp, ftmp2, ftmp);
755    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
756    felem_square(tmp, ftmp2);
757    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
758    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
759        felem_square(tmp, ftmp3);
760        felem_reduce(ftmp3, tmp);
761    }
762    felem_mul(tmp, ftmp3, ftmp2);
763    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
764    felem_square(tmp, ftmp2);
765    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
766    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
767        felem_square(tmp, ftmp3);
768        felem_reduce(ftmp3, tmp);
769    }
770    felem_mul(tmp, ftmp3, ftmp2);
771    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
772    felem_square(tmp, ftmp3);
773    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
774    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
775        felem_square(tmp, ftmp4);
776        felem_reduce(ftmp4, tmp);
777    }
778    felem_mul(tmp, ftmp3, ftmp4);
779    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
780    felem_square(tmp, ftmp3);
781    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
782    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
783        felem_square(tmp, ftmp4);
784        felem_reduce(ftmp4, tmp);
785    }
786    felem_mul(tmp, ftmp2, ftmp4);
787    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
788    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
789        felem_square(tmp, ftmp2);
790        felem_reduce(ftmp2, tmp);
791    }
792    felem_mul(tmp, ftmp2, ftmp);
793    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
794    felem_square(tmp, ftmp);
795    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
796    felem_mul(tmp, ftmp, in);
797    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
798    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
799        felem_square(tmp, ftmp);
800        felem_reduce(ftmp, tmp);
801    }
802    felem_mul(tmp, ftmp, ftmp3);
803    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
804}
805
806/*
807 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808 * out to itself.
809 */
810static void copy_conditional(felem out, const felem in, limb icopy)
811{
812    unsigned i;
813    /*
814     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815     */
816    const limb copy = -icopy;
817    for (i = 0; i < 4; ++i) {
818        const limb tmp = copy & (in[i] ^ out[i]);
819        out[i] ^= tmp;
820    }
821}
822
823/******************************************************************************/
824/*-
825 *                       ELLIPTIC CURVE POINT OPERATIONS
826 *
827 * Points are represented in Jacobian projective coordinates:
828 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829 * or to the point at infinity if Z == 0.
830 *
831 */
832
833/*-
834 * Double an elliptic curve point:
835 * (X', Y', Z') = 2 * (X, Y, Z), where
836 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840 * while x_out == y_in is not (maybe this works, but it's not tested).
841 */
842static void
843point_double(felem x_out, felem y_out, felem z_out,
844             const felem x_in, const felem y_in, const felem z_in)
845{
846    widefelem tmp, tmp2;
847    felem delta, gamma, beta, alpha, ftmp, ftmp2;
848
849    felem_assign(ftmp, x_in);
850    felem_assign(ftmp2, x_in);
851
852    /* delta = z^2 */
853    felem_square(tmp, z_in);
854    felem_reduce(delta, tmp);
855
856    /* gamma = y^2 */
857    felem_square(tmp, y_in);
858    felem_reduce(gamma, tmp);
859
860    /* beta = x*gamma */
861    felem_mul(tmp, x_in, gamma);
862    felem_reduce(beta, tmp);
863
864    /* alpha = 3*(x-delta)*(x+delta) */
865    felem_diff(ftmp, delta);
866    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867    felem_sum(ftmp2, delta);
868    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869    felem_scalar(ftmp2, 3);
870    /* ftmp2[i] < 3 * 2^58 < 2^60 */
871    felem_mul(tmp, ftmp, ftmp2);
872    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873    felem_reduce(alpha, tmp);
874
875    /* x' = alpha^2 - 8*beta */
876    felem_square(tmp, alpha);
877    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878    felem_assign(ftmp, beta);
879    felem_scalar(ftmp, 8);
880    /* ftmp[i] < 8 * 2^57 = 2^60 */
881    felem_diff_128_64(tmp, ftmp);
882    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883    felem_reduce(x_out, tmp);
884
885    /* z' = (y + z)^2 - gamma - delta */
886    felem_sum(delta, gamma);
887    /* delta[i] < 2^57 + 2^57 = 2^58 */
888    felem_assign(ftmp, y_in);
889    felem_sum(ftmp, z_in);
890    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891    felem_square(tmp, ftmp);
892    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893    felem_diff_128_64(tmp, delta);
894    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895    felem_reduce(z_out, tmp);
896
897    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898    felem_scalar(beta, 4);
899    /* beta[i] < 4 * 2^57 = 2^59 */
900    felem_diff(beta, x_out);
901    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902    felem_mul(tmp, alpha, beta);
903    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904    felem_square(tmp2, gamma);
905    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906    widefelem_scalar(tmp2, 8);
907    /* tmp2[i] < 8 * 2^116 = 2^119 */
908    widefelem_diff(tmp, tmp2);
909    /* tmp[i] < 2^119 + 2^120 < 2^121 */
910    felem_reduce(y_out, tmp);
911}
912
913/*-
914 * Add two elliptic curve points:
915 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921 *
922 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
923 */
924
925/*
926 * This function is not entirely constant-time: it includes a branch for
927 * checking whether the two input points are equal, (while not equal to the
928 * point at infinity). This case never happens during single point
929 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930 */
931static void point_add(felem x3, felem y3, felem z3,
932                      const felem x1, const felem y1, const felem z1,
933                      const int mixed, const felem x2, const felem y2,
934                      const felem z2)
935{
936    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937    widefelem tmp, tmp2;
938    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
939
940    if (!mixed) {
941        /* ftmp2 = z2^2 */
942        felem_square(tmp, z2);
943        felem_reduce(ftmp2, tmp);
944
945        /* ftmp4 = z2^3 */
946        felem_mul(tmp, ftmp2, z2);
947        felem_reduce(ftmp4, tmp);
948
949        /* ftmp4 = z2^3*y1 */
950        felem_mul(tmp2, ftmp4, y1);
951        felem_reduce(ftmp4, tmp2);
952
953        /* ftmp2 = z2^2*x1 */
954        felem_mul(tmp2, ftmp2, x1);
955        felem_reduce(ftmp2, tmp2);
956    } else {
957        /*
958         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
959         */
960
961        /* ftmp4 = z2^3*y1 */
962        felem_assign(ftmp4, y1);
963
964        /* ftmp2 = z2^2*x1 */
965        felem_assign(ftmp2, x1);
966    }
967
968    /* ftmp = z1^2 */
969    felem_square(tmp, z1);
970    felem_reduce(ftmp, tmp);
971
972    /* ftmp3 = z1^3 */
973    felem_mul(tmp, ftmp, z1);
974    felem_reduce(ftmp3, tmp);
975
976    /* tmp = z1^3*y2 */
977    felem_mul(tmp, ftmp3, y2);
978    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979
980    /* ftmp3 = z1^3*y2 - z2^3*y1 */
981    felem_diff_128_64(tmp, ftmp4);
982    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983    felem_reduce(ftmp3, tmp);
984
985    /* tmp = z1^2*x2 */
986    felem_mul(tmp, ftmp, x2);
987    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988
989    /* ftmp = z1^2*x2 - z2^2*x1 */
990    felem_diff_128_64(tmp, ftmp2);
991    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992    felem_reduce(ftmp, tmp);
993
994    /*
995     * the formulae are incorrect if the points are equal so we check for
996     * this and do doubling if this happens
997     */
998    x_equal = felem_is_zero(ftmp);
999    y_equal = felem_is_zero(ftmp3);
1000    z1_is_zero = felem_is_zero(z1);
1001    z2_is_zero = felem_is_zero(z2);
1002    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004        point_double(x3, y3, z3, x1, y1, z1);
1005        return;
1006    }
1007
1008    /* ftmp5 = z1*z2 */
1009    if (!mixed) {
1010        felem_mul(tmp, z1, z2);
1011        felem_reduce(ftmp5, tmp);
1012    } else {
1013        /* special case z2 = 0 is handled later */
1014        felem_assign(ftmp5, z1);
1015    }
1016
1017    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018    felem_mul(tmp, ftmp, ftmp5);
1019    felem_reduce(z_out, tmp);
1020
1021    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022    felem_assign(ftmp5, ftmp);
1023    felem_square(tmp, ftmp);
1024    felem_reduce(ftmp, tmp);
1025
1026    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027    felem_mul(tmp, ftmp, ftmp5);
1028    felem_reduce(ftmp5, tmp);
1029
1030    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031    felem_mul(tmp, ftmp2, ftmp);
1032    felem_reduce(ftmp2, tmp);
1033
1034    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035    felem_mul(tmp, ftmp4, ftmp5);
1036    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037
1038    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039    felem_square(tmp2, ftmp3);
1040    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041
1042    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043    felem_diff_128_64(tmp2, ftmp5);
1044    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045
1046    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047    felem_assign(ftmp5, ftmp2);
1048    felem_scalar(ftmp5, 2);
1049    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050
1051    /*-
1052     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054     */
1055    felem_diff_128_64(tmp2, ftmp5);
1056    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057    felem_reduce(x_out, tmp2);
1058
1059    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060    felem_diff(ftmp2, x_out);
1061    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062
1063    /*
1064     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065     */
1066    felem_mul(tmp2, ftmp3, ftmp2);
1067    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068
1069    /*-
1070     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072     */
1073    widefelem_diff(tmp2, tmp);
1074    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075    felem_reduce(y_out, tmp2);
1076
1077    /*
1078     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079     * the point at infinity, so we need to check for this separately
1080     */
1081
1082    /*
1083     * if point 1 is at infinity, copy point 2 to output, and vice versa
1084     */
1085    copy_conditional(x_out, x2, z1_is_zero);
1086    copy_conditional(x_out, x1, z2_is_zero);
1087    copy_conditional(y_out, y2, z1_is_zero);
1088    copy_conditional(y_out, y1, z2_is_zero);
1089    copy_conditional(z_out, z2, z1_is_zero);
1090    copy_conditional(z_out, z1, z2_is_zero);
1091    felem_assign(x3, x_out);
1092    felem_assign(y3, y_out);
1093    felem_assign(z3, z_out);
1094}
1095
1096/*
1097 * select_point selects the |idx|th point from a precomputation table and
1098 * copies it to out.
1099 * The pre_comp array argument should be size of |size| argument
1100 */
1101static void select_point(const u64 idx, unsigned int size,
1102                         const felem pre_comp[][3], felem out[3])
1103{
1104    unsigned i, j;
1105    limb *outlimbs = &out[0][0];
1106    memset(outlimbs, 0, 3 * sizeof(felem));
1107
1108    for (i = 0; i < size; i++) {
1109        const limb *inlimbs = &pre_comp[i][0][0];
1110        u64 mask = i ^ idx;
1111        mask |= mask >> 4;
1112        mask |= mask >> 2;
1113        mask |= mask >> 1;
1114        mask &= 1;
1115        mask--;
1116        for (j = 0; j < 4 * 3; j++)
1117            outlimbs[j] |= inlimbs[j] & mask;
1118    }
1119}
1120
1121/* get_bit returns the |i|th bit in |in| */
1122static char get_bit(const felem_bytearray in, unsigned i)
1123{
1124    if (i >= 224)
1125        return 0;
1126    return (in[i >> 3] >> (i & 7)) & 1;
1127}
1128
1129/*
1130 * Interleaved point multiplication using precomputed point multiples: The
1131 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133 * generator, using certain (large) precomputed multiples in g_pre_comp.
1134 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135 */
1136static void batch_mul(felem x_out, felem y_out, felem z_out,
1137                      const felem_bytearray scalars[],
1138                      const unsigned num_points, const u8 *g_scalar,
1139                      const int mixed, const felem pre_comp[][17][3],
1140                      const felem g_pre_comp[2][16][3])
1141{
1142    int i, skip;
1143    unsigned num;
1144    unsigned gen_mul = (g_scalar != NULL);
1145    felem nq[3], tmp[4];
1146    u64 bits;
1147    u8 sign, digit;
1148
1149    /* set nq to the point at infinity */
1150    memset(nq, 0, 3 * sizeof(felem));
1151
1152    /*
1153     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154     * of the generator (two in each of the last 28 rounds) and additions of
1155     * other points multiples (every 5th round).
1156     */
1157    skip = 1;                   /* save two point operations in the first
1158                                 * round */
1159    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160        /* double */
1161        if (!skip)
1162            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163
1164        /* add multiples of the generator */
1165        if (gen_mul && (i <= 27)) {
1166            /* first, look 28 bits upwards */
1167            bits = get_bit(g_scalar, i + 196) << 3;
1168            bits |= get_bit(g_scalar, i + 140) << 2;
1169            bits |= get_bit(g_scalar, i + 84) << 1;
1170            bits |= get_bit(g_scalar, i + 28);
1171            /* select the point to add, in constant time */
1172            select_point(bits, 16, g_pre_comp[1], tmp);
1173
1174            if (!skip) {
1175                /* value 1 below is argument for "mixed" */
1176                point_add(nq[0], nq[1], nq[2],
1177                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178            } else {
1179                memcpy(nq, tmp, 3 * sizeof(felem));
1180                skip = 0;
1181            }
1182
1183            /* second, look at the current position */
1184            bits = get_bit(g_scalar, i + 168) << 3;
1185            bits |= get_bit(g_scalar, i + 112) << 2;
1186            bits |= get_bit(g_scalar, i + 56) << 1;
1187            bits |= get_bit(g_scalar, i);
1188            /* select the point to add, in constant time */
1189            select_point(bits, 16, g_pre_comp[0], tmp);
1190            point_add(nq[0], nq[1], nq[2],
1191                      nq[0], nq[1], nq[2],
1192                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193        }
1194
1195        /* do other additions every 5 doublings */
1196        if (num_points && (i % 5 == 0)) {
1197            /* loop over all scalars */
1198            for (num = 0; num < num_points; ++num) {
1199                bits = get_bit(scalars[num], i + 4) << 5;
1200                bits |= get_bit(scalars[num], i + 3) << 4;
1201                bits |= get_bit(scalars[num], i + 2) << 3;
1202                bits |= get_bit(scalars[num], i + 1) << 2;
1203                bits |= get_bit(scalars[num], i) << 1;
1204                bits |= get_bit(scalars[num], i - 1);
1205                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206
1207                /* select the point to add or subtract */
1208                select_point(digit, 17, pre_comp[num], tmp);
1209                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210                                            * point */
1211                copy_conditional(tmp[1], tmp[3], sign);
1212
1213                if (!skip) {
1214                    point_add(nq[0], nq[1], nq[2],
1215                              nq[0], nq[1], nq[2],
1216                              mixed, tmp[0], tmp[1], tmp[2]);
1217                } else {
1218                    memcpy(nq, tmp, 3 * sizeof(felem));
1219                    skip = 0;
1220                }
1221            }
1222        }
1223    }
1224    felem_assign(x_out, nq[0]);
1225    felem_assign(y_out, nq[1]);
1226    felem_assign(z_out, nq[2]);
1227}
1228
1229/******************************************************************************/
1230/*
1231 * FUNCTIONS TO MANAGE PRECOMPUTATION
1232 */
1233
1234static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235{
1236    NISTP224_PRE_COMP *ret = NULL;
1237    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238    if (!ret) {
1239        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240        return ret;
1241    }
1242    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243    ret->references = 1;
1244    return ret;
1245}
1246
1247static void *nistp224_pre_comp_dup(void *src_)
1248{
1249    NISTP224_PRE_COMP *src = src_;
1250
1251    /* no need to actually copy, these objects never change! */
1252    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253
1254    return src_;
1255}
1256
1257static void nistp224_pre_comp_free(void *pre_)
1258{
1259    int i;
1260    NISTP224_PRE_COMP *pre = pre_;
1261
1262    if (!pre)
1263        return;
1264
1265    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266    if (i > 0)
1267        return;
1268
1269    OPENSSL_free(pre);
1270}
1271
1272static void nistp224_pre_comp_clear_free(void *pre_)
1273{
1274    int i;
1275    NISTP224_PRE_COMP *pre = pre_;
1276
1277    if (!pre)
1278        return;
1279
1280    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281    if (i > 0)
1282        return;
1283
1284    OPENSSL_cleanse(pre, sizeof *pre);
1285    OPENSSL_free(pre);
1286}
1287
1288/******************************************************************************/
1289/*
1290 * OPENSSL EC_METHOD FUNCTIONS
1291 */
1292
1293int ec_GFp_nistp224_group_init(EC_GROUP *group)
1294{
1295    int ret;
1296    ret = ec_GFp_simple_group_init(group);
1297    group->a_is_minus3 = 1;
1298    return ret;
1299}
1300
1301int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302                                    const BIGNUM *a, const BIGNUM *b,
1303                                    BN_CTX *ctx)
1304{
1305    int ret = 0;
1306    BN_CTX *new_ctx = NULL;
1307    BIGNUM *curve_p, *curve_a, *curve_b;
1308
1309    if (ctx == NULL)
1310        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311            return 0;
1312    BN_CTX_start(ctx);
1313    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315        ((curve_b = BN_CTX_get(ctx)) == NULL))
1316        goto err;
1317    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322              EC_R_WRONG_CURVE_PARAMETERS);
1323        goto err;
1324    }
1325    group->field_mod_func = BN_nist_mod_224;
1326    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327 err:
1328    BN_CTX_end(ctx);
1329    if (new_ctx != NULL)
1330        BN_CTX_free(new_ctx);
1331    return ret;
1332}
1333
1334/*
1335 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336 * (X/Z^2, Y/Z^3)
1337 */
1338int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339                                                 const EC_POINT *point,
1340                                                 BIGNUM *x, BIGNUM *y,
1341                                                 BN_CTX *ctx)
1342{
1343    felem z1, z2, x_in, y_in, x_out, y_out;
1344    widefelem tmp;
1345
1346    if (EC_POINT_is_at_infinity(group, point)) {
1347        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348              EC_R_POINT_AT_INFINITY);
1349        return 0;
1350    }
1351    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352        (!BN_to_felem(z1, &point->Z)))
1353        return 0;
1354    felem_inv(z2, z1);
1355    felem_square(tmp, z2);
1356    felem_reduce(z1, tmp);
1357    felem_mul(tmp, x_in, z1);
1358    felem_reduce(x_in, tmp);
1359    felem_contract(x_out, x_in);
1360    if (x != NULL) {
1361        if (!felem_to_BN(x, x_out)) {
1362            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363                  ERR_R_BN_LIB);
1364            return 0;
1365        }
1366    }
1367    felem_mul(tmp, z1, z2);
1368    felem_reduce(z1, tmp);
1369    felem_mul(tmp, y_in, z1);
1370    felem_reduce(y_in, tmp);
1371    felem_contract(y_out, y_in);
1372    if (y != NULL) {
1373        if (!felem_to_BN(y, y_out)) {
1374            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375                  ERR_R_BN_LIB);
1376            return 0;
1377        }
1378    }
1379    return 1;
1380}
1381
1382static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383                               felem tmp_felems[ /* num+1 */ ])
1384{
1385    /*
1386     * Runs in constant time, unless an input is the point at infinity (which
1387     * normally shouldn't happen).
1388     */
1389    ec_GFp_nistp_points_make_affine_internal(num,
1390                                             points,
1391                                             sizeof(felem),
1392                                             tmp_felems,
1393                                             (void (*)(void *))felem_one,
1394                                             (int (*)(const void *))
1395                                             felem_is_zero_int,
1396                                             (void (*)(void *, const void *))
1397                                             felem_assign,
1398                                             (void (*)(void *, const void *))
1399                                             felem_square_reduce, (void (*)
1400                                                                   (void *,
1401                                                                    const void
1402                                                                    *,
1403                                                                    const void
1404                                                                    *))
1405                                             felem_mul_reduce,
1406                                             (void (*)(void *, const void *))
1407                                             felem_inv,
1408                                             (void (*)(void *, const void *))
1409                                             felem_contract);
1410}
1411
1412/*
1413 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414 * values Result is stored in r (r can equal one of the inputs).
1415 */
1416int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417                               const BIGNUM *scalar, size_t num,
1418                               const EC_POINT *points[],
1419                               const BIGNUM *scalars[], BN_CTX *ctx)
1420{
1421    int ret = 0;
1422    int j;
1423    unsigned i;
1424    int mixed = 0;
1425    BN_CTX *new_ctx = NULL;
1426    BIGNUM *x, *y, *z, *tmp_scalar;
1427    felem_bytearray g_secret;
1428    felem_bytearray *secrets = NULL;
1429    felem(*pre_comp)[17][3] = NULL;
1430    felem *tmp_felems = NULL;
1431    felem_bytearray tmp;
1432    unsigned num_bytes;
1433    int have_pre_comp = 0;
1434    size_t num_points = num;
1435    felem x_in, y_in, z_in, x_out, y_out, z_out;
1436    NISTP224_PRE_COMP *pre = NULL;
1437    const felem(*g_pre_comp)[16][3] = NULL;
1438    EC_POINT *generator = NULL;
1439    const EC_POINT *p = NULL;
1440    const BIGNUM *p_scalar = NULL;
1441
1442    if (ctx == NULL)
1443        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444            return 0;
1445    BN_CTX_start(ctx);
1446    if (((x = BN_CTX_get(ctx)) == NULL) ||
1447        ((y = BN_CTX_get(ctx)) == NULL) ||
1448        ((z = BN_CTX_get(ctx)) == NULL) ||
1449        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1450        goto err;
1451
1452    if (scalar != NULL) {
1453        pre = EC_EX_DATA_get_data(group->extra_data,
1454                                  nistp224_pre_comp_dup,
1455                                  nistp224_pre_comp_free,
1456                                  nistp224_pre_comp_clear_free);
1457        if (pre)
1458            /* we have precomputation, try to use it */
1459            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1460        else
1461            /* try to use the standard precomputation */
1462            g_pre_comp = &gmul[0];
1463        generator = EC_POINT_new(group);
1464        if (generator == NULL)
1465            goto err;
1466        /* get the generator from precomputation */
1467        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1468            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1469            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1470            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471            goto err;
1472        }
1473        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1474                                                      generator, x, y, z,
1475                                                      ctx))
1476            goto err;
1477        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1478            /* precomputation matches generator */
1479            have_pre_comp = 1;
1480        else
1481            /*
1482             * we don't have valid precomputation: treat the generator as a
1483             * random point
1484             */
1485            num_points = num_points + 1;
1486    }
1487
1488    if (num_points > 0) {
1489        if (num_points >= 3) {
1490            /*
1491             * unless we precompute multiples for just one or two points,
1492             * converting those into affine form is time well spent
1493             */
1494            mixed = 1;
1495        }
1496        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1497        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498        if (mixed)
1499            tmp_felems =
1500                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1501        if ((secrets == NULL) || (pre_comp == NULL)
1502            || (mixed && (tmp_felems == NULL))) {
1503            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1504            goto err;
1505        }
1506
1507        /*
1508         * we treat NULL scalars as 0, and NULL points as points at infinity,
1509         * i.e., they contribute nothing to the linear combination
1510         */
1511        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1512        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1513        for (i = 0; i < num_points; ++i) {
1514            if (i == num)
1515                /* the generator */
1516            {
1517                p = EC_GROUP_get0_generator(group);
1518                p_scalar = scalar;
1519            } else
1520                /* the i^th point */
1521            {
1522                p = points[i];
1523                p_scalar = scalars[i];
1524            }
1525            if ((p_scalar != NULL) && (p != NULL)) {
1526                /* reduce scalar to 0 <= scalar < 2^224 */
1527                if ((BN_num_bits(p_scalar) > 224)
1528                    || (BN_is_negative(p_scalar))) {
1529                    /*
1530                     * this is an unusual input, and we don't guarantee
1531                     * constant-timeness
1532                     */
1533                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1534                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535                        goto err;
1536                    }
1537                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1538                } else
1539                    num_bytes = BN_bn2bin(p_scalar, tmp);
1540                flip_endian(secrets[i], tmp, num_bytes);
1541                /* precompute multiples */
1542                if ((!BN_to_felem(x_out, &p->X)) ||
1543                    (!BN_to_felem(y_out, &p->Y)) ||
1544                    (!BN_to_felem(z_out, &p->Z)))
1545                    goto err;
1546                felem_assign(pre_comp[i][1][0], x_out);
1547                felem_assign(pre_comp[i][1][1], y_out);
1548                felem_assign(pre_comp[i][1][2], z_out);
1549                for (j = 2; j <= 16; ++j) {
1550                    if (j & 1) {
1551                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1552                                  pre_comp[i][j][2], pre_comp[i][1][0],
1553                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1554                                  pre_comp[i][j - 1][0],
1555                                  pre_comp[i][j - 1][1],
1556                                  pre_comp[i][j - 1][2]);
1557                    } else {
1558                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1559                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1560                                     pre_comp[i][j / 2][1],
1561                                     pre_comp[i][j / 2][2]);
1562                    }
1563                }
1564            }
1565        }
1566        if (mixed)
1567            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1568    }
1569
1570    /* the scalar for the generator */
1571    if ((scalar != NULL) && (have_pre_comp)) {
1572        memset(g_secret, 0, sizeof g_secret);
1573        /* reduce scalar to 0 <= scalar < 2^224 */
1574        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1575            /*
1576             * this is an unusual input, and we don't guarantee
1577             * constant-timeness
1578             */
1579            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1580                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581                goto err;
1582            }
1583            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1584        } else
1585            num_bytes = BN_bn2bin(scalar, tmp);
1586        flip_endian(g_secret, tmp, num_bytes);
1587        /* do the multiplication with generator precomputation */
1588        batch_mul(x_out, y_out, z_out,
1589                  (const felem_bytearray(*))secrets, num_points,
1590                  g_secret,
1591                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1592    } else
1593        /* do the multiplication without generator precomputation */
1594        batch_mul(x_out, y_out, z_out,
1595                  (const felem_bytearray(*))secrets, num_points,
1596                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1597    /* reduce the output to its unique minimal representation */
1598    felem_contract(x_in, x_out);
1599    felem_contract(y_in, y_out);
1600    felem_contract(z_in, z_out);
1601    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1602        (!felem_to_BN(z, z_in))) {
1603        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604        goto err;
1605    }
1606    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1607
1608 err:
1609    BN_CTX_end(ctx);
1610    if (generator != NULL)
1611        EC_POINT_free(generator);
1612    if (new_ctx != NULL)
1613        BN_CTX_free(new_ctx);
1614    if (secrets != NULL)
1615        OPENSSL_free(secrets);
1616    if (pre_comp != NULL)
1617        OPENSSL_free(pre_comp);
1618    if (tmp_felems != NULL)
1619        OPENSSL_free(tmp_felems);
1620    return ret;
1621}
1622
1623int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1624{
1625    int ret = 0;
1626    NISTP224_PRE_COMP *pre = NULL;
1627    int i, j;
1628    BN_CTX *new_ctx = NULL;
1629    BIGNUM *x, *y;
1630    EC_POINT *generator = NULL;
1631    felem tmp_felems[32];
1632
1633    /* throw away old precomputation */
1634    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1635                         nistp224_pre_comp_free,
1636                         nistp224_pre_comp_clear_free);
1637    if (ctx == NULL)
1638        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639            return 0;
1640    BN_CTX_start(ctx);
1641    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1642        goto err;
1643    /* get the generator */
1644    if (group->generator == NULL)
1645        goto err;
1646    generator = EC_POINT_new(group);
1647    if (generator == NULL)
1648        goto err;
1649    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1650    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1651    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1652        goto err;
1653    if ((pre = nistp224_pre_comp_new()) == NULL)
1654        goto err;
1655    /*
1656     * if the generator is the standard one, use built-in precomputation
1657     */
1658    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1659        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1660        ret = 1;
1661        goto err;
1662    }
1663    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1664        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1665        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1666        goto err;
1667    /*
1668     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1669     * 2^140*G, 2^196*G for the second one
1670     */
1671    for (i = 1; i <= 8; i <<= 1) {
1672        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1674                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1675        for (j = 0; j < 27; ++j) {
1676            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1677                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1678                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1679        }
1680        if (i == 8)
1681            break;
1682        point_double(pre->g_pre_comp[0][2 * i][0],
1683                     pre->g_pre_comp[0][2 * i][1],
1684                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1685                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1686        for (j = 0; j < 27; ++j) {
1687            point_double(pre->g_pre_comp[0][2 * i][0],
1688                         pre->g_pre_comp[0][2 * i][1],
1689                         pre->g_pre_comp[0][2 * i][2],
1690                         pre->g_pre_comp[0][2 * i][0],
1691                         pre->g_pre_comp[0][2 * i][1],
1692                         pre->g_pre_comp[0][2 * i][2]);
1693        }
1694    }
1695    for (i = 0; i < 2; i++) {
1696        /* g_pre_comp[i][0] is the point at infinity */
1697        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1698        /* the remaining multiples */
1699        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1700        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1701                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1702                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1703                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704                  pre->g_pre_comp[i][2][2]);
1705        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1706        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1707                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1708                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1710                  pre->g_pre_comp[i][2][2]);
1711        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1712        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1713                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1714                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1715                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1716                  pre->g_pre_comp[i][4][2]);
1717        /*
1718         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1719         */
1720        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1721                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1722                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1723                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1724                  pre->g_pre_comp[i][2][2]);
1725        for (j = 1; j < 8; ++j) {
1726            /* odd multiples: add G resp. 2^28*G */
1727            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1728                      pre->g_pre_comp[i][2 * j + 1][1],
1729                      pre->g_pre_comp[i][2 * j + 1][2],
1730                      pre->g_pre_comp[i][2 * j][0],
1731                      pre->g_pre_comp[i][2 * j][1],
1732                      pre->g_pre_comp[i][2 * j][2], 0,
1733                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1734                      pre->g_pre_comp[i][1][2]);
1735        }
1736    }
1737    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1738
1739    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1740                             nistp224_pre_comp_free,
1741                             nistp224_pre_comp_clear_free))
1742        goto err;
1743    ret = 1;
1744    pre = NULL;
1745 err:
1746    BN_CTX_end(ctx);
1747    if (generator != NULL)
1748        EC_POINT_free(generator);
1749    if (new_ctx != NULL)
1750        BN_CTX_free(new_ctx);
1751    if (pre)
1752        nistp224_pre_comp_free(pre);
1753    return ret;
1754}
1755
1756int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1757{
1758    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1759                            nistp224_pre_comp_free,
1760                            nistp224_pre_comp_clear_free)
1761        != NULL)
1762        return 1;
1763    else
1764        return 0;
1765}
1766
1767#else
1768static void *dummy = &dummy;
1769#endif
1770