ecp_nistp224.c revision 325337
1/* crypto/ec/ecp_nistp224.c */
2/*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *  Unless required by applicable law or agreed to in writing, software
15 *  distributed under the License is distributed on an "AS IS" BASIS,
16 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 *  See the License for the specific language governing permissions and
18 *  limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31# ifndef OPENSSL_SYS_VMS
32#  include <stdint.h>
33# else
34#  include <inttypes.h>
35# endif
36
37# include <string.h>
38# include <openssl/err.h>
39# include "ec_lcl.h"
40
41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42  /* even with gcc, the typedef won't work for 32-bit platforms */
43typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44                                 * platforms */
45# else
46#  error "Need GCC 3.1 or later to define type uint128_t"
47# endif
48
49typedef uint8_t u8;
50typedef uint64_t u64;
51typedef int64_t s64;
52
53/******************************************************************************/
54/*-
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56 *
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
70 * computation.
71 */
72
73typedef uint64_t limb;
74typedef uint128_t widelimb;
75
76typedef limb felem[4];
77typedef widelimb widefelem[7];
78
79/*
80 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81 * group order size for the elliptic curve, and we also use this type for
82 * scalars for point multiplication.
83 */
84typedef u8 felem_bytearray[28];
85
86static const felem_bytearray nistp224_curve_params[5] = {
87    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102};
103
104/*-
105 * Precomputed multiples of the standard generator
106 * Points are given in coordinates (X, Y, Z) where Z normally is 1
107 * (0 for the point at infinity).
108 * For each field element, slice a_0 is word 0, etc.
109 *
110 * The table has 2 * 16 elements, starting with the following:
111 * index | bits    | point
112 * ------+---------+------------------------------
113 *     0 | 0 0 0 0 | 0G
114 *     1 | 0 0 0 1 | 1G
115 *     2 | 0 0 1 0 | 2^56G
116 *     3 | 0 0 1 1 | (2^56 + 1)G
117 *     4 | 0 1 0 0 | 2^112G
118 *     5 | 0 1 0 1 | (2^112 + 1)G
119 *     6 | 0 1 1 0 | (2^112 + 2^56)G
120 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121 *     8 | 1 0 0 0 | 2^168G
122 *     9 | 1 0 0 1 | (2^168 + 1)G
123 *    10 | 1 0 1 0 | (2^168 + 2^56)G
124 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125 *    12 | 1 1 0 0 | (2^168 + 2^112)G
126 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129 * followed by a copy of this with each element multiplied by 2^28.
130 *
131 * The reason for this is so that we can clock bits into four different
132 * locations when doing simple scalar multiplies against the base point,
133 * and then another four locations using the second 16 elements.
134 */
135static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136                                        {0, 0, 0, 0},
137                                        {0, 0, 0, 0}},
138                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140                                        {0xd5819985007e34, 0x75a05a07476444,
141                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142                                        {1, 0, 0, 0}},
143                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145                                        {0x29e0b892dc9c43, 0xece8608436e662,
146                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147                                        {1, 0, 0, 0}},
148                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151                                         0x313865468fafec, 0x5cb379ba910a17},
152                                        {1, 0, 0, 0}},
153                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155                                        {0x7510407766af5d, 0x84d929610d5450,
156                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157                                        {1, 0, 0, 0}},
158                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162                                        {1, 0, 0, 0}},
163                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164                                         0x363f37347c1ba4, 0xc391b45c84725c},
165                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166                                         0xc393da7e222a7f, 0x1efb7890ede244},
167                                        {1, 0, 0, 0}},
168                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171                                         0x966c079b753c89, 0xfe67e4e820b112},
172                                        {1, 0, 0, 0}},
173                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176                                         0x641fab2116fd56, 0x72855882b08394},
177                                        {1, 0, 0, 0}},
178                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182                                        {1, 0, 0, 0}},
183                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186                                         0x9281d688bcbe50, 0xc40331df893881},
187                                        {1, 0, 0, 0}},
188                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192                                        {1, 0, 0, 0}},
193                                       {{0x571e509fb5efb3, 0xade88696410552,
194                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197                                        {1, 0, 0, 0}},
198                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202                                        {1, 0, 0, 0}},
203                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205                                        {0x2891425503b11f, 0x08781030579fea,
206                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207                                        {1, 0, 0, 0}},
208                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209                                         0xff17036691a973, 0xf1aef351497c58},
210                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211                                         0x74a684435bd693, 0xeea7471f962558},
212                                        {1, 0, 0, 0}}},
213{{{0, 0, 0, 0},
214  {0, 0, 0, 0},
215  {0, 0, 0, 0}},
216 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218  {1, 0, 0, 0}},
219 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221  {1, 0, 0, 0}},
222 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224  {1, 0, 0, 0}},
225 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227  {1, 0, 0, 0}},
228 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230  {1, 0, 0, 0}},
231 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233  {1, 0, 0, 0}},
234 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236  {1, 0, 0, 0}},
237 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239  {1, 0, 0, 0}},
240 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242  {1, 0, 0, 0}},
243 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245  {1, 0, 0, 0}},
246 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248  {1, 0, 0, 0}},
249 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251  {1, 0, 0, 0}},
252 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254  {1, 0, 0, 0}},
255 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257  {1, 0, 0, 0}},
258 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260  {1, 0, 0, 0}}}
261};
262
263/* Precomputation for the group generator. */
264typedef struct {
265    felem g_pre_comp[2][16][3];
266    int references;
267} NISTP224_PRE_COMP;
268
269const EC_METHOD *EC_GFp_nistp224_method(void)
270{
271    static const EC_METHOD ret = {
272        EC_FLAGS_DEFAULT_OCT,
273        NID_X9_62_prime_field,
274        ec_GFp_nistp224_group_init,
275        ec_GFp_simple_group_finish,
276        ec_GFp_simple_group_clear_finish,
277        ec_GFp_nist_group_copy,
278        ec_GFp_nistp224_group_set_curve,
279        ec_GFp_simple_group_get_curve,
280        ec_GFp_simple_group_get_degree,
281        ec_GFp_simple_group_check_discriminant,
282        ec_GFp_simple_point_init,
283        ec_GFp_simple_point_finish,
284        ec_GFp_simple_point_clear_finish,
285        ec_GFp_simple_point_copy,
286        ec_GFp_simple_point_set_to_infinity,
287        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289        ec_GFp_simple_point_set_affine_coordinates,
290        ec_GFp_nistp224_point_get_affine_coordinates,
291        0 /* point_set_compressed_coordinates */ ,
292        0 /* point2oct */ ,
293        0 /* oct2point */ ,
294        ec_GFp_simple_add,
295        ec_GFp_simple_dbl,
296        ec_GFp_simple_invert,
297        ec_GFp_simple_is_at_infinity,
298        ec_GFp_simple_is_on_curve,
299        ec_GFp_simple_cmp,
300        ec_GFp_simple_make_affine,
301        ec_GFp_simple_points_make_affine,
302        ec_GFp_nistp224_points_mul,
303        ec_GFp_nistp224_precompute_mult,
304        ec_GFp_nistp224_have_precompute_mult,
305        ec_GFp_nist_field_mul,
306        ec_GFp_nist_field_sqr,
307        0 /* field_div */ ,
308        0 /* field_encode */ ,
309        0 /* field_decode */ ,
310        0                       /* field_set_to_one */
311    };
312
313    return &ret;
314}
315
316/*
317 * Helper functions to convert field elements to/from internal representation
318 */
319static void bin28_to_felem(felem out, const u8 in[28])
320{
321    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325}
326
327static void felem_to_bin28(u8 out[28], const felem in)
328{
329    unsigned i;
330    for (i = 0; i < 7; ++i) {
331        out[i] = in[0] >> (8 * i);
332        out[i + 7] = in[1] >> (8 * i);
333        out[i + 14] = in[2] >> (8 * i);
334        out[i + 21] = in[3] >> (8 * i);
335    }
336}
337
338/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339static void flip_endian(u8 *out, const u8 *in, unsigned len)
340{
341    unsigned i;
342    for (i = 0; i < len; ++i)
343        out[i] = in[len - 1 - i];
344}
345
346/* From OpenSSL BIGNUM to internal representation */
347static int BN_to_felem(felem out, const BIGNUM *bn)
348{
349    felem_bytearray b_in;
350    felem_bytearray b_out;
351    unsigned num_bytes;
352
353    /* BN_bn2bin eats leading zeroes */
354    memset(b_out, 0, sizeof b_out);
355    num_bytes = BN_num_bytes(bn);
356    if (num_bytes > sizeof b_out) {
357        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358        return 0;
359    }
360    if (BN_is_negative(bn)) {
361        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362        return 0;
363    }
364    num_bytes = BN_bn2bin(bn, b_in);
365    flip_endian(b_out, b_in, num_bytes);
366    bin28_to_felem(out, b_out);
367    return 1;
368}
369
370/* From internal representation to OpenSSL BIGNUM */
371static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372{
373    felem_bytearray b_in, b_out;
374    felem_to_bin28(b_in, in);
375    flip_endian(b_out, b_in, sizeof b_out);
376    return BN_bin2bn(b_out, sizeof b_out, out);
377}
378
379/******************************************************************************/
380/*-
381 *                              FIELD OPERATIONS
382 *
383 * Field operations, using the internal representation of field elements.
384 * NB! These operations are specific to our point multiplication and cannot be
385 * expected to be correct in general - e.g., multiplication with a large scalar
386 * will cause an overflow.
387 *
388 */
389
390static void felem_one(felem out)
391{
392    out[0] = 1;
393    out[1] = 0;
394    out[2] = 0;
395    out[3] = 0;
396}
397
398static void felem_assign(felem out, const felem in)
399{
400    out[0] = in[0];
401    out[1] = in[1];
402    out[2] = in[2];
403    out[3] = in[3];
404}
405
406/* Sum two field elements: out += in */
407static void felem_sum(felem out, const felem in)
408{
409    out[0] += in[0];
410    out[1] += in[1];
411    out[2] += in[2];
412    out[3] += in[3];
413}
414
415/* Get negative value: out = -in */
416/* Assumes in[i] < 2^57 */
417static void felem_neg(felem out, const felem in)
418{
419    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421    static const limb two58m42m2 = (((limb) 1) << 58) -
422        (((limb) 1) << 42) - (((limb) 1) << 2);
423
424    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425    out[0] = two58p2 - in[0];
426    out[1] = two58m42m2 - in[1];
427    out[2] = two58m2 - in[2];
428    out[3] = two58m2 - in[3];
429}
430
431/* Subtract field elements: out -= in */
432/* Assumes in[i] < 2^57 */
433static void felem_diff(felem out, const felem in)
434{
435    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437    static const limb two58m42m2 = (((limb) 1) << 58) -
438        (((limb) 1) << 42) - (((limb) 1) << 2);
439
440    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441    out[0] += two58p2;
442    out[1] += two58m42m2;
443    out[2] += two58m2;
444    out[3] += two58m2;
445
446    out[0] -= in[0];
447    out[1] -= in[1];
448    out[2] -= in[2];
449    out[3] -= in[3];
450}
451
452/* Subtract in unreduced 128-bit mode: out -= in */
453/* Assumes in[i] < 2^119 */
454static void widefelem_diff(widefelem out, const widefelem in)
455{
456    static const widelimb two120 = ((widelimb) 1) << 120;
457    static const widelimb two120m64 = (((widelimb) 1) << 120) -
458        (((widelimb) 1) << 64);
459    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461
462    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463    out[0] += two120;
464    out[1] += two120m64;
465    out[2] += two120m64;
466    out[3] += two120;
467    out[4] += two120m104m64;
468    out[5] += two120m64;
469    out[6] += two120m64;
470
471    out[0] -= in[0];
472    out[1] -= in[1];
473    out[2] -= in[2];
474    out[3] -= in[3];
475    out[4] -= in[4];
476    out[5] -= in[5];
477    out[6] -= in[6];
478}
479
480/* Subtract in mixed mode: out128 -= in64 */
481/* in[i] < 2^63 */
482static void felem_diff_128_64(widefelem out, const felem in)
483{
484    static const widelimb two64p8 = (((widelimb) 1) << 64) +
485        (((widelimb) 1) << 8);
486    static const widelimb two64m8 = (((widelimb) 1) << 64) -
487        (((widelimb) 1) << 8);
488    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490
491    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492    out[0] += two64p8;
493    out[1] += two64m48m8;
494    out[2] += two64m8;
495    out[3] += two64m8;
496
497    out[0] -= in[0];
498    out[1] -= in[1];
499    out[2] -= in[2];
500    out[3] -= in[3];
501}
502
503/*
504 * Multiply a field element by a scalar: out = out * scalar The scalars we
505 * actually use are small, so results fit without overflow
506 */
507static void felem_scalar(felem out, const limb scalar)
508{
509    out[0] *= scalar;
510    out[1] *= scalar;
511    out[2] *= scalar;
512    out[3] *= scalar;
513}
514
515/*
516 * Multiply an unreduced field element by a scalar: out = out * scalar The
517 * scalars we actually use are small, so results fit without overflow
518 */
519static void widefelem_scalar(widefelem out, const widelimb scalar)
520{
521    out[0] *= scalar;
522    out[1] *= scalar;
523    out[2] *= scalar;
524    out[3] *= scalar;
525    out[4] *= scalar;
526    out[5] *= scalar;
527    out[6] *= scalar;
528}
529
530/* Square a field element: out = in^2 */
531static void felem_square(widefelem out, const felem in)
532{
533    limb tmp0, tmp1, tmp2;
534    tmp0 = 2 * in[0];
535    tmp1 = 2 * in[1];
536    tmp2 = 2 * in[2];
537    out[0] = ((widelimb) in[0]) * in[0];
538    out[1] = ((widelimb) in[0]) * tmp1;
539    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542    out[5] = ((widelimb) in[3]) * tmp2;
543    out[6] = ((widelimb) in[3]) * in[3];
544}
545
546/* Multiply two field elements: out = in1 * in2 */
547static void felem_mul(widefelem out, const felem in1, const felem in2)
548{
549    out[0] = ((widelimb) in1[0]) * in2[0];
550    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552        ((widelimb) in1[2]) * in2[0];
553    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556        ((widelimb) in1[3]) * in2[1];
557    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558    out[6] = ((widelimb) in1[3]) * in2[3];
559}
560
561/*-
562 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563 * Requires in[i] < 2^126,
564 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565static void felem_reduce(felem out, const widefelem in)
566{
567    static const widelimb two127p15 = (((widelimb) 1) << 127) +
568        (((widelimb) 1) << 15);
569    static const widelimb two127m71 = (((widelimb) 1) << 127) -
570        (((widelimb) 1) << 71);
571    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573    widelimb output[5];
574
575    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576    output[0] = in[0] + two127p15;
577    output[1] = in[1] + two127m71m55;
578    output[2] = in[2] + two127m71;
579    output[3] = in[3];
580    output[4] = in[4];
581
582    /* Eliminate in[4], in[5], in[6] */
583    output[4] += in[6] >> 16;
584    output[3] += (in[6] & 0xffff) << 40;
585    output[2] -= in[6];
586
587    output[3] += in[5] >> 16;
588    output[2] += (in[5] & 0xffff) << 40;
589    output[1] -= in[5];
590
591    output[2] += output[4] >> 16;
592    output[1] += (output[4] & 0xffff) << 40;
593    output[0] -= output[4];
594
595    /* Carry 2 -> 3 -> 4 */
596    output[3] += output[2] >> 56;
597    output[2] &= 0x00ffffffffffffff;
598
599    output[4] = output[3] >> 56;
600    output[3] &= 0x00ffffffffffffff;
601
602    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603
604    /* Eliminate output[4] */
605    output[2] += output[4] >> 16;
606    /* output[2] < 2^56 + 2^56 = 2^57 */
607    output[1] += (output[4] & 0xffff) << 40;
608    output[0] -= output[4];
609
610    /* Carry 0 -> 1 -> 2 -> 3 */
611    output[1] += output[0] >> 56;
612    out[0] = output[0] & 0x00ffffffffffffff;
613
614    output[2] += output[1] >> 56;
615    /* output[2] < 2^57 + 2^72 */
616    out[1] = output[1] & 0x00ffffffffffffff;
617    output[3] += output[2] >> 56;
618    /* output[3] <= 2^56 + 2^16 */
619    out[2] = output[2] & 0x00ffffffffffffff;
620
621    /*-
622     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623     * out[3] <= 2^56 + 2^16 (due to final carry),
624     * so out < 2*p
625     */
626    out[3] = output[3];
627}
628
629static void felem_square_reduce(felem out, const felem in)
630{
631    widefelem tmp;
632    felem_square(tmp, in);
633    felem_reduce(out, tmp);
634}
635
636static void felem_mul_reduce(felem out, const felem in1, const felem in2)
637{
638    widefelem tmp;
639    felem_mul(tmp, in1, in2);
640    felem_reduce(out, tmp);
641}
642
643/*
644 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645 * call felem_reduce first)
646 */
647static void felem_contract(felem out, const felem in)
648{
649    static const int64_t two56 = ((limb) 1) << 56;
650    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652    int64_t tmp[4], a;
653    tmp[0] = in[0];
654    tmp[1] = in[1];
655    tmp[2] = in[2];
656    tmp[3] = in[3];
657    /* Case 1: a = 1 iff in >= 2^224 */
658    a = (in[3] >> 56);
659    tmp[0] -= a;
660    tmp[1] += a << 40;
661    tmp[3] &= 0x00ffffffffffffff;
662    /*
663     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664     * and the lower part is non-zero
665     */
666    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668    a &= 0x00ffffffffffffff;
669    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670    a = (a - 1) >> 63;
671    /* subtract 2^224 - 2^96 + 1 if a is all-one */
672    tmp[3] &= a ^ 0xffffffffffffffff;
673    tmp[2] &= a ^ 0xffffffffffffffff;
674    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675    tmp[0] -= 1 & a;
676
677    /*
678     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679     * non-zero, so we only need one step
680     */
681    a = tmp[0] >> 63;
682    tmp[0] += two56 & a;
683    tmp[1] -= 1 & a;
684
685    /* carry 1 -> 2 -> 3 */
686    tmp[2] += tmp[1] >> 56;
687    tmp[1] &= 0x00ffffffffffffff;
688
689    tmp[3] += tmp[2] >> 56;
690    tmp[2] &= 0x00ffffffffffffff;
691
692    /* Now 0 <= out < p */
693    out[0] = tmp[0];
694    out[1] = tmp[1];
695    out[2] = tmp[2];
696    out[3] = tmp[3];
697}
698
699/*
700 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701 * elements are reduced to in < 2^225, so we only need to check three cases:
702 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703 */
704static limb felem_is_zero(const felem in)
705{
706    limb zero, two224m96p1, two225m97p2;
707
708    zero = in[0] | in[1] | in[2] | in[3];
709    zero = (((int64_t) (zero) - 1) >> 63) & 1;
710    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716    return (zero | two224m96p1 | two225m97p2);
717}
718
719static int felem_is_zero_int(const void *in)
720{
721    return (int)(felem_is_zero(in) & ((limb) 1));
722}
723
724/* Invert a field element */
725/* Computation chain copied from djb's code */
726static void felem_inv(felem out, const felem in)
727{
728    felem ftmp, ftmp2, ftmp3, ftmp4;
729    widefelem tmp;
730    unsigned i;
731
732    felem_square(tmp, in);
733    felem_reduce(ftmp, tmp);    /* 2 */
734    felem_mul(tmp, in, ftmp);
735    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
736    felem_square(tmp, ftmp);
737    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
738    felem_mul(tmp, in, ftmp);
739    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
740    felem_square(tmp, ftmp);
741    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
742    felem_square(tmp, ftmp2);
743    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
744    felem_square(tmp, ftmp2);
745    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
746    felem_mul(tmp, ftmp2, ftmp);
747    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
748    felem_square(tmp, ftmp);
749    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
750    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
751        felem_square(tmp, ftmp2);
752        felem_reduce(ftmp2, tmp);
753    }
754    felem_mul(tmp, ftmp2, ftmp);
755    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
756    felem_square(tmp, ftmp2);
757    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
758    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
759        felem_square(tmp, ftmp3);
760        felem_reduce(ftmp3, tmp);
761    }
762    felem_mul(tmp, ftmp3, ftmp2);
763    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
764    felem_square(tmp, ftmp2);
765    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
766    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
767        felem_square(tmp, ftmp3);
768        felem_reduce(ftmp3, tmp);
769    }
770    felem_mul(tmp, ftmp3, ftmp2);
771    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
772    felem_square(tmp, ftmp3);
773    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
774    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
775        felem_square(tmp, ftmp4);
776        felem_reduce(ftmp4, tmp);
777    }
778    felem_mul(tmp, ftmp3, ftmp4);
779    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
780    felem_square(tmp, ftmp3);
781    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
782    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
783        felem_square(tmp, ftmp4);
784        felem_reduce(ftmp4, tmp);
785    }
786    felem_mul(tmp, ftmp2, ftmp4);
787    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
788    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
789        felem_square(tmp, ftmp2);
790        felem_reduce(ftmp2, tmp);
791    }
792    felem_mul(tmp, ftmp2, ftmp);
793    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
794    felem_square(tmp, ftmp);
795    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
796    felem_mul(tmp, ftmp, in);
797    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
798    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
799        felem_square(tmp, ftmp);
800        felem_reduce(ftmp, tmp);
801    }
802    felem_mul(tmp, ftmp, ftmp3);
803    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
804}
805
806/*
807 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808 * out to itself.
809 */
810static void copy_conditional(felem out, const felem in, limb icopy)
811{
812    unsigned i;
813    /*
814     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815     */
816    const limb copy = -icopy;
817    for (i = 0; i < 4; ++i) {
818        const limb tmp = copy & (in[i] ^ out[i]);
819        out[i] ^= tmp;
820    }
821}
822
823/******************************************************************************/
824/*-
825 *                       ELLIPTIC CURVE POINT OPERATIONS
826 *
827 * Points are represented in Jacobian projective coordinates:
828 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829 * or to the point at infinity if Z == 0.
830 *
831 */
832
833/*-
834 * Double an elliptic curve point:
835 * (X', Y', Z') = 2 * (X, Y, Z), where
836 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840 * while x_out == y_in is not (maybe this works, but it's not tested).
841 */
842static void
843point_double(felem x_out, felem y_out, felem z_out,
844             const felem x_in, const felem y_in, const felem z_in)
845{
846    widefelem tmp, tmp2;
847    felem delta, gamma, beta, alpha, ftmp, ftmp2;
848
849    felem_assign(ftmp, x_in);
850    felem_assign(ftmp2, x_in);
851
852    /* delta = z^2 */
853    felem_square(tmp, z_in);
854    felem_reduce(delta, tmp);
855
856    /* gamma = y^2 */
857    felem_square(tmp, y_in);
858    felem_reduce(gamma, tmp);
859
860    /* beta = x*gamma */
861    felem_mul(tmp, x_in, gamma);
862    felem_reduce(beta, tmp);
863
864    /* alpha = 3*(x-delta)*(x+delta) */
865    felem_diff(ftmp, delta);
866    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867    felem_sum(ftmp2, delta);
868    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869    felem_scalar(ftmp2, 3);
870    /* ftmp2[i] < 3 * 2^58 < 2^60 */
871    felem_mul(tmp, ftmp, ftmp2);
872    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873    felem_reduce(alpha, tmp);
874
875    /* x' = alpha^2 - 8*beta */
876    felem_square(tmp, alpha);
877    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878    felem_assign(ftmp, beta);
879    felem_scalar(ftmp, 8);
880    /* ftmp[i] < 8 * 2^57 = 2^60 */
881    felem_diff_128_64(tmp, ftmp);
882    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883    felem_reduce(x_out, tmp);
884
885    /* z' = (y + z)^2 - gamma - delta */
886    felem_sum(delta, gamma);
887    /* delta[i] < 2^57 + 2^57 = 2^58 */
888    felem_assign(ftmp, y_in);
889    felem_sum(ftmp, z_in);
890    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891    felem_square(tmp, ftmp);
892    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893    felem_diff_128_64(tmp, delta);
894    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895    felem_reduce(z_out, tmp);
896
897    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898    felem_scalar(beta, 4);
899    /* beta[i] < 4 * 2^57 = 2^59 */
900    felem_diff(beta, x_out);
901    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902    felem_mul(tmp, alpha, beta);
903    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904    felem_square(tmp2, gamma);
905    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906    widefelem_scalar(tmp2, 8);
907    /* tmp2[i] < 8 * 2^116 = 2^119 */
908    widefelem_diff(tmp, tmp2);
909    /* tmp[i] < 2^119 + 2^120 < 2^121 */
910    felem_reduce(y_out, tmp);
911}
912
913/*-
914 * Add two elliptic curve points:
915 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921 *
922 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
923 */
924
925/*
926 * This function is not entirely constant-time: it includes a branch for
927 * checking whether the two input points are equal, (while not equal to the
928 * point at infinity). This case never happens during single point
929 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930 */
931static void point_add(felem x3, felem y3, felem z3,
932                      const felem x1, const felem y1, const felem z1,
933                      const int mixed, const felem x2, const felem y2,
934                      const felem z2)
935{
936    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937    widefelem tmp, tmp2;
938    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
939
940    if (!mixed) {
941        /* ftmp2 = z2^2 */
942        felem_square(tmp, z2);
943        felem_reduce(ftmp2, tmp);
944
945        /* ftmp4 = z2^3 */
946        felem_mul(tmp, ftmp2, z2);
947        felem_reduce(ftmp4, tmp);
948
949        /* ftmp4 = z2^3*y1 */
950        felem_mul(tmp2, ftmp4, y1);
951        felem_reduce(ftmp4, tmp2);
952
953        /* ftmp2 = z2^2*x1 */
954        felem_mul(tmp2, ftmp2, x1);
955        felem_reduce(ftmp2, tmp2);
956    } else {
957        /*
958         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
959         */
960
961        /* ftmp4 = z2^3*y1 */
962        felem_assign(ftmp4, y1);
963
964        /* ftmp2 = z2^2*x1 */
965        felem_assign(ftmp2, x1);
966    }
967
968    /* ftmp = z1^2 */
969    felem_square(tmp, z1);
970    felem_reduce(ftmp, tmp);
971
972    /* ftmp3 = z1^3 */
973    felem_mul(tmp, ftmp, z1);
974    felem_reduce(ftmp3, tmp);
975
976    /* tmp = z1^3*y2 */
977    felem_mul(tmp, ftmp3, y2);
978    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979
980    /* ftmp3 = z1^3*y2 - z2^3*y1 */
981    felem_diff_128_64(tmp, ftmp4);
982    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983    felem_reduce(ftmp3, tmp);
984
985    /* tmp = z1^2*x2 */
986    felem_mul(tmp, ftmp, x2);
987    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988
989    /* ftmp = z1^2*x2 - z2^2*x1 */
990    felem_diff_128_64(tmp, ftmp2);
991    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992    felem_reduce(ftmp, tmp);
993
994    /*
995     * the formulae are incorrect if the points are equal so we check for
996     * this and do doubling if this happens
997     */
998    x_equal = felem_is_zero(ftmp);
999    y_equal = felem_is_zero(ftmp3);
1000    z1_is_zero = felem_is_zero(z1);
1001    z2_is_zero = felem_is_zero(z2);
1002    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004        point_double(x3, y3, z3, x1, y1, z1);
1005        return;
1006    }
1007
1008    /* ftmp5 = z1*z2 */
1009    if (!mixed) {
1010        felem_mul(tmp, z1, z2);
1011        felem_reduce(ftmp5, tmp);
1012    } else {
1013        /* special case z2 = 0 is handled later */
1014        felem_assign(ftmp5, z1);
1015    }
1016
1017    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018    felem_mul(tmp, ftmp, ftmp5);
1019    felem_reduce(z_out, tmp);
1020
1021    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022    felem_assign(ftmp5, ftmp);
1023    felem_square(tmp, ftmp);
1024    felem_reduce(ftmp, tmp);
1025
1026    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027    felem_mul(tmp, ftmp, ftmp5);
1028    felem_reduce(ftmp5, tmp);
1029
1030    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031    felem_mul(tmp, ftmp2, ftmp);
1032    felem_reduce(ftmp2, tmp);
1033
1034    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035    felem_mul(tmp, ftmp4, ftmp5);
1036    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037
1038    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039    felem_square(tmp2, ftmp3);
1040    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041
1042    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043    felem_diff_128_64(tmp2, ftmp5);
1044    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045
1046    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047    felem_assign(ftmp5, ftmp2);
1048    felem_scalar(ftmp5, 2);
1049    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050
1051    /*-
1052     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054     */
1055    felem_diff_128_64(tmp2, ftmp5);
1056    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057    felem_reduce(x_out, tmp2);
1058
1059    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060    felem_diff(ftmp2, x_out);
1061    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062
1063    /*
1064     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065     */
1066    felem_mul(tmp2, ftmp3, ftmp2);
1067    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068
1069    /*-
1070     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072     */
1073    widefelem_diff(tmp2, tmp);
1074    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075    felem_reduce(y_out, tmp2);
1076
1077    /*
1078     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079     * the point at infinity, so we need to check for this separately
1080     */
1081
1082    /*
1083     * if point 1 is at infinity, copy point 2 to output, and vice versa
1084     */
1085    copy_conditional(x_out, x2, z1_is_zero);
1086    copy_conditional(x_out, x1, z2_is_zero);
1087    copy_conditional(y_out, y2, z1_is_zero);
1088    copy_conditional(y_out, y1, z2_is_zero);
1089    copy_conditional(z_out, z2, z1_is_zero);
1090    copy_conditional(z_out, z1, z2_is_zero);
1091    felem_assign(x3, x_out);
1092    felem_assign(y3, y_out);
1093    felem_assign(z3, z_out);
1094}
1095
1096/*
1097 * select_point selects the |idx|th point from a precomputation table and
1098 * copies it to out.
1099 * The pre_comp array argument should be size of |size| argument
1100 */
1101static void select_point(const u64 idx, unsigned int size,
1102                         const felem pre_comp[][3], felem out[3])
1103{
1104    unsigned i, j;
1105    limb *outlimbs = &out[0][0];
1106    memset(outlimbs, 0, 3 * sizeof(felem));
1107
1108    for (i = 0; i < size; i++) {
1109        const limb *inlimbs = &pre_comp[i][0][0];
1110        u64 mask = i ^ idx;
1111        mask |= mask >> 4;
1112        mask |= mask >> 2;
1113        mask |= mask >> 1;
1114        mask &= 1;
1115        mask--;
1116        for (j = 0; j < 4 * 3; j++)
1117            outlimbs[j] |= inlimbs[j] & mask;
1118    }
1119}
1120
1121/* get_bit returns the |i|th bit in |in| */
1122static char get_bit(const felem_bytearray in, unsigned i)
1123{
1124    if (i >= 224)
1125        return 0;
1126    return (in[i >> 3] >> (i & 7)) & 1;
1127}
1128
1129/*
1130 * Interleaved point multiplication using precomputed point multiples: The
1131 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133 * generator, using certain (large) precomputed multiples in g_pre_comp.
1134 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135 */
1136static void batch_mul(felem x_out, felem y_out, felem z_out,
1137                      const felem_bytearray scalars[],
1138                      const unsigned num_points, const u8 *g_scalar,
1139                      const int mixed, const felem pre_comp[][17][3],
1140                      const felem g_pre_comp[2][16][3])
1141{
1142    int i, skip;
1143    unsigned num;
1144    unsigned gen_mul = (g_scalar != NULL);
1145    felem nq[3], tmp[4];
1146    u64 bits;
1147    u8 sign, digit;
1148
1149    /* set nq to the point at infinity */
1150    memset(nq, 0, 3 * sizeof(felem));
1151
1152    /*
1153     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154     * of the generator (two in each of the last 28 rounds) and additions of
1155     * other points multiples (every 5th round).
1156     */
1157    skip = 1;                   /* save two point operations in the first
1158                                 * round */
1159    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160        /* double */
1161        if (!skip)
1162            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163
1164        /* add multiples of the generator */
1165        if (gen_mul && (i <= 27)) {
1166            /* first, look 28 bits upwards */
1167            bits = get_bit(g_scalar, i + 196) << 3;
1168            bits |= get_bit(g_scalar, i + 140) << 2;
1169            bits |= get_bit(g_scalar, i + 84) << 1;
1170            bits |= get_bit(g_scalar, i + 28);
1171            /* select the point to add, in constant time */
1172            select_point(bits, 16, g_pre_comp[1], tmp);
1173
1174            if (!skip) {
1175                /* value 1 below is argument for "mixed" */
1176                point_add(nq[0], nq[1], nq[2],
1177                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178            } else {
1179                memcpy(nq, tmp, 3 * sizeof(felem));
1180                skip = 0;
1181            }
1182
1183            /* second, look at the current position */
1184            bits = get_bit(g_scalar, i + 168) << 3;
1185            bits |= get_bit(g_scalar, i + 112) << 2;
1186            bits |= get_bit(g_scalar, i + 56) << 1;
1187            bits |= get_bit(g_scalar, i);
1188            /* select the point to add, in constant time */
1189            select_point(bits, 16, g_pre_comp[0], tmp);
1190            point_add(nq[0], nq[1], nq[2],
1191                      nq[0], nq[1], nq[2],
1192                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193        }
1194
1195        /* do other additions every 5 doublings */
1196        if (num_points && (i % 5 == 0)) {
1197            /* loop over all scalars */
1198            for (num = 0; num < num_points; ++num) {
1199                bits = get_bit(scalars[num], i + 4) << 5;
1200                bits |= get_bit(scalars[num], i + 3) << 4;
1201                bits |= get_bit(scalars[num], i + 2) << 3;
1202                bits |= get_bit(scalars[num], i + 1) << 2;
1203                bits |= get_bit(scalars[num], i) << 1;
1204                bits |= get_bit(scalars[num], i - 1);
1205                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206
1207                /* select the point to add or subtract */
1208                select_point(digit, 17, pre_comp[num], tmp);
1209                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210                                            * point */
1211                copy_conditional(tmp[1], tmp[3], sign);
1212
1213                if (!skip) {
1214                    point_add(nq[0], nq[1], nq[2],
1215                              nq[0], nq[1], nq[2],
1216                              mixed, tmp[0], tmp[1], tmp[2]);
1217                } else {
1218                    memcpy(nq, tmp, 3 * sizeof(felem));
1219                    skip = 0;
1220                }
1221            }
1222        }
1223    }
1224    felem_assign(x_out, nq[0]);
1225    felem_assign(y_out, nq[1]);
1226    felem_assign(z_out, nq[2]);
1227}
1228
1229/******************************************************************************/
1230/*
1231 * FUNCTIONS TO MANAGE PRECOMPUTATION
1232 */
1233
1234static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235{
1236    NISTP224_PRE_COMP *ret = NULL;
1237    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238    if (!ret) {
1239        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240        return ret;
1241    }
1242    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243    ret->references = 1;
1244    return ret;
1245}
1246
1247static void *nistp224_pre_comp_dup(void *src_)
1248{
1249    NISTP224_PRE_COMP *src = src_;
1250
1251    /* no need to actually copy, these objects never change! */
1252    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253
1254    return src_;
1255}
1256
1257static void nistp224_pre_comp_free(void *pre_)
1258{
1259    int i;
1260    NISTP224_PRE_COMP *pre = pre_;
1261
1262    if (!pre)
1263        return;
1264
1265    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266    if (i > 0)
1267        return;
1268
1269    OPENSSL_free(pre);
1270}
1271
1272static void nistp224_pre_comp_clear_free(void *pre_)
1273{
1274    int i;
1275    NISTP224_PRE_COMP *pre = pre_;
1276
1277    if (!pre)
1278        return;
1279
1280    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281    if (i > 0)
1282        return;
1283
1284    OPENSSL_cleanse(pre, sizeof *pre);
1285    OPENSSL_free(pre);
1286}
1287
1288/******************************************************************************/
1289/*
1290 * OPENSSL EC_METHOD FUNCTIONS
1291 */
1292
1293int ec_GFp_nistp224_group_init(EC_GROUP *group)
1294{
1295    int ret;
1296    ret = ec_GFp_simple_group_init(group);
1297    group->a_is_minus3 = 1;
1298    return ret;
1299}
1300
1301int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302                                    const BIGNUM *a, const BIGNUM *b,
1303                                    BN_CTX *ctx)
1304{
1305    int ret = 0;
1306    BN_CTX *new_ctx = NULL;
1307    BIGNUM *curve_p, *curve_a, *curve_b;
1308
1309    if (ctx == NULL)
1310        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311            return 0;
1312    BN_CTX_start(ctx);
1313    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315        ((curve_b = BN_CTX_get(ctx)) == NULL))
1316        goto err;
1317    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322              EC_R_WRONG_CURVE_PARAMETERS);
1323        goto err;
1324    }
1325    group->field_mod_func = BN_nist_mod_224;
1326    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327 err:
1328    BN_CTX_end(ctx);
1329    if (new_ctx != NULL)
1330        BN_CTX_free(new_ctx);
1331    return ret;
1332}
1333
1334/*
1335 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336 * (X/Z^2, Y/Z^3)
1337 */
1338int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339                                                 const EC_POINT *point,
1340                                                 BIGNUM *x, BIGNUM *y,
1341                                                 BN_CTX *ctx)
1342{
1343    felem z1, z2, x_in, y_in, x_out, y_out;
1344    widefelem tmp;
1345
1346    if (EC_POINT_is_at_infinity(group, point)) {
1347        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348              EC_R_POINT_AT_INFINITY);
1349        return 0;
1350    }
1351    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352        (!BN_to_felem(z1, &point->Z)))
1353        return 0;
1354    felem_inv(z2, z1);
1355    felem_square(tmp, z2);
1356    felem_reduce(z1, tmp);
1357    felem_mul(tmp, x_in, z1);
1358    felem_reduce(x_in, tmp);
1359    felem_contract(x_out, x_in);
1360    if (x != NULL) {
1361        if (!felem_to_BN(x, x_out)) {
1362            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363                  ERR_R_BN_LIB);
1364            return 0;
1365        }
1366    }
1367    felem_mul(tmp, z1, z2);
1368    felem_reduce(z1, tmp);
1369    felem_mul(tmp, y_in, z1);
1370    felem_reduce(y_in, tmp);
1371    felem_contract(y_out, y_in);
1372    if (y != NULL) {
1373        if (!felem_to_BN(y, y_out)) {
1374            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375                  ERR_R_BN_LIB);
1376            return 0;
1377        }
1378    }
1379    return 1;
1380}
1381
1382static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383                               felem tmp_felems[ /* num+1 */ ])
1384{
1385    /*
1386     * Runs in constant time, unless an input is the point at infinity (which
1387     * normally shouldn't happen).
1388     */
1389    ec_GFp_nistp_points_make_affine_internal(num,
1390                                             points,
1391                                             sizeof(felem),
1392                                             tmp_felems,
1393                                             (void (*)(void *))felem_one,
1394                                             felem_is_zero_int,
1395                                             (void (*)(void *, const void *))
1396                                             felem_assign,
1397                                             (void (*)(void *, const void *))
1398                                             felem_square_reduce, (void (*)
1399                                                                   (void *,
1400                                                                    const void
1401                                                                    *,
1402                                                                    const void
1403                                                                    *))
1404                                             felem_mul_reduce,
1405                                             (void (*)(void *, const void *))
1406                                             felem_inv,
1407                                             (void (*)(void *, const void *))
1408                                             felem_contract);
1409}
1410
1411/*
1412 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1413 * values Result is stored in r (r can equal one of the inputs).
1414 */
1415int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1416                               const BIGNUM *scalar, size_t num,
1417                               const EC_POINT *points[],
1418                               const BIGNUM *scalars[], BN_CTX *ctx)
1419{
1420    int ret = 0;
1421    int j;
1422    unsigned i;
1423    int mixed = 0;
1424    BN_CTX *new_ctx = NULL;
1425    BIGNUM *x, *y, *z, *tmp_scalar;
1426    felem_bytearray g_secret;
1427    felem_bytearray *secrets = NULL;
1428    felem(*pre_comp)[17][3] = NULL;
1429    felem *tmp_felems = NULL;
1430    felem_bytearray tmp;
1431    unsigned num_bytes;
1432    int have_pre_comp = 0;
1433    size_t num_points = num;
1434    felem x_in, y_in, z_in, x_out, y_out, z_out;
1435    NISTP224_PRE_COMP *pre = NULL;
1436    const felem(*g_pre_comp)[16][3] = NULL;
1437    EC_POINT *generator = NULL;
1438    const EC_POINT *p = NULL;
1439    const BIGNUM *p_scalar = NULL;
1440
1441    if (ctx == NULL)
1442        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1443            return 0;
1444    BN_CTX_start(ctx);
1445    if (((x = BN_CTX_get(ctx)) == NULL) ||
1446        ((y = BN_CTX_get(ctx)) == NULL) ||
1447        ((z = BN_CTX_get(ctx)) == NULL) ||
1448        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1449        goto err;
1450
1451    if (scalar != NULL) {
1452        pre = EC_EX_DATA_get_data(group->extra_data,
1453                                  nistp224_pre_comp_dup,
1454                                  nistp224_pre_comp_free,
1455                                  nistp224_pre_comp_clear_free);
1456        if (pre)
1457            /* we have precomputation, try to use it */
1458            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1459        else
1460            /* try to use the standard precomputation */
1461            g_pre_comp = &gmul[0];
1462        generator = EC_POINT_new(group);
1463        if (generator == NULL)
1464            goto err;
1465        /* get the generator from precomputation */
1466        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1467            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1468            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1469            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1470            goto err;
1471        }
1472        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1473                                                      generator, x, y, z,
1474                                                      ctx))
1475            goto err;
1476        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1477            /* precomputation matches generator */
1478            have_pre_comp = 1;
1479        else
1480            /*
1481             * we don't have valid precomputation: treat the generator as a
1482             * random point
1483             */
1484            num_points = num_points + 1;
1485    }
1486
1487    if (num_points > 0) {
1488        if (num_points >= 3) {
1489            /*
1490             * unless we precompute multiples for just one or two points,
1491             * converting those into affine form is time well spent
1492             */
1493            mixed = 1;
1494        }
1495        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1496        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1497        if (mixed)
1498            tmp_felems =
1499                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1500        if ((secrets == NULL) || (pre_comp == NULL)
1501            || (mixed && (tmp_felems == NULL))) {
1502            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1503            goto err;
1504        }
1505
1506        /*
1507         * we treat NULL scalars as 0, and NULL points as points at infinity,
1508         * i.e., they contribute nothing to the linear combination
1509         */
1510        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1511        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1512        for (i = 0; i < num_points; ++i) {
1513            if (i == num)
1514                /* the generator */
1515            {
1516                p = EC_GROUP_get0_generator(group);
1517                p_scalar = scalar;
1518            } else
1519                /* the i^th point */
1520            {
1521                p = points[i];
1522                p_scalar = scalars[i];
1523            }
1524            if ((p_scalar != NULL) && (p != NULL)) {
1525                /* reduce scalar to 0 <= scalar < 2^224 */
1526                if ((BN_num_bits(p_scalar) > 224)
1527                    || (BN_is_negative(p_scalar))) {
1528                    /*
1529                     * this is an unusual input, and we don't guarantee
1530                     * constant-timeness
1531                     */
1532                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1533                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1534                        goto err;
1535                    }
1536                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1537                } else
1538                    num_bytes = BN_bn2bin(p_scalar, tmp);
1539                flip_endian(secrets[i], tmp, num_bytes);
1540                /* precompute multiples */
1541                if ((!BN_to_felem(x_out, &p->X)) ||
1542                    (!BN_to_felem(y_out, &p->Y)) ||
1543                    (!BN_to_felem(z_out, &p->Z)))
1544                    goto err;
1545                felem_assign(pre_comp[i][1][0], x_out);
1546                felem_assign(pre_comp[i][1][1], y_out);
1547                felem_assign(pre_comp[i][1][2], z_out);
1548                for (j = 2; j <= 16; ++j) {
1549                    if (j & 1) {
1550                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1551                                  pre_comp[i][j][2], pre_comp[i][1][0],
1552                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1553                                  pre_comp[i][j - 1][0],
1554                                  pre_comp[i][j - 1][1],
1555                                  pre_comp[i][j - 1][2]);
1556                    } else {
1557                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1558                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1559                                     pre_comp[i][j / 2][1],
1560                                     pre_comp[i][j / 2][2]);
1561                    }
1562                }
1563            }
1564        }
1565        if (mixed)
1566            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1567    }
1568
1569    /* the scalar for the generator */
1570    if ((scalar != NULL) && (have_pre_comp)) {
1571        memset(g_secret, 0, sizeof g_secret);
1572        /* reduce scalar to 0 <= scalar < 2^224 */
1573        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1574            /*
1575             * this is an unusual input, and we don't guarantee
1576             * constant-timeness
1577             */
1578            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1579                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1580                goto err;
1581            }
1582            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1583        } else
1584            num_bytes = BN_bn2bin(scalar, tmp);
1585        flip_endian(g_secret, tmp, num_bytes);
1586        /* do the multiplication with generator precomputation */
1587        batch_mul(x_out, y_out, z_out,
1588                  (const felem_bytearray(*))secrets, num_points,
1589                  g_secret,
1590                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1591    } else
1592        /* do the multiplication without generator precomputation */
1593        batch_mul(x_out, y_out, z_out,
1594                  (const felem_bytearray(*))secrets, num_points,
1595                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1596    /* reduce the output to its unique minimal representation */
1597    felem_contract(x_in, x_out);
1598    felem_contract(y_in, y_out);
1599    felem_contract(z_in, z_out);
1600    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1601        (!felem_to_BN(z, z_in))) {
1602        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1603        goto err;
1604    }
1605    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1606
1607 err:
1608    BN_CTX_end(ctx);
1609    if (generator != NULL)
1610        EC_POINT_free(generator);
1611    if (new_ctx != NULL)
1612        BN_CTX_free(new_ctx);
1613    if (secrets != NULL)
1614        OPENSSL_free(secrets);
1615    if (pre_comp != NULL)
1616        OPENSSL_free(pre_comp);
1617    if (tmp_felems != NULL)
1618        OPENSSL_free(tmp_felems);
1619    return ret;
1620}
1621
1622int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1623{
1624    int ret = 0;
1625    NISTP224_PRE_COMP *pre = NULL;
1626    int i, j;
1627    BN_CTX *new_ctx = NULL;
1628    BIGNUM *x, *y;
1629    EC_POINT *generator = NULL;
1630    felem tmp_felems[32];
1631
1632    /* throw away old precomputation */
1633    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1634                         nistp224_pre_comp_free,
1635                         nistp224_pre_comp_clear_free);
1636    if (ctx == NULL)
1637        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1638            return 0;
1639    BN_CTX_start(ctx);
1640    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1641        goto err;
1642    /* get the generator */
1643    if (group->generator == NULL)
1644        goto err;
1645    generator = EC_POINT_new(group);
1646    if (generator == NULL)
1647        goto err;
1648    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1649    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1650    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1651        goto err;
1652    if ((pre = nistp224_pre_comp_new()) == NULL)
1653        goto err;
1654    /*
1655     * if the generator is the standard one, use built-in precomputation
1656     */
1657    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1658        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1659        goto done;
1660    }
1661    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1662        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1663        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1664        goto err;
1665    /*
1666     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1667     * 2^140*G, 2^196*G for the second one
1668     */
1669    for (i = 1; i <= 8; i <<= 1) {
1670        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1672                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1673        for (j = 0; j < 27; ++j) {
1674            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1675                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1676                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1677        }
1678        if (i == 8)
1679            break;
1680        point_double(pre->g_pre_comp[0][2 * i][0],
1681                     pre->g_pre_comp[0][2 * i][1],
1682                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1683                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1684        for (j = 0; j < 27; ++j) {
1685            point_double(pre->g_pre_comp[0][2 * i][0],
1686                         pre->g_pre_comp[0][2 * i][1],
1687                         pre->g_pre_comp[0][2 * i][2],
1688                         pre->g_pre_comp[0][2 * i][0],
1689                         pre->g_pre_comp[0][2 * i][1],
1690                         pre->g_pre_comp[0][2 * i][2]);
1691        }
1692    }
1693    for (i = 0; i < 2; i++) {
1694        /* g_pre_comp[i][0] is the point at infinity */
1695        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1696        /* the remaining multiples */
1697        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1698        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1699                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1700                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1701                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702                  pre->g_pre_comp[i][2][2]);
1703        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1704        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1705                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1706                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708                  pre->g_pre_comp[i][2][2]);
1709        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1710        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1711                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1712                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1713                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1714                  pre->g_pre_comp[i][4][2]);
1715        /*
1716         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1717         */
1718        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1719                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1720                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1721                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1722                  pre->g_pre_comp[i][2][2]);
1723        for (j = 1; j < 8; ++j) {
1724            /* odd multiples: add G resp. 2^28*G */
1725            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1726                      pre->g_pre_comp[i][2 * j + 1][1],
1727                      pre->g_pre_comp[i][2 * j + 1][2],
1728                      pre->g_pre_comp[i][2 * j][0],
1729                      pre->g_pre_comp[i][2 * j][1],
1730                      pre->g_pre_comp[i][2 * j][2], 0,
1731                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1732                      pre->g_pre_comp[i][1][2]);
1733        }
1734    }
1735    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1736
1737 done:
1738    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1739                             nistp224_pre_comp_free,
1740                             nistp224_pre_comp_clear_free))
1741        goto err;
1742    ret = 1;
1743    pre = NULL;
1744 err:
1745    BN_CTX_end(ctx);
1746    if (generator != NULL)
1747        EC_POINT_free(generator);
1748    if (new_ctx != NULL)
1749        BN_CTX_free(new_ctx);
1750    if (pre)
1751        nistp224_pre_comp_free(pre);
1752    return ret;
1753}
1754
1755int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1756{
1757    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1758                            nistp224_pre_comp_free,
1759                            nistp224_pre_comp_clear_free)
1760        != NULL)
1761        return 1;
1762    else
1763        return 0;
1764}
1765
1766#else
1767static void *dummy = &dummy;
1768#endif
1769