ecp_nistp224.c revision 331638
1/* crypto/ec/ecp_nistp224.c */
2/*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *  Unless required by applicable law or agreed to in writing, software
15 *  distributed under the License is distributed on an "AS IS" BASIS,
16 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 *  See the License for the specific language governing permissions and
18 *  limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31# ifndef OPENSSL_SYS_VMS
32#  include <stdint.h>
33# else
34#  include <inttypes.h>
35# endif
36
37# include <string.h>
38# include <openssl/err.h>
39# include "ec_lcl.h"
40
41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42  /* even with gcc, the typedef won't work for 32-bit platforms */
43typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44                                 * platforms */
45# else
46#  error "Need GCC 3.1 or later to define type uint128_t"
47# endif
48
49typedef uint8_t u8;
50typedef uint64_t u64;
51
52/******************************************************************************/
53/*-
54 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
55 *
56 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
57 * using 64-bit coefficients called 'limbs',
58 * and sometimes (for multiplication results) as
59 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
60 * using 128-bit coefficients called 'widelimbs'.
61 * A 4-limb representation is an 'felem';
62 * a 7-widelimb representation is a 'widefelem'.
63 * Even within felems, bits of adjacent limbs overlap, and we don't always
64 * reduce the representations: we ensure that inputs to each felem
65 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
66 * and fit into a 128-bit word without overflow. The coefficients are then
67 * again partially reduced to obtain an felem satisfying a_i < 2^57.
68 * We only reduce to the unique minimal representation at the end of the
69 * computation.
70 */
71
72typedef uint64_t limb;
73typedef uint128_t widelimb;
74
75typedef limb felem[4];
76typedef widelimb widefelem[7];
77
78/*
79 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
80 * group order size for the elliptic curve, and we also use this type for
81 * scalars for point multiplication.
82 */
83typedef u8 felem_bytearray[28];
84
85static const felem_bytearray nistp224_curve_params[5] = {
86    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
87     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
88     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
89    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
90     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
92    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
93     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
94     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
95    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
96     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
97     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
98    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
99     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
100     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
101};
102
103/*-
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
108 *
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits    | point
111 * ------+---------+------------------------------
112 *     0 | 0 0 0 0 | 0G
113 *     1 | 0 0 0 1 | 1G
114 *     2 | 0 0 1 0 | 2^56G
115 *     3 | 0 0 1 1 | (2^56 + 1)G
116 *     4 | 0 1 0 0 | 2^112G
117 *     5 | 0 1 0 1 | (2^112 + 1)G
118 *     6 | 0 1 1 0 | (2^112 + 2^56)G
119 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 *     8 | 1 0 0 0 | 2^168G
121 *     9 | 1 0 0 1 | (2^168 + 1)G
122 *    10 | 1 0 1 0 | (2^168 + 2^56)G
123 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 *    12 | 1 1 0 0 | (2^168 + 2^112)G
125 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
129 *
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
133 */
134static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
135                                        {0, 0, 0, 0},
136                                        {0, 0, 0, 0}},
137                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
138                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139                                        {0xd5819985007e34, 0x75a05a07476444,
140                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
141                                        {1, 0, 0, 0}},
142                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
143                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144                                        {0x29e0b892dc9c43, 0xece8608436e662,
145                                         0xdc858f185310d0, 0x9812dd4eb8d321},
146                                        {1, 0, 0, 0}},
147                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
148                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
150                                         0x313865468fafec, 0x5cb379ba910a17},
151                                        {1, 0, 0, 0}},
152                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
153                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154                                        {0x7510407766af5d, 0x84d929610d5450,
155                                         0x81d77aae82f706, 0x6916f6d4338c5b},
156                                        {1, 0, 0, 0}},
157                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
158                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
159                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
160                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
161                                        {1, 0, 0, 0}},
162                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
163                                         0x363f37347c1ba4, 0xc391b45c84725c},
164                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
165                                         0xc393da7e222a7f, 0x1efb7890ede244},
166                                        {1, 0, 0, 0}},
167                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
168                                         0xff8d33c2c98b7c, 0x2610b39409f849},
169                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
170                                         0x966c079b753c89, 0xfe67e4e820b112},
171                                        {1, 0, 0, 0}},
172                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
173                                         0x79b7619a3e7c4c, 0x05c73240899b47},
174                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
175                                         0x641fab2116fd56, 0x72855882b08394},
176                                        {1, 0, 0, 0}},
177                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
178                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
179                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
180                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
181                                        {1, 0, 0, 0}},
182                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
183                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
184                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
185                                         0x9281d688bcbe50, 0xc40331df893881},
186                                        {1, 0, 0, 0}},
187                                       {{0xb89530796f0f60, 0xade92bd26909a3,
188                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
189                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
190                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
191                                        {1, 0, 0, 0}},
192                                       {{0x571e509fb5efb3, 0xade88696410552,
193                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
194                                        {0xff9f51160f4652, 0xb47ce2495a6539,
195                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
196                                        {1, 0, 0, 0}},
197                                       {{0x40bbd5081a44af, 0x0995183b13926c,
198                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
199                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
200                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
201                                        {1, 0, 0, 0}},
202                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
203                                         0x1c29819435d2c6, 0xc813132f4c07e9},
204                                        {0x2891425503b11f, 0x08781030579fea,
205                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
206                                        {1, 0, 0, 0}},
207                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
208                                         0xff17036691a973, 0xf1aef351497c58},
209                                        {0xdd1f2d600564ff, 0xdead073b1402db,
210                                         0x74a684435bd693, 0xeea7471f962558},
211                                        {1, 0, 0, 0}}},
212{{{0, 0, 0, 0},
213  {0, 0, 0, 0},
214  {0, 0, 0, 0}},
215 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
216  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
217  {1, 0, 0, 0}},
218 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
219  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
220  {1, 0, 0, 0}},
221 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
222  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
223  {1, 0, 0, 0}},
224 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
225  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
226  {1, 0, 0, 0}},
227 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
228  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
229  {1, 0, 0, 0}},
230 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
231  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
232  {1, 0, 0, 0}},
233 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
234  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
235  {1, 0, 0, 0}},
236 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
237  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
238  {1, 0, 0, 0}},
239 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
240  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
241  {1, 0, 0, 0}},
242 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
243  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
244  {1, 0, 0, 0}},
245 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
246  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
247  {1, 0, 0, 0}},
248 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
249  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
250  {1, 0, 0, 0}},
251 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
252  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
253  {1, 0, 0, 0}},
254 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
255  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
256  {1, 0, 0, 0}},
257 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
258  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
259  {1, 0, 0, 0}}}
260};
261
262/* Precomputation for the group generator. */
263typedef struct {
264    felem g_pre_comp[2][16][3];
265    int references;
266} NISTP224_PRE_COMP;
267
268const EC_METHOD *EC_GFp_nistp224_method(void)
269{
270    static const EC_METHOD ret = {
271        EC_FLAGS_DEFAULT_OCT,
272        NID_X9_62_prime_field,
273        ec_GFp_nistp224_group_init,
274        ec_GFp_simple_group_finish,
275        ec_GFp_simple_group_clear_finish,
276        ec_GFp_nist_group_copy,
277        ec_GFp_nistp224_group_set_curve,
278        ec_GFp_simple_group_get_curve,
279        ec_GFp_simple_group_get_degree,
280        ec_GFp_simple_group_check_discriminant,
281        ec_GFp_simple_point_init,
282        ec_GFp_simple_point_finish,
283        ec_GFp_simple_point_clear_finish,
284        ec_GFp_simple_point_copy,
285        ec_GFp_simple_point_set_to_infinity,
286        ec_GFp_simple_set_Jprojective_coordinates_GFp,
287        ec_GFp_simple_get_Jprojective_coordinates_GFp,
288        ec_GFp_simple_point_set_affine_coordinates,
289        ec_GFp_nistp224_point_get_affine_coordinates,
290        0 /* point_set_compressed_coordinates */ ,
291        0 /* point2oct */ ,
292        0 /* oct2point */ ,
293        ec_GFp_simple_add,
294        ec_GFp_simple_dbl,
295        ec_GFp_simple_invert,
296        ec_GFp_simple_is_at_infinity,
297        ec_GFp_simple_is_on_curve,
298        ec_GFp_simple_cmp,
299        ec_GFp_simple_make_affine,
300        ec_GFp_simple_points_make_affine,
301        ec_GFp_nistp224_points_mul,
302        ec_GFp_nistp224_precompute_mult,
303        ec_GFp_nistp224_have_precompute_mult,
304        ec_GFp_nist_field_mul,
305        ec_GFp_nist_field_sqr,
306        0 /* field_div */ ,
307        0 /* field_encode */ ,
308        0 /* field_decode */ ,
309        0                       /* field_set_to_one */
310    };
311
312    return &ret;
313}
314
315/*
316 * Helper functions to convert field elements to/from internal representation
317 */
318static void bin28_to_felem(felem out, const u8 in[28])
319{
320    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
321    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
322    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
323    out[3] = (*((const uint64_t *)(in+20))) >> 8;
324}
325
326static void felem_to_bin28(u8 out[28], const felem in)
327{
328    unsigned i;
329    for (i = 0; i < 7; ++i) {
330        out[i] = in[0] >> (8 * i);
331        out[i + 7] = in[1] >> (8 * i);
332        out[i + 14] = in[2] >> (8 * i);
333        out[i + 21] = in[3] >> (8 * i);
334    }
335}
336
337/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
338static void flip_endian(u8 *out, const u8 *in, unsigned len)
339{
340    unsigned i;
341    for (i = 0; i < len; ++i)
342        out[i] = in[len - 1 - i];
343}
344
345/* From OpenSSL BIGNUM to internal representation */
346static int BN_to_felem(felem out, const BIGNUM *bn)
347{
348    felem_bytearray b_in;
349    felem_bytearray b_out;
350    unsigned num_bytes;
351
352    /* BN_bn2bin eats leading zeroes */
353    memset(b_out, 0, sizeof(b_out));
354    num_bytes = BN_num_bytes(bn);
355    if (num_bytes > sizeof(b_out)) {
356        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
357        return 0;
358    }
359    if (BN_is_negative(bn)) {
360        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
361        return 0;
362    }
363    num_bytes = BN_bn2bin(bn, b_in);
364    flip_endian(b_out, b_in, num_bytes);
365    bin28_to_felem(out, b_out);
366    return 1;
367}
368
369/* From internal representation to OpenSSL BIGNUM */
370static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
371{
372    felem_bytearray b_in, b_out;
373    felem_to_bin28(b_in, in);
374    flip_endian(b_out, b_in, sizeof(b_out));
375    return BN_bin2bn(b_out, sizeof(b_out), out);
376}
377
378/******************************************************************************/
379/*-
380 *                              FIELD OPERATIONS
381 *
382 * Field operations, using the internal representation of field elements.
383 * NB! These operations are specific to our point multiplication and cannot be
384 * expected to be correct in general - e.g., multiplication with a large scalar
385 * will cause an overflow.
386 *
387 */
388
389static void felem_one(felem out)
390{
391    out[0] = 1;
392    out[1] = 0;
393    out[2] = 0;
394    out[3] = 0;
395}
396
397static void felem_assign(felem out, const felem in)
398{
399    out[0] = in[0];
400    out[1] = in[1];
401    out[2] = in[2];
402    out[3] = in[3];
403}
404
405/* Sum two field elements: out += in */
406static void felem_sum(felem out, const felem in)
407{
408    out[0] += in[0];
409    out[1] += in[1];
410    out[2] += in[2];
411    out[3] += in[3];
412}
413
414/* Get negative value: out = -in */
415/* Assumes in[i] < 2^57 */
416static void felem_neg(felem out, const felem in)
417{
418    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
419    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
420    static const limb two58m42m2 = (((limb) 1) << 58) -
421        (((limb) 1) << 42) - (((limb) 1) << 2);
422
423    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
424    out[0] = two58p2 - in[0];
425    out[1] = two58m42m2 - in[1];
426    out[2] = two58m2 - in[2];
427    out[3] = two58m2 - in[3];
428}
429
430/* Subtract field elements: out -= in */
431/* Assumes in[i] < 2^57 */
432static void felem_diff(felem out, const felem in)
433{
434    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
435    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
436    static const limb two58m42m2 = (((limb) 1) << 58) -
437        (((limb) 1) << 42) - (((limb) 1) << 2);
438
439    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
440    out[0] += two58p2;
441    out[1] += two58m42m2;
442    out[2] += two58m2;
443    out[3] += two58m2;
444
445    out[0] -= in[0];
446    out[1] -= in[1];
447    out[2] -= in[2];
448    out[3] -= in[3];
449}
450
451/* Subtract in unreduced 128-bit mode: out -= in */
452/* Assumes in[i] < 2^119 */
453static void widefelem_diff(widefelem out, const widefelem in)
454{
455    static const widelimb two120 = ((widelimb) 1) << 120;
456    static const widelimb two120m64 = (((widelimb) 1) << 120) -
457        (((widelimb) 1) << 64);
458    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
459        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
460
461    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
462    out[0] += two120;
463    out[1] += two120m64;
464    out[2] += two120m64;
465    out[3] += two120;
466    out[4] += two120m104m64;
467    out[5] += two120m64;
468    out[6] += two120m64;
469
470    out[0] -= in[0];
471    out[1] -= in[1];
472    out[2] -= in[2];
473    out[3] -= in[3];
474    out[4] -= in[4];
475    out[5] -= in[5];
476    out[6] -= in[6];
477}
478
479/* Subtract in mixed mode: out128 -= in64 */
480/* in[i] < 2^63 */
481static void felem_diff_128_64(widefelem out, const felem in)
482{
483    static const widelimb two64p8 = (((widelimb) 1) << 64) +
484        (((widelimb) 1) << 8);
485    static const widelimb two64m8 = (((widelimb) 1) << 64) -
486        (((widelimb) 1) << 8);
487    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
488        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
489
490    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
491    out[0] += two64p8;
492    out[1] += two64m48m8;
493    out[2] += two64m8;
494    out[3] += two64m8;
495
496    out[0] -= in[0];
497    out[1] -= in[1];
498    out[2] -= in[2];
499    out[3] -= in[3];
500}
501
502/*
503 * Multiply a field element by a scalar: out = out * scalar The scalars we
504 * actually use are small, so results fit without overflow
505 */
506static void felem_scalar(felem out, const limb scalar)
507{
508    out[0] *= scalar;
509    out[1] *= scalar;
510    out[2] *= scalar;
511    out[3] *= scalar;
512}
513
514/*
515 * Multiply an unreduced field element by a scalar: out = out * scalar The
516 * scalars we actually use are small, so results fit without overflow
517 */
518static void widefelem_scalar(widefelem out, const widelimb scalar)
519{
520    out[0] *= scalar;
521    out[1] *= scalar;
522    out[2] *= scalar;
523    out[3] *= scalar;
524    out[4] *= scalar;
525    out[5] *= scalar;
526    out[6] *= scalar;
527}
528
529/* Square a field element: out = in^2 */
530static void felem_square(widefelem out, const felem in)
531{
532    limb tmp0, tmp1, tmp2;
533    tmp0 = 2 * in[0];
534    tmp1 = 2 * in[1];
535    tmp2 = 2 * in[2];
536    out[0] = ((widelimb) in[0]) * in[0];
537    out[1] = ((widelimb) in[0]) * tmp1;
538    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
539    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
540    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
541    out[5] = ((widelimb) in[3]) * tmp2;
542    out[6] = ((widelimb) in[3]) * in[3];
543}
544
545/* Multiply two field elements: out = in1 * in2 */
546static void felem_mul(widefelem out, const felem in1, const felem in2)
547{
548    out[0] = ((widelimb) in1[0]) * in2[0];
549    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
550    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
551        ((widelimb) in1[2]) * in2[0];
552    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
553        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
554    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
555        ((widelimb) in1[3]) * in2[1];
556    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
557    out[6] = ((widelimb) in1[3]) * in2[3];
558}
559
560/*-
561 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
562 * Requires in[i] < 2^126,
563 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
564static void felem_reduce(felem out, const widefelem in)
565{
566    static const widelimb two127p15 = (((widelimb) 1) << 127) +
567        (((widelimb) 1) << 15);
568    static const widelimb two127m71 = (((widelimb) 1) << 127) -
569        (((widelimb) 1) << 71);
570    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
571        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
572    widelimb output[5];
573
574    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
575    output[0] = in[0] + two127p15;
576    output[1] = in[1] + two127m71m55;
577    output[2] = in[2] + two127m71;
578    output[3] = in[3];
579    output[4] = in[4];
580
581    /* Eliminate in[4], in[5], in[6] */
582    output[4] += in[6] >> 16;
583    output[3] += (in[6] & 0xffff) << 40;
584    output[2] -= in[6];
585
586    output[3] += in[5] >> 16;
587    output[2] += (in[5] & 0xffff) << 40;
588    output[1] -= in[5];
589
590    output[2] += output[4] >> 16;
591    output[1] += (output[4] & 0xffff) << 40;
592    output[0] -= output[4];
593
594    /* Carry 2 -> 3 -> 4 */
595    output[3] += output[2] >> 56;
596    output[2] &= 0x00ffffffffffffff;
597
598    output[4] = output[3] >> 56;
599    output[3] &= 0x00ffffffffffffff;
600
601    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
602
603    /* Eliminate output[4] */
604    output[2] += output[4] >> 16;
605    /* output[2] < 2^56 + 2^56 = 2^57 */
606    output[1] += (output[4] & 0xffff) << 40;
607    output[0] -= output[4];
608
609    /* Carry 0 -> 1 -> 2 -> 3 */
610    output[1] += output[0] >> 56;
611    out[0] = output[0] & 0x00ffffffffffffff;
612
613    output[2] += output[1] >> 56;
614    /* output[2] < 2^57 + 2^72 */
615    out[1] = output[1] & 0x00ffffffffffffff;
616    output[3] += output[2] >> 56;
617    /* output[3] <= 2^56 + 2^16 */
618    out[2] = output[2] & 0x00ffffffffffffff;
619
620    /*-
621     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
622     * out[3] <= 2^56 + 2^16 (due to final carry),
623     * so out < 2*p
624     */
625    out[3] = output[3];
626}
627
628static void felem_square_reduce(felem out, const felem in)
629{
630    widefelem tmp;
631    felem_square(tmp, in);
632    felem_reduce(out, tmp);
633}
634
635static void felem_mul_reduce(felem out, const felem in1, const felem in2)
636{
637    widefelem tmp;
638    felem_mul(tmp, in1, in2);
639    felem_reduce(out, tmp);
640}
641
642/*
643 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
644 * call felem_reduce first)
645 */
646static void felem_contract(felem out, const felem in)
647{
648    static const int64_t two56 = ((limb) 1) << 56;
649    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
650    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
651    int64_t tmp[4], a;
652    tmp[0] = in[0];
653    tmp[1] = in[1];
654    tmp[2] = in[2];
655    tmp[3] = in[3];
656    /* Case 1: a = 1 iff in >= 2^224 */
657    a = (in[3] >> 56);
658    tmp[0] -= a;
659    tmp[1] += a << 40;
660    tmp[3] &= 0x00ffffffffffffff;
661    /*
662     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
663     * and the lower part is non-zero
664     */
665    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
666        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
667    a &= 0x00ffffffffffffff;
668    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
669    a = (a - 1) >> 63;
670    /* subtract 2^224 - 2^96 + 1 if a is all-one */
671    tmp[3] &= a ^ 0xffffffffffffffff;
672    tmp[2] &= a ^ 0xffffffffffffffff;
673    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
674    tmp[0] -= 1 & a;
675
676    /*
677     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
678     * non-zero, so we only need one step
679     */
680    a = tmp[0] >> 63;
681    tmp[0] += two56 & a;
682    tmp[1] -= 1 & a;
683
684    /* carry 1 -> 2 -> 3 */
685    tmp[2] += tmp[1] >> 56;
686    tmp[1] &= 0x00ffffffffffffff;
687
688    tmp[3] += tmp[2] >> 56;
689    tmp[2] &= 0x00ffffffffffffff;
690
691    /* Now 0 <= out < p */
692    out[0] = tmp[0];
693    out[1] = tmp[1];
694    out[2] = tmp[2];
695    out[3] = tmp[3];
696}
697
698/*
699 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
700 * elements are reduced to in < 2^225, so we only need to check three cases:
701 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
702 */
703static limb felem_is_zero(const felem in)
704{
705    limb zero, two224m96p1, two225m97p2;
706
707    zero = in[0] | in[1] | in[2] | in[3];
708    zero = (((int64_t) (zero) - 1) >> 63) & 1;
709    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
710        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
711    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
712    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
713        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
714    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
715    return (zero | two224m96p1 | two225m97p2);
716}
717
718static int felem_is_zero_int(const void *in)
719{
720    return (int)(felem_is_zero(in) & ((limb) 1));
721}
722
723/* Invert a field element */
724/* Computation chain copied from djb's code */
725static void felem_inv(felem out, const felem in)
726{
727    felem ftmp, ftmp2, ftmp3, ftmp4;
728    widefelem tmp;
729    unsigned i;
730
731    felem_square(tmp, in);
732    felem_reduce(ftmp, tmp);    /* 2 */
733    felem_mul(tmp, in, ftmp);
734    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
735    felem_square(tmp, ftmp);
736    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
737    felem_mul(tmp, in, ftmp);
738    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
739    felem_square(tmp, ftmp);
740    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
741    felem_square(tmp, ftmp2);
742    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
743    felem_square(tmp, ftmp2);
744    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
745    felem_mul(tmp, ftmp2, ftmp);
746    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
747    felem_square(tmp, ftmp);
748    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
749    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
750        felem_square(tmp, ftmp2);
751        felem_reduce(ftmp2, tmp);
752    }
753    felem_mul(tmp, ftmp2, ftmp);
754    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
755    felem_square(tmp, ftmp2);
756    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
757    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
758        felem_square(tmp, ftmp3);
759        felem_reduce(ftmp3, tmp);
760    }
761    felem_mul(tmp, ftmp3, ftmp2);
762    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
763    felem_square(tmp, ftmp2);
764    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
765    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
766        felem_square(tmp, ftmp3);
767        felem_reduce(ftmp3, tmp);
768    }
769    felem_mul(tmp, ftmp3, ftmp2);
770    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
771    felem_square(tmp, ftmp3);
772    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
773    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
774        felem_square(tmp, ftmp4);
775        felem_reduce(ftmp4, tmp);
776    }
777    felem_mul(tmp, ftmp3, ftmp4);
778    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
779    felem_square(tmp, ftmp3);
780    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
781    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
782        felem_square(tmp, ftmp4);
783        felem_reduce(ftmp4, tmp);
784    }
785    felem_mul(tmp, ftmp2, ftmp4);
786    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
787    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
788        felem_square(tmp, ftmp2);
789        felem_reduce(ftmp2, tmp);
790    }
791    felem_mul(tmp, ftmp2, ftmp);
792    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
793    felem_square(tmp, ftmp);
794    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
795    felem_mul(tmp, ftmp, in);
796    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
797    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
798        felem_square(tmp, ftmp);
799        felem_reduce(ftmp, tmp);
800    }
801    felem_mul(tmp, ftmp, ftmp3);
802    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
803}
804
805/*
806 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
807 * out to itself.
808 */
809static void copy_conditional(felem out, const felem in, limb icopy)
810{
811    unsigned i;
812    /*
813     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
814     */
815    const limb copy = -icopy;
816    for (i = 0; i < 4; ++i) {
817        const limb tmp = copy & (in[i] ^ out[i]);
818        out[i] ^= tmp;
819    }
820}
821
822/******************************************************************************/
823/*-
824 *                       ELLIPTIC CURVE POINT OPERATIONS
825 *
826 * Points are represented in Jacobian projective coordinates:
827 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
828 * or to the point at infinity if Z == 0.
829 *
830 */
831
832/*-
833 * Double an elliptic curve point:
834 * (X', Y', Z') = 2 * (X, Y, Z), where
835 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
836 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
837 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
838 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
839 * while x_out == y_in is not (maybe this works, but it's not tested).
840 */
841static void
842point_double(felem x_out, felem y_out, felem z_out,
843             const felem x_in, const felem y_in, const felem z_in)
844{
845    widefelem tmp, tmp2;
846    felem delta, gamma, beta, alpha, ftmp, ftmp2;
847
848    felem_assign(ftmp, x_in);
849    felem_assign(ftmp2, x_in);
850
851    /* delta = z^2 */
852    felem_square(tmp, z_in);
853    felem_reduce(delta, tmp);
854
855    /* gamma = y^2 */
856    felem_square(tmp, y_in);
857    felem_reduce(gamma, tmp);
858
859    /* beta = x*gamma */
860    felem_mul(tmp, x_in, gamma);
861    felem_reduce(beta, tmp);
862
863    /* alpha = 3*(x-delta)*(x+delta) */
864    felem_diff(ftmp, delta);
865    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
866    felem_sum(ftmp2, delta);
867    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
868    felem_scalar(ftmp2, 3);
869    /* ftmp2[i] < 3 * 2^58 < 2^60 */
870    felem_mul(tmp, ftmp, ftmp2);
871    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
872    felem_reduce(alpha, tmp);
873
874    /* x' = alpha^2 - 8*beta */
875    felem_square(tmp, alpha);
876    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
877    felem_assign(ftmp, beta);
878    felem_scalar(ftmp, 8);
879    /* ftmp[i] < 8 * 2^57 = 2^60 */
880    felem_diff_128_64(tmp, ftmp);
881    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
882    felem_reduce(x_out, tmp);
883
884    /* z' = (y + z)^2 - gamma - delta */
885    felem_sum(delta, gamma);
886    /* delta[i] < 2^57 + 2^57 = 2^58 */
887    felem_assign(ftmp, y_in);
888    felem_sum(ftmp, z_in);
889    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
890    felem_square(tmp, ftmp);
891    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
892    felem_diff_128_64(tmp, delta);
893    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
894    felem_reduce(z_out, tmp);
895
896    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
897    felem_scalar(beta, 4);
898    /* beta[i] < 4 * 2^57 = 2^59 */
899    felem_diff(beta, x_out);
900    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
901    felem_mul(tmp, alpha, beta);
902    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
903    felem_square(tmp2, gamma);
904    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
905    widefelem_scalar(tmp2, 8);
906    /* tmp2[i] < 8 * 2^116 = 2^119 */
907    widefelem_diff(tmp, tmp2);
908    /* tmp[i] < 2^119 + 2^120 < 2^121 */
909    felem_reduce(y_out, tmp);
910}
911
912/*-
913 * Add two elliptic curve points:
914 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
915 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
916 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
917 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
918 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
919 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
920 *
921 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
922 */
923
924/*
925 * This function is not entirely constant-time: it includes a branch for
926 * checking whether the two input points are equal, (while not equal to the
927 * point at infinity). This case never happens during single point
928 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
929 */
930static void point_add(felem x3, felem y3, felem z3,
931                      const felem x1, const felem y1, const felem z1,
932                      const int mixed, const felem x2, const felem y2,
933                      const felem z2)
934{
935    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
936    widefelem tmp, tmp2;
937    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
938
939    if (!mixed) {
940        /* ftmp2 = z2^2 */
941        felem_square(tmp, z2);
942        felem_reduce(ftmp2, tmp);
943
944        /* ftmp4 = z2^3 */
945        felem_mul(tmp, ftmp2, z2);
946        felem_reduce(ftmp4, tmp);
947
948        /* ftmp4 = z2^3*y1 */
949        felem_mul(tmp2, ftmp4, y1);
950        felem_reduce(ftmp4, tmp2);
951
952        /* ftmp2 = z2^2*x1 */
953        felem_mul(tmp2, ftmp2, x1);
954        felem_reduce(ftmp2, tmp2);
955    } else {
956        /*
957         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
958         */
959
960        /* ftmp4 = z2^3*y1 */
961        felem_assign(ftmp4, y1);
962
963        /* ftmp2 = z2^2*x1 */
964        felem_assign(ftmp2, x1);
965    }
966
967    /* ftmp = z1^2 */
968    felem_square(tmp, z1);
969    felem_reduce(ftmp, tmp);
970
971    /* ftmp3 = z1^3 */
972    felem_mul(tmp, ftmp, z1);
973    felem_reduce(ftmp3, tmp);
974
975    /* tmp = z1^3*y2 */
976    felem_mul(tmp, ftmp3, y2);
977    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
978
979    /* ftmp3 = z1^3*y2 - z2^3*y1 */
980    felem_diff_128_64(tmp, ftmp4);
981    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
982    felem_reduce(ftmp3, tmp);
983
984    /* tmp = z1^2*x2 */
985    felem_mul(tmp, ftmp, x2);
986    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
987
988    /* ftmp = z1^2*x2 - z2^2*x1 */
989    felem_diff_128_64(tmp, ftmp2);
990    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
991    felem_reduce(ftmp, tmp);
992
993    /*
994     * the formulae are incorrect if the points are equal so we check for
995     * this and do doubling if this happens
996     */
997    x_equal = felem_is_zero(ftmp);
998    y_equal = felem_is_zero(ftmp3);
999    z1_is_zero = felem_is_zero(z1);
1000    z2_is_zero = felem_is_zero(z2);
1001    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1002    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1003        point_double(x3, y3, z3, x1, y1, z1);
1004        return;
1005    }
1006
1007    /* ftmp5 = z1*z2 */
1008    if (!mixed) {
1009        felem_mul(tmp, z1, z2);
1010        felem_reduce(ftmp5, tmp);
1011    } else {
1012        /* special case z2 = 0 is handled later */
1013        felem_assign(ftmp5, z1);
1014    }
1015
1016    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017    felem_mul(tmp, ftmp, ftmp5);
1018    felem_reduce(z_out, tmp);
1019
1020    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021    felem_assign(ftmp5, ftmp);
1022    felem_square(tmp, ftmp);
1023    felem_reduce(ftmp, tmp);
1024
1025    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026    felem_mul(tmp, ftmp, ftmp5);
1027    felem_reduce(ftmp5, tmp);
1028
1029    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030    felem_mul(tmp, ftmp2, ftmp);
1031    felem_reduce(ftmp2, tmp);
1032
1033    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034    felem_mul(tmp, ftmp4, ftmp5);
1035    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036
1037    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038    felem_square(tmp2, ftmp3);
1039    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040
1041    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042    felem_diff_128_64(tmp2, ftmp5);
1043    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044
1045    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046    felem_assign(ftmp5, ftmp2);
1047    felem_scalar(ftmp5, 2);
1048    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049
1050    /*-
1051     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053     */
1054    felem_diff_128_64(tmp2, ftmp5);
1055    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056    felem_reduce(x_out, tmp2);
1057
1058    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059    felem_diff(ftmp2, x_out);
1060    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061
1062    /*
1063     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064     */
1065    felem_mul(tmp2, ftmp3, ftmp2);
1066    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067
1068    /*-
1069     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071     */
1072    widefelem_diff(tmp2, tmp);
1073    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074    felem_reduce(y_out, tmp2);
1075
1076    /*
1077     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078     * the point at infinity, so we need to check for this separately
1079     */
1080
1081    /*
1082     * if point 1 is at infinity, copy point 2 to output, and vice versa
1083     */
1084    copy_conditional(x_out, x2, z1_is_zero);
1085    copy_conditional(x_out, x1, z2_is_zero);
1086    copy_conditional(y_out, y2, z1_is_zero);
1087    copy_conditional(y_out, y1, z2_is_zero);
1088    copy_conditional(z_out, z2, z1_is_zero);
1089    copy_conditional(z_out, z1, z2_is_zero);
1090    felem_assign(x3, x_out);
1091    felem_assign(y3, y_out);
1092    felem_assign(z3, z_out);
1093}
1094
1095/*
1096 * select_point selects the |idx|th point from a precomputation table and
1097 * copies it to out.
1098 * The pre_comp array argument should be size of |size| argument
1099 */
1100static void select_point(const u64 idx, unsigned int size,
1101                         const felem pre_comp[][3], felem out[3])
1102{
1103    unsigned i, j;
1104    limb *outlimbs = &out[0][0];
1105    memset(outlimbs, 0, 3 * sizeof(felem));
1106
1107    for (i = 0; i < size; i++) {
1108        const limb *inlimbs = &pre_comp[i][0][0];
1109        u64 mask = i ^ idx;
1110        mask |= mask >> 4;
1111        mask |= mask >> 2;
1112        mask |= mask >> 1;
1113        mask &= 1;
1114        mask--;
1115        for (j = 0; j < 4 * 3; j++)
1116            outlimbs[j] |= inlimbs[j] & mask;
1117    }
1118}
1119
1120/* get_bit returns the |i|th bit in |in| */
1121static char get_bit(const felem_bytearray in, unsigned i)
1122{
1123    if (i >= 224)
1124        return 0;
1125    return (in[i >> 3] >> (i & 7)) & 1;
1126}
1127
1128/*
1129 * Interleaved point multiplication using precomputed point multiples: The
1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134 */
1135static void batch_mul(felem x_out, felem y_out, felem z_out,
1136                      const felem_bytearray scalars[],
1137                      const unsigned num_points, const u8 *g_scalar,
1138                      const int mixed, const felem pre_comp[][17][3],
1139                      const felem g_pre_comp[2][16][3])
1140{
1141    int i, skip;
1142    unsigned num;
1143    unsigned gen_mul = (g_scalar != NULL);
1144    felem nq[3], tmp[4];
1145    u64 bits;
1146    u8 sign, digit;
1147
1148    /* set nq to the point at infinity */
1149    memset(nq, 0, 3 * sizeof(felem));
1150
1151    /*
1152     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153     * of the generator (two in each of the last 28 rounds) and additions of
1154     * other points multiples (every 5th round).
1155     */
1156    skip = 1;                   /* save two point operations in the first
1157                                 * round */
1158    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159        /* double */
1160        if (!skip)
1161            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162
1163        /* add multiples of the generator */
1164        if (gen_mul && (i <= 27)) {
1165            /* first, look 28 bits upwards */
1166            bits = get_bit(g_scalar, i + 196) << 3;
1167            bits |= get_bit(g_scalar, i + 140) << 2;
1168            bits |= get_bit(g_scalar, i + 84) << 1;
1169            bits |= get_bit(g_scalar, i + 28);
1170            /* select the point to add, in constant time */
1171            select_point(bits, 16, g_pre_comp[1], tmp);
1172
1173            if (!skip) {
1174                /* value 1 below is argument for "mixed" */
1175                point_add(nq[0], nq[1], nq[2],
1176                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177            } else {
1178                memcpy(nq, tmp, 3 * sizeof(felem));
1179                skip = 0;
1180            }
1181
1182            /* second, look at the current position */
1183            bits = get_bit(g_scalar, i + 168) << 3;
1184            bits |= get_bit(g_scalar, i + 112) << 2;
1185            bits |= get_bit(g_scalar, i + 56) << 1;
1186            bits |= get_bit(g_scalar, i);
1187            /* select the point to add, in constant time */
1188            select_point(bits, 16, g_pre_comp[0], tmp);
1189            point_add(nq[0], nq[1], nq[2],
1190                      nq[0], nq[1], nq[2],
1191                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192        }
1193
1194        /* do other additions every 5 doublings */
1195        if (num_points && (i % 5 == 0)) {
1196            /* loop over all scalars */
1197            for (num = 0; num < num_points; ++num) {
1198                bits = get_bit(scalars[num], i + 4) << 5;
1199                bits |= get_bit(scalars[num], i + 3) << 4;
1200                bits |= get_bit(scalars[num], i + 2) << 3;
1201                bits |= get_bit(scalars[num], i + 1) << 2;
1202                bits |= get_bit(scalars[num], i) << 1;
1203                bits |= get_bit(scalars[num], i - 1);
1204                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205
1206                /* select the point to add or subtract */
1207                select_point(digit, 17, pre_comp[num], tmp);
1208                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209                                            * point */
1210                copy_conditional(tmp[1], tmp[3], sign);
1211
1212                if (!skip) {
1213                    point_add(nq[0], nq[1], nq[2],
1214                              nq[0], nq[1], nq[2],
1215                              mixed, tmp[0], tmp[1], tmp[2]);
1216                } else {
1217                    memcpy(nq, tmp, 3 * sizeof(felem));
1218                    skip = 0;
1219                }
1220            }
1221        }
1222    }
1223    felem_assign(x_out, nq[0]);
1224    felem_assign(y_out, nq[1]);
1225    felem_assign(z_out, nq[2]);
1226}
1227
1228/******************************************************************************/
1229/*
1230 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231 */
1232
1233static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1234{
1235    NISTP224_PRE_COMP *ret = NULL;
1236    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1237    if (!ret) {
1238        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239        return ret;
1240    }
1241    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1242    ret->references = 1;
1243    return ret;
1244}
1245
1246static void *nistp224_pre_comp_dup(void *src_)
1247{
1248    NISTP224_PRE_COMP *src = src_;
1249
1250    /* no need to actually copy, these objects never change! */
1251    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1252
1253    return src_;
1254}
1255
1256static void nistp224_pre_comp_free(void *pre_)
1257{
1258    int i;
1259    NISTP224_PRE_COMP *pre = pre_;
1260
1261    if (!pre)
1262        return;
1263
1264    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1265    if (i > 0)
1266        return;
1267
1268    OPENSSL_free(pre);
1269}
1270
1271static void nistp224_pre_comp_clear_free(void *pre_)
1272{
1273    int i;
1274    NISTP224_PRE_COMP *pre = pre_;
1275
1276    if (!pre)
1277        return;
1278
1279    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1280    if (i > 0)
1281        return;
1282
1283    OPENSSL_cleanse(pre, sizeof(*pre));
1284    OPENSSL_free(pre);
1285}
1286
1287/******************************************************************************/
1288/*
1289 * OPENSSL EC_METHOD FUNCTIONS
1290 */
1291
1292int ec_GFp_nistp224_group_init(EC_GROUP *group)
1293{
1294    int ret;
1295    ret = ec_GFp_simple_group_init(group);
1296    group->a_is_minus3 = 1;
1297    return ret;
1298}
1299
1300int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1301                                    const BIGNUM *a, const BIGNUM *b,
1302                                    BN_CTX *ctx)
1303{
1304    int ret = 0;
1305    BN_CTX *new_ctx = NULL;
1306    BIGNUM *curve_p, *curve_a, *curve_b;
1307
1308    if (ctx == NULL)
1309        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1310            return 0;
1311    BN_CTX_start(ctx);
1312    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1313        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1314        ((curve_b = BN_CTX_get(ctx)) == NULL))
1315        goto err;
1316    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1321              EC_R_WRONG_CURVE_PARAMETERS);
1322        goto err;
1323    }
1324    group->field_mod_func = BN_nist_mod_224;
1325    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1326 err:
1327    BN_CTX_end(ctx);
1328    if (new_ctx != NULL)
1329        BN_CTX_free(new_ctx);
1330    return ret;
1331}
1332
1333/*
1334 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335 * (X/Z^2, Y/Z^3)
1336 */
1337int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338                                                 const EC_POINT *point,
1339                                                 BIGNUM *x, BIGNUM *y,
1340                                                 BN_CTX *ctx)
1341{
1342    felem z1, z2, x_in, y_in, x_out, y_out;
1343    widefelem tmp;
1344
1345    if (EC_POINT_is_at_infinity(group, point)) {
1346        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1347              EC_R_POINT_AT_INFINITY);
1348        return 0;
1349    }
1350    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1351        (!BN_to_felem(z1, &point->Z)))
1352        return 0;
1353    felem_inv(z2, z1);
1354    felem_square(tmp, z2);
1355    felem_reduce(z1, tmp);
1356    felem_mul(tmp, x_in, z1);
1357    felem_reduce(x_in, tmp);
1358    felem_contract(x_out, x_in);
1359    if (x != NULL) {
1360        if (!felem_to_BN(x, x_out)) {
1361            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1362                  ERR_R_BN_LIB);
1363            return 0;
1364        }
1365    }
1366    felem_mul(tmp, z1, z2);
1367    felem_reduce(z1, tmp);
1368    felem_mul(tmp, y_in, z1);
1369    felem_reduce(y_in, tmp);
1370    felem_contract(y_out, y_in);
1371    if (y != NULL) {
1372        if (!felem_to_BN(y, y_out)) {
1373            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1374                  ERR_R_BN_LIB);
1375            return 0;
1376        }
1377    }
1378    return 1;
1379}
1380
1381static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1382                               felem tmp_felems[ /* num+1 */ ])
1383{
1384    /*
1385     * Runs in constant time, unless an input is the point at infinity (which
1386     * normally shouldn't happen).
1387     */
1388    ec_GFp_nistp_points_make_affine_internal(num,
1389                                             points,
1390                                             sizeof(felem),
1391                                             tmp_felems,
1392                                             (void (*)(void *))felem_one,
1393                                             felem_is_zero_int,
1394                                             (void (*)(void *, const void *))
1395                                             felem_assign,
1396                                             (void (*)(void *, const void *))
1397                                             felem_square_reduce, (void (*)
1398                                                                   (void *,
1399                                                                    const void
1400                                                                    *,
1401                                                                    const void
1402                                                                    *))
1403                                             felem_mul_reduce,
1404                                             (void (*)(void *, const void *))
1405                                             felem_inv,
1406                                             (void (*)(void *, const void *))
1407                                             felem_contract);
1408}
1409
1410/*
1411 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1412 * values Result is stored in r (r can equal one of the inputs).
1413 */
1414int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1415                               const BIGNUM *scalar, size_t num,
1416                               const EC_POINT *points[],
1417                               const BIGNUM *scalars[], BN_CTX *ctx)
1418{
1419    int ret = 0;
1420    int j;
1421    unsigned i;
1422    int mixed = 0;
1423    BN_CTX *new_ctx = NULL;
1424    BIGNUM *x, *y, *z, *tmp_scalar;
1425    felem_bytearray g_secret;
1426    felem_bytearray *secrets = NULL;
1427    felem(*pre_comp)[17][3] = NULL;
1428    felem *tmp_felems = NULL;
1429    felem_bytearray tmp;
1430    unsigned num_bytes;
1431    int have_pre_comp = 0;
1432    size_t num_points = num;
1433    felem x_in, y_in, z_in, x_out, y_out, z_out;
1434    NISTP224_PRE_COMP *pre = NULL;
1435    const felem(*g_pre_comp)[16][3] = NULL;
1436    EC_POINT *generator = NULL;
1437    const EC_POINT *p = NULL;
1438    const BIGNUM *p_scalar = NULL;
1439
1440    if (ctx == NULL)
1441        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1442            return 0;
1443    BN_CTX_start(ctx);
1444    if (((x = BN_CTX_get(ctx)) == NULL) ||
1445        ((y = BN_CTX_get(ctx)) == NULL) ||
1446        ((z = BN_CTX_get(ctx)) == NULL) ||
1447        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1448        goto err;
1449
1450    if (scalar != NULL) {
1451        pre = EC_EX_DATA_get_data(group->extra_data,
1452                                  nistp224_pre_comp_dup,
1453                                  nistp224_pre_comp_free,
1454                                  nistp224_pre_comp_clear_free);
1455        if (pre)
1456            /* we have precomputation, try to use it */
1457            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1458        else
1459            /* try to use the standard precomputation */
1460            g_pre_comp = &gmul[0];
1461        generator = EC_POINT_new(group);
1462        if (generator == NULL)
1463            goto err;
1464        /* get the generator from precomputation */
1465        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1466            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1467            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1468            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1469            goto err;
1470        }
1471        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1472                                                      generator, x, y, z,
1473                                                      ctx))
1474            goto err;
1475        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1476            /* precomputation matches generator */
1477            have_pre_comp = 1;
1478        else
1479            /*
1480             * we don't have valid precomputation: treat the generator as a
1481             * random point
1482             */
1483            num_points = num_points + 1;
1484    }
1485
1486    if (num_points > 0) {
1487        if (num_points >= 3) {
1488            /*
1489             * unless we precompute multiples for just one or two points,
1490             * converting those into affine form is time well spent
1491             */
1492            mixed = 1;
1493        }
1494        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1495        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1496        if (mixed)
1497            tmp_felems =
1498                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1499        if ((secrets == NULL) || (pre_comp == NULL)
1500            || (mixed && (tmp_felems == NULL))) {
1501            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1502            goto err;
1503        }
1504
1505        /*
1506         * we treat NULL scalars as 0, and NULL points as points at infinity,
1507         * i.e., they contribute nothing to the linear combination
1508         */
1509        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1510        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1511        for (i = 0; i < num_points; ++i) {
1512            if (i == num)
1513                /* the generator */
1514            {
1515                p = EC_GROUP_get0_generator(group);
1516                p_scalar = scalar;
1517            } else
1518                /* the i^th point */
1519            {
1520                p = points[i];
1521                p_scalar = scalars[i];
1522            }
1523            if ((p_scalar != NULL) && (p != NULL)) {
1524                /* reduce scalar to 0 <= scalar < 2^224 */
1525                if ((BN_num_bits(p_scalar) > 224)
1526                    || (BN_is_negative(p_scalar))) {
1527                    /*
1528                     * this is an unusual input, and we don't guarantee
1529                     * constant-timeness
1530                     */
1531                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1532                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1533                        goto err;
1534                    }
1535                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1536                } else
1537                    num_bytes = BN_bn2bin(p_scalar, tmp);
1538                flip_endian(secrets[i], tmp, num_bytes);
1539                /* precompute multiples */
1540                if ((!BN_to_felem(x_out, &p->X)) ||
1541                    (!BN_to_felem(y_out, &p->Y)) ||
1542                    (!BN_to_felem(z_out, &p->Z)))
1543                    goto err;
1544                felem_assign(pre_comp[i][1][0], x_out);
1545                felem_assign(pre_comp[i][1][1], y_out);
1546                felem_assign(pre_comp[i][1][2], z_out);
1547                for (j = 2; j <= 16; ++j) {
1548                    if (j & 1) {
1549                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1550                                  pre_comp[i][j][2], pre_comp[i][1][0],
1551                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1552                                  pre_comp[i][j - 1][0],
1553                                  pre_comp[i][j - 1][1],
1554                                  pre_comp[i][j - 1][2]);
1555                    } else {
1556                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1557                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1558                                     pre_comp[i][j / 2][1],
1559                                     pre_comp[i][j / 2][2]);
1560                    }
1561                }
1562            }
1563        }
1564        if (mixed)
1565            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1566    }
1567
1568    /* the scalar for the generator */
1569    if ((scalar != NULL) && (have_pre_comp)) {
1570        memset(g_secret, 0, sizeof(g_secret));
1571        /* reduce scalar to 0 <= scalar < 2^224 */
1572        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1573            /*
1574             * this is an unusual input, and we don't guarantee
1575             * constant-timeness
1576             */
1577            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1578                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1579                goto err;
1580            }
1581            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1582        } else
1583            num_bytes = BN_bn2bin(scalar, tmp);
1584        flip_endian(g_secret, tmp, num_bytes);
1585        /* do the multiplication with generator precomputation */
1586        batch_mul(x_out, y_out, z_out,
1587                  (const felem_bytearray(*))secrets, num_points,
1588                  g_secret,
1589                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1590    } else
1591        /* do the multiplication without generator precomputation */
1592        batch_mul(x_out, y_out, z_out,
1593                  (const felem_bytearray(*))secrets, num_points,
1594                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1595    /* reduce the output to its unique minimal representation */
1596    felem_contract(x_in, x_out);
1597    felem_contract(y_in, y_out);
1598    felem_contract(z_in, z_out);
1599    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1600        (!felem_to_BN(z, z_in))) {
1601        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1602        goto err;
1603    }
1604    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1605
1606 err:
1607    BN_CTX_end(ctx);
1608    if (generator != NULL)
1609        EC_POINT_free(generator);
1610    if (new_ctx != NULL)
1611        BN_CTX_free(new_ctx);
1612    if (secrets != NULL)
1613        OPENSSL_free(secrets);
1614    if (pre_comp != NULL)
1615        OPENSSL_free(pre_comp);
1616    if (tmp_felems != NULL)
1617        OPENSSL_free(tmp_felems);
1618    return ret;
1619}
1620
1621int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1622{
1623    int ret = 0;
1624    NISTP224_PRE_COMP *pre = NULL;
1625    int i, j;
1626    BN_CTX *new_ctx = NULL;
1627    BIGNUM *x, *y;
1628    EC_POINT *generator = NULL;
1629    felem tmp_felems[32];
1630
1631    /* throw away old precomputation */
1632    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1633                         nistp224_pre_comp_free,
1634                         nistp224_pre_comp_clear_free);
1635    if (ctx == NULL)
1636        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1637            return 0;
1638    BN_CTX_start(ctx);
1639    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1640        goto err;
1641    /* get the generator */
1642    if (group->generator == NULL)
1643        goto err;
1644    generator = EC_POINT_new(group);
1645    if (generator == NULL)
1646        goto err;
1647    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1648    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1649    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1650        goto err;
1651    if ((pre = nistp224_pre_comp_new()) == NULL)
1652        goto err;
1653    /*
1654     * if the generator is the standard one, use built-in precomputation
1655     */
1656    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1657        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1658        goto done;
1659    }
1660    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1661        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1662        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1663        goto err;
1664    /*
1665     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1666     * 2^140*G, 2^196*G for the second one
1667     */
1668    for (i = 1; i <= 8; i <<= 1) {
1669        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1670                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1671                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1672        for (j = 0; j < 27; ++j) {
1673            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1674                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1675                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1676        }
1677        if (i == 8)
1678            break;
1679        point_double(pre->g_pre_comp[0][2 * i][0],
1680                     pre->g_pre_comp[0][2 * i][1],
1681                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1682                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1683        for (j = 0; j < 27; ++j) {
1684            point_double(pre->g_pre_comp[0][2 * i][0],
1685                         pre->g_pre_comp[0][2 * i][1],
1686                         pre->g_pre_comp[0][2 * i][2],
1687                         pre->g_pre_comp[0][2 * i][0],
1688                         pre->g_pre_comp[0][2 * i][1],
1689                         pre->g_pre_comp[0][2 * i][2]);
1690        }
1691    }
1692    for (i = 0; i < 2; i++) {
1693        /* g_pre_comp[i][0] is the point at infinity */
1694        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1695        /* the remaining multiples */
1696        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1697        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1698                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1699                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1700                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1701                  pre->g_pre_comp[i][2][2]);
1702        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1703        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1704                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1705                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1706                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1707                  pre->g_pre_comp[i][2][2]);
1708        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1709        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1710                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1711                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1712                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1713                  pre->g_pre_comp[i][4][2]);
1714        /*
1715         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1716         */
1717        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1718                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1719                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1720                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1721                  pre->g_pre_comp[i][2][2]);
1722        for (j = 1; j < 8; ++j) {
1723            /* odd multiples: add G resp. 2^28*G */
1724            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1725                      pre->g_pre_comp[i][2 * j + 1][1],
1726                      pre->g_pre_comp[i][2 * j + 1][2],
1727                      pre->g_pre_comp[i][2 * j][0],
1728                      pre->g_pre_comp[i][2 * j][1],
1729                      pre->g_pre_comp[i][2 * j][2], 0,
1730                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1731                      pre->g_pre_comp[i][1][2]);
1732        }
1733    }
1734    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1735
1736 done:
1737    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1738                             nistp224_pre_comp_free,
1739                             nistp224_pre_comp_clear_free))
1740        goto err;
1741    ret = 1;
1742    pre = NULL;
1743 err:
1744    BN_CTX_end(ctx);
1745    if (generator != NULL)
1746        EC_POINT_free(generator);
1747    if (new_ctx != NULL)
1748        BN_CTX_free(new_ctx);
1749    if (pre)
1750        nistp224_pre_comp_free(pre);
1751    return ret;
1752}
1753
1754int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1755{
1756    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1757                            nistp224_pre_comp_free,
1758                            nistp224_pre_comp_clear_free)
1759        != NULL)
1760        return 1;
1761    else
1762        return 0;
1763}
1764
1765#else
1766static void *dummy = &dummy;
1767#endif
1768