1238384Sjkim/* crypto/ec/ecp_nistp224.c */
2238384Sjkim/*
3238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project.
4238384Sjkim */
5238384Sjkim/* Copyright 2011 Google Inc.
6238384Sjkim *
7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License");
8238384Sjkim *
9238384Sjkim * you may not use this file except in compliance with the License.
10238384Sjkim * You may obtain a copy of the License at
11238384Sjkim *
12238384Sjkim *     http://www.apache.org/licenses/LICENSE-2.0
13238384Sjkim *
14238384Sjkim *  Unless required by applicable law or agreed to in writing, software
15238384Sjkim *  distributed under the License is distributed on an "AS IS" BASIS,
16238384Sjkim *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17238384Sjkim *  See the License for the specific language governing permissions and
18238384Sjkim *  limitations under the License.
19238384Sjkim */
20238384Sjkim
21238384Sjkim/*
22238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23238384Sjkim *
24238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519
26238384Sjkim */
27238384Sjkim
28238384Sjkim#include <openssl/opensslconf.h>
29238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30238384Sjkim
31280304Sjkim# ifndef OPENSSL_SYS_VMS
32280304Sjkim#  include <stdint.h>
33280304Sjkim# else
34280304Sjkim#  include <inttypes.h>
35280304Sjkim# endif
36238384Sjkim
37280304Sjkim# include <string.h>
38280304Sjkim# include <openssl/err.h>
39280304Sjkim# include "ec_lcl.h"
40238384Sjkim
41280304Sjkim# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42238384Sjkim  /* even with gcc, the typedef won't work for 32-bit platforms */
43280304Sjkimtypedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44280304Sjkim                                 * platforms */
45280304Sjkim# else
46280304Sjkim#  error "Need GCC 3.1 or later to define type uint128_t"
47280304Sjkim# endif
48238384Sjkim
49238384Sjkimtypedef uint8_t u8;
50238384Sjkimtypedef uint64_t u64;
51238384Sjkimtypedef int64_t s64;
52238384Sjkim
53238384Sjkim/******************************************************************************/
54280304Sjkim/*-
55280304Sjkim * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56238384Sjkim *
57238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58238384Sjkim * using 64-bit coefficients called 'limbs',
59238384Sjkim * and sometimes (for multiplication results) as
60238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61238384Sjkim * using 128-bit coefficients called 'widelimbs'.
62238384Sjkim * A 4-limb representation is an 'felem';
63238384Sjkim * a 7-widelimb representation is a 'widefelem'.
64238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always
65238384Sjkim * reduce the representations: we ensure that inputs to each felem
66238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then
68238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57.
69238384Sjkim * We only reduce to the unique minimal representation at the end of the
70238384Sjkim * computation.
71238384Sjkim */
72238384Sjkim
73238384Sjkimtypedef uint64_t limb;
74238384Sjkimtypedef uint128_t widelimb;
75238384Sjkim
76238384Sjkimtypedef limb felem[4];
77238384Sjkimtypedef widelimb widefelem[7];
78238384Sjkim
79280304Sjkim/*
80280304Sjkim * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81280304Sjkim * group order size for the elliptic curve, and we also use this type for
82280304Sjkim * scalars for point multiplication.
83280304Sjkim */
84238384Sjkimtypedef u8 felem_bytearray[28];
85238384Sjkim
86238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = {
87280304Sjkim    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89280304Sjkim     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90280304Sjkim    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93280304Sjkim    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94280304Sjkim     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95280304Sjkim     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96280304Sjkim    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97280304Sjkim     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98280304Sjkim     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99280304Sjkim    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100280304Sjkim     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101280304Sjkim     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102238384Sjkim};
103238384Sjkim
104280304Sjkim/*-
105280304Sjkim * Precomputed multiples of the standard generator
106238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1
107238384Sjkim * (0 for the point at infinity).
108238384Sjkim * For each field element, slice a_0 is word 0, etc.
109238384Sjkim *
110238384Sjkim * The table has 2 * 16 elements, starting with the following:
111238384Sjkim * index | bits    | point
112238384Sjkim * ------+---------+------------------------------
113238384Sjkim *     0 | 0 0 0 0 | 0G
114238384Sjkim *     1 | 0 0 0 1 | 1G
115238384Sjkim *     2 | 0 0 1 0 | 2^56G
116238384Sjkim *     3 | 0 0 1 1 | (2^56 + 1)G
117238384Sjkim *     4 | 0 1 0 0 | 2^112G
118238384Sjkim *     5 | 0 1 0 1 | (2^112 + 1)G
119238384Sjkim *     6 | 0 1 1 0 | (2^112 + 2^56)G
120238384Sjkim *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121238384Sjkim *     8 | 1 0 0 0 | 2^168G
122238384Sjkim *     9 | 1 0 0 1 | (2^168 + 1)G
123238384Sjkim *    10 | 1 0 1 0 | (2^168 + 2^56)G
124238384Sjkim *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125238384Sjkim *    12 | 1 1 0 0 | (2^168 + 2^112)G
126238384Sjkim *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127238384Sjkim *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128238384Sjkim *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129238384Sjkim * followed by a copy of this with each element multiplied by 2^28.
130238384Sjkim *
131238384Sjkim * The reason for this is so that we can clock bits into four different
132238384Sjkim * locations when doing simple scalar multiplies against the base point,
133238384Sjkim * and then another four locations using the second 16 elements.
134238384Sjkim */
135280304Sjkimstatic const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136280304Sjkim                                        {0, 0, 0, 0},
137280304Sjkim                                        {0, 0, 0, 0}},
138280304Sjkim                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139280304Sjkim                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140280304Sjkim                                        {0xd5819985007e34, 0x75a05a07476444,
141280304Sjkim                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142280304Sjkim                                        {1, 0, 0, 0}},
143280304Sjkim                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144280304Sjkim                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145280304Sjkim                                        {0x29e0b892dc9c43, 0xece8608436e662,
146280304Sjkim                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147280304Sjkim                                        {1, 0, 0, 0}},
148280304Sjkim                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149280304Sjkim                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150280304Sjkim                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151280304Sjkim                                         0x313865468fafec, 0x5cb379ba910a17},
152280304Sjkim                                        {1, 0, 0, 0}},
153280304Sjkim                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154280304Sjkim                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155280304Sjkim                                        {0x7510407766af5d, 0x84d929610d5450,
156280304Sjkim                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157280304Sjkim                                        {1, 0, 0, 0}},
158280304Sjkim                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159280304Sjkim                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160280304Sjkim                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161280304Sjkim                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162280304Sjkim                                        {1, 0, 0, 0}},
163280304Sjkim                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164280304Sjkim                                         0x363f37347c1ba4, 0xc391b45c84725c},
165280304Sjkim                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166280304Sjkim                                         0xc393da7e222a7f, 0x1efb7890ede244},
167280304Sjkim                                        {1, 0, 0, 0}},
168280304Sjkim                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169280304Sjkim                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170280304Sjkim                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171280304Sjkim                                         0x966c079b753c89, 0xfe67e4e820b112},
172280304Sjkim                                        {1, 0, 0, 0}},
173280304Sjkim                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174280304Sjkim                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175280304Sjkim                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176280304Sjkim                                         0x641fab2116fd56, 0x72855882b08394},
177280304Sjkim                                        {1, 0, 0, 0}},
178280304Sjkim                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179280304Sjkim                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180280304Sjkim                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181280304Sjkim                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182280304Sjkim                                        {1, 0, 0, 0}},
183280304Sjkim                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184280304Sjkim                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185280304Sjkim                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186280304Sjkim                                         0x9281d688bcbe50, 0xc40331df893881},
187280304Sjkim                                        {1, 0, 0, 0}},
188280304Sjkim                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189280304Sjkim                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190280304Sjkim                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191280304Sjkim                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192280304Sjkim                                        {1, 0, 0, 0}},
193280304Sjkim                                       {{0x571e509fb5efb3, 0xade88696410552,
194280304Sjkim                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195280304Sjkim                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196280304Sjkim                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197280304Sjkim                                        {1, 0, 0, 0}},
198280304Sjkim                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199280304Sjkim                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200280304Sjkim                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201280304Sjkim                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202280304Sjkim                                        {1, 0, 0, 0}},
203280304Sjkim                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204280304Sjkim                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205280304Sjkim                                        {0x2891425503b11f, 0x08781030579fea,
206280304Sjkim                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207280304Sjkim                                        {1, 0, 0, 0}},
208280304Sjkim                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209280304Sjkim                                         0xff17036691a973, 0xf1aef351497c58},
210280304Sjkim                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211280304Sjkim                                         0x74a684435bd693, 0xeea7471f962558},
212280304Sjkim                                        {1, 0, 0, 0}}},
213280304Sjkim{{{0, 0, 0, 0},
214280304Sjkim  {0, 0, 0, 0},
215280304Sjkim  {0, 0, 0, 0}},
216280304Sjkim {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217280304Sjkim  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218280304Sjkim  {1, 0, 0, 0}},
219280304Sjkim {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220280304Sjkim  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221280304Sjkim  {1, 0, 0, 0}},
222280304Sjkim {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223280304Sjkim  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224280304Sjkim  {1, 0, 0, 0}},
225280304Sjkim {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226280304Sjkim  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227280304Sjkim  {1, 0, 0, 0}},
228280304Sjkim {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229280304Sjkim  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230280304Sjkim  {1, 0, 0, 0}},
231280304Sjkim {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232280304Sjkim  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233280304Sjkim  {1, 0, 0, 0}},
234280304Sjkim {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235280304Sjkim  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236280304Sjkim  {1, 0, 0, 0}},
237280304Sjkim {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238280304Sjkim  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239280304Sjkim  {1, 0, 0, 0}},
240280304Sjkim {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241280304Sjkim  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242280304Sjkim  {1, 0, 0, 0}},
243280304Sjkim {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244280304Sjkim  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245280304Sjkim  {1, 0, 0, 0}},
246280304Sjkim {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247280304Sjkim  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248280304Sjkim  {1, 0, 0, 0}},
249280304Sjkim {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250280304Sjkim  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251280304Sjkim  {1, 0, 0, 0}},
252280304Sjkim {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253280304Sjkim  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254280304Sjkim  {1, 0, 0, 0}},
255280304Sjkim {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256280304Sjkim  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257280304Sjkim  {1, 0, 0, 0}},
258280304Sjkim {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259280304Sjkim  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260280304Sjkim  {1, 0, 0, 0}}}
261280304Sjkim};
262238384Sjkim
263238384Sjkim/* Precomputation for the group generator. */
264238384Sjkimtypedef struct {
265280304Sjkim    felem g_pre_comp[2][16][3];
266280304Sjkim    int references;
267238384Sjkim} NISTP224_PRE_COMP;
268238384Sjkim
269238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void)
270280304Sjkim{
271280304Sjkim    static const EC_METHOD ret = {
272280304Sjkim        EC_FLAGS_DEFAULT_OCT,
273280304Sjkim        NID_X9_62_prime_field,
274280304Sjkim        ec_GFp_nistp224_group_init,
275280304Sjkim        ec_GFp_simple_group_finish,
276280304Sjkim        ec_GFp_simple_group_clear_finish,
277280304Sjkim        ec_GFp_nist_group_copy,
278280304Sjkim        ec_GFp_nistp224_group_set_curve,
279280304Sjkim        ec_GFp_simple_group_get_curve,
280280304Sjkim        ec_GFp_simple_group_get_degree,
281280304Sjkim        ec_GFp_simple_group_check_discriminant,
282280304Sjkim        ec_GFp_simple_point_init,
283280304Sjkim        ec_GFp_simple_point_finish,
284280304Sjkim        ec_GFp_simple_point_clear_finish,
285280304Sjkim        ec_GFp_simple_point_copy,
286280304Sjkim        ec_GFp_simple_point_set_to_infinity,
287280304Sjkim        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288280304Sjkim        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289280304Sjkim        ec_GFp_simple_point_set_affine_coordinates,
290280304Sjkim        ec_GFp_nistp224_point_get_affine_coordinates,
291280304Sjkim        0 /* point_set_compressed_coordinates */ ,
292280304Sjkim        0 /* point2oct */ ,
293280304Sjkim        0 /* oct2point */ ,
294280304Sjkim        ec_GFp_simple_add,
295280304Sjkim        ec_GFp_simple_dbl,
296280304Sjkim        ec_GFp_simple_invert,
297280304Sjkim        ec_GFp_simple_is_at_infinity,
298280304Sjkim        ec_GFp_simple_is_on_curve,
299280304Sjkim        ec_GFp_simple_cmp,
300280304Sjkim        ec_GFp_simple_make_affine,
301280304Sjkim        ec_GFp_simple_points_make_affine,
302280304Sjkim        ec_GFp_nistp224_points_mul,
303280304Sjkim        ec_GFp_nistp224_precompute_mult,
304280304Sjkim        ec_GFp_nistp224_have_precompute_mult,
305280304Sjkim        ec_GFp_nist_field_mul,
306280304Sjkim        ec_GFp_nist_field_sqr,
307280304Sjkim        0 /* field_div */ ,
308280304Sjkim        0 /* field_encode */ ,
309280304Sjkim        0 /* field_decode */ ,
310280304Sjkim        0                       /* field_set_to_one */
311280304Sjkim    };
312238384Sjkim
313280304Sjkim    return &ret;
314280304Sjkim}
315238384Sjkim
316280304Sjkim/*
317280304Sjkim * Helper functions to convert field elements to/from internal representation
318280304Sjkim */
319238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28])
320280304Sjkim{
321280304Sjkim    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322280304Sjkim    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323280304Sjkim    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324280304Sjkim    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325280304Sjkim}
326238384Sjkim
327238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in)
328280304Sjkim{
329280304Sjkim    unsigned i;
330280304Sjkim    for (i = 0; i < 7; ++i) {
331280304Sjkim        out[i] = in[0] >> (8 * i);
332280304Sjkim        out[i + 7] = in[1] >> (8 * i);
333280304Sjkim        out[i + 14] = in[2] >> (8 * i);
334280304Sjkim        out[i + 21] = in[3] >> (8 * i);
335280304Sjkim    }
336280304Sjkim}
337238384Sjkim
338238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len)
340280304Sjkim{
341280304Sjkim    unsigned i;
342280304Sjkim    for (i = 0; i < len; ++i)
343280304Sjkim        out[i] = in[len - 1 - i];
344280304Sjkim}
345238384Sjkim
346238384Sjkim/* From OpenSSL BIGNUM to internal representation */
347238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn)
348280304Sjkim{
349280304Sjkim    felem_bytearray b_in;
350280304Sjkim    felem_bytearray b_out;
351280304Sjkim    unsigned num_bytes;
352238384Sjkim
353280304Sjkim    /* BN_bn2bin eats leading zeroes */
354280304Sjkim    memset(b_out, 0, sizeof b_out);
355280304Sjkim    num_bytes = BN_num_bytes(bn);
356280304Sjkim    if (num_bytes > sizeof b_out) {
357280304Sjkim        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358280304Sjkim        return 0;
359280304Sjkim    }
360280304Sjkim    if (BN_is_negative(bn)) {
361280304Sjkim        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362280304Sjkim        return 0;
363280304Sjkim    }
364280304Sjkim    num_bytes = BN_bn2bin(bn, b_in);
365280304Sjkim    flip_endian(b_out, b_in, num_bytes);
366280304Sjkim    bin28_to_felem(out, b_out);
367280304Sjkim    return 1;
368280304Sjkim}
369238384Sjkim
370238384Sjkim/* From internal representation to OpenSSL BIGNUM */
371238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372280304Sjkim{
373280304Sjkim    felem_bytearray b_in, b_out;
374280304Sjkim    felem_to_bin28(b_in, in);
375280304Sjkim    flip_endian(b_out, b_in, sizeof b_out);
376280304Sjkim    return BN_bin2bn(b_out, sizeof b_out, out);
377280304Sjkim}
378238384Sjkim
379238384Sjkim/******************************************************************************/
380280304Sjkim/*-
381280304Sjkim *                              FIELD OPERATIONS
382238384Sjkim *
383238384Sjkim * Field operations, using the internal representation of field elements.
384238384Sjkim * NB! These operations are specific to our point multiplication and cannot be
385238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar
386238384Sjkim * will cause an overflow.
387238384Sjkim *
388238384Sjkim */
389238384Sjkim
390238384Sjkimstatic void felem_one(felem out)
391280304Sjkim{
392280304Sjkim    out[0] = 1;
393280304Sjkim    out[1] = 0;
394280304Sjkim    out[2] = 0;
395280304Sjkim    out[3] = 0;
396280304Sjkim}
397238384Sjkim
398238384Sjkimstatic void felem_assign(felem out, const felem in)
399280304Sjkim{
400280304Sjkim    out[0] = in[0];
401280304Sjkim    out[1] = in[1];
402280304Sjkim    out[2] = in[2];
403280304Sjkim    out[3] = in[3];
404280304Sjkim}
405238384Sjkim
406238384Sjkim/* Sum two field elements: out += in */
407238384Sjkimstatic void felem_sum(felem out, const felem in)
408280304Sjkim{
409280304Sjkim    out[0] += in[0];
410280304Sjkim    out[1] += in[1];
411280304Sjkim    out[2] += in[2];
412280304Sjkim    out[3] += in[3];
413280304Sjkim}
414238384Sjkim
415238384Sjkim/* Get negative value: out = -in */
416238384Sjkim/* Assumes in[i] < 2^57 */
417238384Sjkimstatic void felem_neg(felem out, const felem in)
418280304Sjkim{
419280304Sjkim    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420280304Sjkim    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421280304Sjkim    static const limb two58m42m2 = (((limb) 1) << 58) -
422280304Sjkim        (((limb) 1) << 42) - (((limb) 1) << 2);
423238384Sjkim
424280304Sjkim    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425280304Sjkim    out[0] = two58p2 - in[0];
426280304Sjkim    out[1] = two58m42m2 - in[1];
427280304Sjkim    out[2] = two58m2 - in[2];
428280304Sjkim    out[3] = two58m2 - in[3];
429280304Sjkim}
430238384Sjkim
431238384Sjkim/* Subtract field elements: out -= in */
432238384Sjkim/* Assumes in[i] < 2^57 */
433238384Sjkimstatic void felem_diff(felem out, const felem in)
434280304Sjkim{
435280304Sjkim    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436280304Sjkim    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437280304Sjkim    static const limb two58m42m2 = (((limb) 1) << 58) -
438280304Sjkim        (((limb) 1) << 42) - (((limb) 1) << 2);
439238384Sjkim
440280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441280304Sjkim    out[0] += two58p2;
442280304Sjkim    out[1] += two58m42m2;
443280304Sjkim    out[2] += two58m2;
444280304Sjkim    out[3] += two58m2;
445238384Sjkim
446280304Sjkim    out[0] -= in[0];
447280304Sjkim    out[1] -= in[1];
448280304Sjkim    out[2] -= in[2];
449280304Sjkim    out[3] -= in[3];
450280304Sjkim}
451238384Sjkim
452238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */
453238384Sjkim/* Assumes in[i] < 2^119 */
454238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in)
455280304Sjkim{
456280304Sjkim    static const widelimb two120 = ((widelimb) 1) << 120;
457280304Sjkim    static const widelimb two120m64 = (((widelimb) 1) << 120) -
458280304Sjkim        (((widelimb) 1) << 64);
459280304Sjkim    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460280304Sjkim        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461238384Sjkim
462280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463280304Sjkim    out[0] += two120;
464280304Sjkim    out[1] += two120m64;
465280304Sjkim    out[2] += two120m64;
466280304Sjkim    out[3] += two120;
467280304Sjkim    out[4] += two120m104m64;
468280304Sjkim    out[5] += two120m64;
469280304Sjkim    out[6] += two120m64;
470238384Sjkim
471280304Sjkim    out[0] -= in[0];
472280304Sjkim    out[1] -= in[1];
473280304Sjkim    out[2] -= in[2];
474280304Sjkim    out[3] -= in[3];
475280304Sjkim    out[4] -= in[4];
476280304Sjkim    out[5] -= in[5];
477280304Sjkim    out[6] -= in[6];
478280304Sjkim}
479238384Sjkim
480238384Sjkim/* Subtract in mixed mode: out128 -= in64 */
481238384Sjkim/* in[i] < 2^63 */
482238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in)
483280304Sjkim{
484280304Sjkim    static const widelimb two64p8 = (((widelimb) 1) << 64) +
485280304Sjkim        (((widelimb) 1) << 8);
486280304Sjkim    static const widelimb two64m8 = (((widelimb) 1) << 64) -
487280304Sjkim        (((widelimb) 1) << 8);
488280304Sjkim    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489280304Sjkim        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490238384Sjkim
491280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492280304Sjkim    out[0] += two64p8;
493280304Sjkim    out[1] += two64m48m8;
494280304Sjkim    out[2] += two64m8;
495280304Sjkim    out[3] += two64m8;
496238384Sjkim
497280304Sjkim    out[0] -= in[0];
498280304Sjkim    out[1] -= in[1];
499280304Sjkim    out[2] -= in[2];
500280304Sjkim    out[3] -= in[3];
501280304Sjkim}
502238384Sjkim
503280304Sjkim/*
504280304Sjkim * Multiply a field element by a scalar: out = out * scalar The scalars we
505280304Sjkim * actually use are small, so results fit without overflow
506280304Sjkim */
507238384Sjkimstatic void felem_scalar(felem out, const limb scalar)
508280304Sjkim{
509280304Sjkim    out[0] *= scalar;
510280304Sjkim    out[1] *= scalar;
511280304Sjkim    out[2] *= scalar;
512280304Sjkim    out[3] *= scalar;
513280304Sjkim}
514238384Sjkim
515280304Sjkim/*
516280304Sjkim * Multiply an unreduced field element by a scalar: out = out * scalar The
517280304Sjkim * scalars we actually use are small, so results fit without overflow
518280304Sjkim */
519238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar)
520280304Sjkim{
521280304Sjkim    out[0] *= scalar;
522280304Sjkim    out[1] *= scalar;
523280304Sjkim    out[2] *= scalar;
524280304Sjkim    out[3] *= scalar;
525280304Sjkim    out[4] *= scalar;
526280304Sjkim    out[5] *= scalar;
527280304Sjkim    out[6] *= scalar;
528280304Sjkim}
529238384Sjkim
530238384Sjkim/* Square a field element: out = in^2 */
531238384Sjkimstatic void felem_square(widefelem out, const felem in)
532280304Sjkim{
533280304Sjkim    limb tmp0, tmp1, tmp2;
534280304Sjkim    tmp0 = 2 * in[0];
535280304Sjkim    tmp1 = 2 * in[1];
536280304Sjkim    tmp2 = 2 * in[2];
537280304Sjkim    out[0] = ((widelimb) in[0]) * in[0];
538280304Sjkim    out[1] = ((widelimb) in[0]) * tmp1;
539280304Sjkim    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540280304Sjkim    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541280304Sjkim    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542280304Sjkim    out[5] = ((widelimb) in[3]) * tmp2;
543280304Sjkim    out[6] = ((widelimb) in[3]) * in[3];
544280304Sjkim}
545238384Sjkim
546238384Sjkim/* Multiply two field elements: out = in1 * in2 */
547238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2)
548280304Sjkim{
549280304Sjkim    out[0] = ((widelimb) in1[0]) * in2[0];
550280304Sjkim    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551280304Sjkim    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552280304Sjkim        ((widelimb) in1[2]) * in2[0];
553280304Sjkim    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554280304Sjkim        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555280304Sjkim    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556280304Sjkim        ((widelimb) in1[3]) * in2[1];
557280304Sjkim    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558280304Sjkim    out[6] = ((widelimb) in1[3]) * in2[3];
559280304Sjkim}
560238384Sjkim
561280304Sjkim/*-
562280304Sjkim * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563238384Sjkim * Requires in[i] < 2^126,
564238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565238384Sjkimstatic void felem_reduce(felem out, const widefelem in)
566280304Sjkim{
567280304Sjkim    static const widelimb two127p15 = (((widelimb) 1) << 127) +
568280304Sjkim        (((widelimb) 1) << 15);
569280304Sjkim    static const widelimb two127m71 = (((widelimb) 1) << 127) -
570280304Sjkim        (((widelimb) 1) << 71);
571280304Sjkim    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572280304Sjkim        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573280304Sjkim    widelimb output[5];
574238384Sjkim
575280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576280304Sjkim    output[0] = in[0] + two127p15;
577280304Sjkim    output[1] = in[1] + two127m71m55;
578280304Sjkim    output[2] = in[2] + two127m71;
579280304Sjkim    output[3] = in[3];
580280304Sjkim    output[4] = in[4];
581238384Sjkim
582280304Sjkim    /* Eliminate in[4], in[5], in[6] */
583280304Sjkim    output[4] += in[6] >> 16;
584280304Sjkim    output[3] += (in[6] & 0xffff) << 40;
585280304Sjkim    output[2] -= in[6];
586238384Sjkim
587280304Sjkim    output[3] += in[5] >> 16;
588280304Sjkim    output[2] += (in[5] & 0xffff) << 40;
589280304Sjkim    output[1] -= in[5];
590238384Sjkim
591280304Sjkim    output[2] += output[4] >> 16;
592280304Sjkim    output[1] += (output[4] & 0xffff) << 40;
593280304Sjkim    output[0] -= output[4];
594238384Sjkim
595280304Sjkim    /* Carry 2 -> 3 -> 4 */
596280304Sjkim    output[3] += output[2] >> 56;
597280304Sjkim    output[2] &= 0x00ffffffffffffff;
598238384Sjkim
599280304Sjkim    output[4] = output[3] >> 56;
600280304Sjkim    output[3] &= 0x00ffffffffffffff;
601238384Sjkim
602280304Sjkim    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603238384Sjkim
604280304Sjkim    /* Eliminate output[4] */
605280304Sjkim    output[2] += output[4] >> 16;
606280304Sjkim    /* output[2] < 2^56 + 2^56 = 2^57 */
607280304Sjkim    output[1] += (output[4] & 0xffff) << 40;
608280304Sjkim    output[0] -= output[4];
609238384Sjkim
610280304Sjkim    /* Carry 0 -> 1 -> 2 -> 3 */
611280304Sjkim    output[1] += output[0] >> 56;
612280304Sjkim    out[0] = output[0] & 0x00ffffffffffffff;
613238384Sjkim
614280304Sjkim    output[2] += output[1] >> 56;
615280304Sjkim    /* output[2] < 2^57 + 2^72 */
616280304Sjkim    out[1] = output[1] & 0x00ffffffffffffff;
617280304Sjkim    output[3] += output[2] >> 56;
618280304Sjkim    /* output[3] <= 2^56 + 2^16 */
619280304Sjkim    out[2] = output[2] & 0x00ffffffffffffff;
620238384Sjkim
621280304Sjkim    /*-
622280304Sjkim     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623280304Sjkim     * out[3] <= 2^56 + 2^16 (due to final carry),
624280304Sjkim     * so out < 2*p
625280304Sjkim     */
626280304Sjkim    out[3] = output[3];
627280304Sjkim}
628238384Sjkim
629238384Sjkimstatic void felem_square_reduce(felem out, const felem in)
630280304Sjkim{
631280304Sjkim    widefelem tmp;
632280304Sjkim    felem_square(tmp, in);
633280304Sjkim    felem_reduce(out, tmp);
634280304Sjkim}
635238384Sjkim
636238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2)
637280304Sjkim{
638280304Sjkim    widefelem tmp;
639280304Sjkim    felem_mul(tmp, in1, in2);
640280304Sjkim    felem_reduce(out, tmp);
641280304Sjkim}
642238384Sjkim
643280304Sjkim/*
644280304Sjkim * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645280304Sjkim * call felem_reduce first)
646280304Sjkim */
647238384Sjkimstatic void felem_contract(felem out, const felem in)
648280304Sjkim{
649280304Sjkim    static const int64_t two56 = ((limb) 1) << 56;
650280304Sjkim    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651280304Sjkim    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652280304Sjkim    int64_t tmp[4], a;
653280304Sjkim    tmp[0] = in[0];
654280304Sjkim    tmp[1] = in[1];
655280304Sjkim    tmp[2] = in[2];
656280304Sjkim    tmp[3] = in[3];
657280304Sjkim    /* Case 1: a = 1 iff in >= 2^224 */
658280304Sjkim    a = (in[3] >> 56);
659280304Sjkim    tmp[0] -= a;
660280304Sjkim    tmp[1] += a << 40;
661280304Sjkim    tmp[3] &= 0x00ffffffffffffff;
662280304Sjkim    /*
663280304Sjkim     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664280304Sjkim     * and the lower part is non-zero
665280304Sjkim     */
666280304Sjkim    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667280304Sjkim        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668280304Sjkim    a &= 0x00ffffffffffffff;
669280304Sjkim    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670280304Sjkim    a = (a - 1) >> 63;
671280304Sjkim    /* subtract 2^224 - 2^96 + 1 if a is all-one */
672280304Sjkim    tmp[3] &= a ^ 0xffffffffffffffff;
673280304Sjkim    tmp[2] &= a ^ 0xffffffffffffffff;
674280304Sjkim    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675280304Sjkim    tmp[0] -= 1 & a;
676238384Sjkim
677280304Sjkim    /*
678280304Sjkim     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679280304Sjkim     * non-zero, so we only need one step
680280304Sjkim     */
681280304Sjkim    a = tmp[0] >> 63;
682280304Sjkim    tmp[0] += two56 & a;
683280304Sjkim    tmp[1] -= 1 & a;
684238384Sjkim
685280304Sjkim    /* carry 1 -> 2 -> 3 */
686280304Sjkim    tmp[2] += tmp[1] >> 56;
687280304Sjkim    tmp[1] &= 0x00ffffffffffffff;
688238384Sjkim
689280304Sjkim    tmp[3] += tmp[2] >> 56;
690280304Sjkim    tmp[2] &= 0x00ffffffffffffff;
691238384Sjkim
692280304Sjkim    /* Now 0 <= out < p */
693280304Sjkim    out[0] = tmp[0];
694280304Sjkim    out[1] = tmp[1];
695280304Sjkim    out[2] = tmp[2];
696280304Sjkim    out[3] = tmp[3];
697280304Sjkim}
698238384Sjkim
699280304Sjkim/*
700280304Sjkim * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701280304Sjkim * elements are reduced to in < 2^225, so we only need to check three cases:
702280304Sjkim * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703280304Sjkim */
704238384Sjkimstatic limb felem_is_zero(const felem in)
705280304Sjkim{
706280304Sjkim    limb zero, two224m96p1, two225m97p2;
707238384Sjkim
708280304Sjkim    zero = in[0] | in[1] | in[2] | in[3];
709280304Sjkim    zero = (((int64_t) (zero) - 1) >> 63) & 1;
710280304Sjkim    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711280304Sjkim        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712280304Sjkim    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713280304Sjkim    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714280304Sjkim        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715280304Sjkim    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716280304Sjkim    return (zero | two224m96p1 | two225m97p2);
717280304Sjkim}
718238384Sjkim
719238384Sjkimstatic limb felem_is_zero_int(const felem in)
720280304Sjkim{
721280304Sjkim    return (int)(felem_is_zero(in) & ((limb) 1));
722280304Sjkim}
723238384Sjkim
724238384Sjkim/* Invert a field element */
725238384Sjkim/* Computation chain copied from djb's code */
726238384Sjkimstatic void felem_inv(felem out, const felem in)
727280304Sjkim{
728280304Sjkim    felem ftmp, ftmp2, ftmp3, ftmp4;
729280304Sjkim    widefelem tmp;
730280304Sjkim    unsigned i;
731238384Sjkim
732280304Sjkim    felem_square(tmp, in);
733280304Sjkim    felem_reduce(ftmp, tmp);    /* 2 */
734280304Sjkim    felem_mul(tmp, in, ftmp);
735280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
736280304Sjkim    felem_square(tmp, ftmp);
737280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
738280304Sjkim    felem_mul(tmp, in, ftmp);
739280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
740280304Sjkim    felem_square(tmp, ftmp);
741280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
742280304Sjkim    felem_square(tmp, ftmp2);
743280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
744280304Sjkim    felem_square(tmp, ftmp2);
745280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
746280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
747280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
748280304Sjkim    felem_square(tmp, ftmp);
749280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
750280304Sjkim    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
751280304Sjkim        felem_square(tmp, ftmp2);
752280304Sjkim        felem_reduce(ftmp2, tmp);
753280304Sjkim    }
754280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
755280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
756280304Sjkim    felem_square(tmp, ftmp2);
757280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
758280304Sjkim    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
759280304Sjkim        felem_square(tmp, ftmp3);
760280304Sjkim        felem_reduce(ftmp3, tmp);
761280304Sjkim    }
762280304Sjkim    felem_mul(tmp, ftmp3, ftmp2);
763280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
764280304Sjkim    felem_square(tmp, ftmp2);
765280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
766280304Sjkim    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
767280304Sjkim        felem_square(tmp, ftmp3);
768280304Sjkim        felem_reduce(ftmp3, tmp);
769280304Sjkim    }
770280304Sjkim    felem_mul(tmp, ftmp3, ftmp2);
771280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
772280304Sjkim    felem_square(tmp, ftmp3);
773280304Sjkim    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
774280304Sjkim    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
775280304Sjkim        felem_square(tmp, ftmp4);
776280304Sjkim        felem_reduce(ftmp4, tmp);
777280304Sjkim    }
778280304Sjkim    felem_mul(tmp, ftmp3, ftmp4);
779280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
780280304Sjkim    felem_square(tmp, ftmp3);
781280304Sjkim    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
782280304Sjkim    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
783280304Sjkim        felem_square(tmp, ftmp4);
784280304Sjkim        felem_reduce(ftmp4, tmp);
785280304Sjkim    }
786280304Sjkim    felem_mul(tmp, ftmp2, ftmp4);
787280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
788280304Sjkim    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
789280304Sjkim        felem_square(tmp, ftmp2);
790280304Sjkim        felem_reduce(ftmp2, tmp);
791280304Sjkim    }
792280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
793280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
794280304Sjkim    felem_square(tmp, ftmp);
795280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
796280304Sjkim    felem_mul(tmp, ftmp, in);
797280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
798280304Sjkim    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
799280304Sjkim        felem_square(tmp, ftmp);
800280304Sjkim        felem_reduce(ftmp, tmp);
801280304Sjkim    }
802280304Sjkim    felem_mul(tmp, ftmp, ftmp3);
803280304Sjkim    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
804280304Sjkim}
805238384Sjkim
806280304Sjkim/*
807280304Sjkim * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808280304Sjkim * out to itself.
809280304Sjkim */
810280304Sjkimstatic void copy_conditional(felem out, const felem in, limb icopy)
811280304Sjkim{
812280304Sjkim    unsigned i;
813280304Sjkim    /*
814280304Sjkim     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815280304Sjkim     */
816280304Sjkim    const limb copy = -icopy;
817280304Sjkim    for (i = 0; i < 4; ++i) {
818280304Sjkim        const limb tmp = copy & (in[i] ^ out[i]);
819280304Sjkim        out[i] ^= tmp;
820280304Sjkim    }
821280304Sjkim}
822238384Sjkim
823238384Sjkim/******************************************************************************/
824280304Sjkim/*-
825280304Sjkim *                       ELLIPTIC CURVE POINT OPERATIONS
826238384Sjkim *
827238384Sjkim * Points are represented in Jacobian projective coordinates:
828238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829238384Sjkim * or to the point at infinity if Z == 0.
830238384Sjkim *
831238384Sjkim */
832238384Sjkim
833280304Sjkim/*-
834280304Sjkim * Double an elliptic curve point:
835238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where
836238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840280304Sjkim * while x_out == y_in is not (maybe this works, but it's not tested).
841280304Sjkim */
842238384Sjkimstatic void
843238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out,
844238384Sjkim             const felem x_in, const felem y_in, const felem z_in)
845280304Sjkim{
846280304Sjkim    widefelem tmp, tmp2;
847280304Sjkim    felem delta, gamma, beta, alpha, ftmp, ftmp2;
848238384Sjkim
849280304Sjkim    felem_assign(ftmp, x_in);
850280304Sjkim    felem_assign(ftmp2, x_in);
851238384Sjkim
852280304Sjkim    /* delta = z^2 */
853280304Sjkim    felem_square(tmp, z_in);
854280304Sjkim    felem_reduce(delta, tmp);
855238384Sjkim
856280304Sjkim    /* gamma = y^2 */
857280304Sjkim    felem_square(tmp, y_in);
858280304Sjkim    felem_reduce(gamma, tmp);
859238384Sjkim
860280304Sjkim    /* beta = x*gamma */
861280304Sjkim    felem_mul(tmp, x_in, gamma);
862280304Sjkim    felem_reduce(beta, tmp);
863238384Sjkim
864280304Sjkim    /* alpha = 3*(x-delta)*(x+delta) */
865280304Sjkim    felem_diff(ftmp, delta);
866280304Sjkim    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867280304Sjkim    felem_sum(ftmp2, delta);
868280304Sjkim    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869280304Sjkim    felem_scalar(ftmp2, 3);
870280304Sjkim    /* ftmp2[i] < 3 * 2^58 < 2^60 */
871280304Sjkim    felem_mul(tmp, ftmp, ftmp2);
872280304Sjkim    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873280304Sjkim    felem_reduce(alpha, tmp);
874238384Sjkim
875280304Sjkim    /* x' = alpha^2 - 8*beta */
876280304Sjkim    felem_square(tmp, alpha);
877280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878280304Sjkim    felem_assign(ftmp, beta);
879280304Sjkim    felem_scalar(ftmp, 8);
880280304Sjkim    /* ftmp[i] < 8 * 2^57 = 2^60 */
881280304Sjkim    felem_diff_128_64(tmp, ftmp);
882280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883280304Sjkim    felem_reduce(x_out, tmp);
884238384Sjkim
885280304Sjkim    /* z' = (y + z)^2 - gamma - delta */
886280304Sjkim    felem_sum(delta, gamma);
887280304Sjkim    /* delta[i] < 2^57 + 2^57 = 2^58 */
888280304Sjkim    felem_assign(ftmp, y_in);
889280304Sjkim    felem_sum(ftmp, z_in);
890280304Sjkim    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891280304Sjkim    felem_square(tmp, ftmp);
892280304Sjkim    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893280304Sjkim    felem_diff_128_64(tmp, delta);
894280304Sjkim    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895280304Sjkim    felem_reduce(z_out, tmp);
896238384Sjkim
897280304Sjkim    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898280304Sjkim    felem_scalar(beta, 4);
899280304Sjkim    /* beta[i] < 4 * 2^57 = 2^59 */
900280304Sjkim    felem_diff(beta, x_out);
901280304Sjkim    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902280304Sjkim    felem_mul(tmp, alpha, beta);
903280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904280304Sjkim    felem_square(tmp2, gamma);
905280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906280304Sjkim    widefelem_scalar(tmp2, 8);
907280304Sjkim    /* tmp2[i] < 8 * 2^116 = 2^119 */
908280304Sjkim    widefelem_diff(tmp, tmp2);
909280304Sjkim    /* tmp[i] < 2^119 + 2^120 < 2^121 */
910280304Sjkim    felem_reduce(y_out, tmp);
911280304Sjkim}
912238384Sjkim
913280304Sjkim/*-
914280304Sjkim * Add two elliptic curve points:
915238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919238384Sjkim *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921238384Sjkim *
922238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
923238384Sjkim */
924238384Sjkim
925280304Sjkim/*
926280304Sjkim * This function is not entirely constant-time: it includes a branch for
927280304Sjkim * checking whether the two input points are equal, (while not equal to the
928280304Sjkim * point at infinity). This case never happens during single point
929280304Sjkim * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930280304Sjkim */
931238384Sjkimstatic void point_add(felem x3, felem y3, felem z3,
932280304Sjkim                      const felem x1, const felem y1, const felem z1,
933280304Sjkim                      const int mixed, const felem x2, const felem y2,
934280304Sjkim                      const felem z2)
935280304Sjkim{
936280304Sjkim    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937280304Sjkim    widefelem tmp, tmp2;
938280304Sjkim    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
939238384Sjkim
940280304Sjkim    if (!mixed) {
941280304Sjkim        /* ftmp2 = z2^2 */
942280304Sjkim        felem_square(tmp, z2);
943280304Sjkim        felem_reduce(ftmp2, tmp);
944238384Sjkim
945280304Sjkim        /* ftmp4 = z2^3 */
946280304Sjkim        felem_mul(tmp, ftmp2, z2);
947280304Sjkim        felem_reduce(ftmp4, tmp);
948238384Sjkim
949280304Sjkim        /* ftmp4 = z2^3*y1 */
950280304Sjkim        felem_mul(tmp2, ftmp4, y1);
951280304Sjkim        felem_reduce(ftmp4, tmp2);
952238384Sjkim
953280304Sjkim        /* ftmp2 = z2^2*x1 */
954280304Sjkim        felem_mul(tmp2, ftmp2, x1);
955280304Sjkim        felem_reduce(ftmp2, tmp2);
956280304Sjkim    } else {
957280304Sjkim        /*
958280304Sjkim         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
959280304Sjkim         */
960238384Sjkim
961280304Sjkim        /* ftmp4 = z2^3*y1 */
962280304Sjkim        felem_assign(ftmp4, y1);
963238384Sjkim
964280304Sjkim        /* ftmp2 = z2^2*x1 */
965280304Sjkim        felem_assign(ftmp2, x1);
966280304Sjkim    }
967238384Sjkim
968280304Sjkim    /* ftmp = z1^2 */
969280304Sjkim    felem_square(tmp, z1);
970280304Sjkim    felem_reduce(ftmp, tmp);
971238384Sjkim
972280304Sjkim    /* ftmp3 = z1^3 */
973280304Sjkim    felem_mul(tmp, ftmp, z1);
974280304Sjkim    felem_reduce(ftmp3, tmp);
975238384Sjkim
976280304Sjkim    /* tmp = z1^3*y2 */
977280304Sjkim    felem_mul(tmp, ftmp3, y2);
978280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979238384Sjkim
980280304Sjkim    /* ftmp3 = z1^3*y2 - z2^3*y1 */
981280304Sjkim    felem_diff_128_64(tmp, ftmp4);
982280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983280304Sjkim    felem_reduce(ftmp3, tmp);
984238384Sjkim
985280304Sjkim    /* tmp = z1^2*x2 */
986280304Sjkim    felem_mul(tmp, ftmp, x2);
987280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988238384Sjkim
989280304Sjkim    /* ftmp = z1^2*x2 - z2^2*x1 */
990280304Sjkim    felem_diff_128_64(tmp, ftmp2);
991280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992280304Sjkim    felem_reduce(ftmp, tmp);
993238384Sjkim
994280304Sjkim    /*
995280304Sjkim     * the formulae are incorrect if the points are equal so we check for
996280304Sjkim     * this and do doubling if this happens
997280304Sjkim     */
998280304Sjkim    x_equal = felem_is_zero(ftmp);
999280304Sjkim    y_equal = felem_is_zero(ftmp3);
1000280304Sjkim    z1_is_zero = felem_is_zero(z1);
1001280304Sjkim    z2_is_zero = felem_is_zero(z2);
1002280304Sjkim    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003280304Sjkim    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004280304Sjkim        point_double(x3, y3, z3, x1, y1, z1);
1005280304Sjkim        return;
1006280304Sjkim    }
1007238384Sjkim
1008280304Sjkim    /* ftmp5 = z1*z2 */
1009280304Sjkim    if (!mixed) {
1010280304Sjkim        felem_mul(tmp, z1, z2);
1011280304Sjkim        felem_reduce(ftmp5, tmp);
1012280304Sjkim    } else {
1013280304Sjkim        /* special case z2 = 0 is handled later */
1014280304Sjkim        felem_assign(ftmp5, z1);
1015280304Sjkim    }
1016238384Sjkim
1017280304Sjkim    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018280304Sjkim    felem_mul(tmp, ftmp, ftmp5);
1019280304Sjkim    felem_reduce(z_out, tmp);
1020238384Sjkim
1021280304Sjkim    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022280304Sjkim    felem_assign(ftmp5, ftmp);
1023280304Sjkim    felem_square(tmp, ftmp);
1024280304Sjkim    felem_reduce(ftmp, tmp);
1025238384Sjkim
1026280304Sjkim    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027280304Sjkim    felem_mul(tmp, ftmp, ftmp5);
1028280304Sjkim    felem_reduce(ftmp5, tmp);
1029238384Sjkim
1030280304Sjkim    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
1032280304Sjkim    felem_reduce(ftmp2, tmp);
1033238384Sjkim
1034280304Sjkim    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035280304Sjkim    felem_mul(tmp, ftmp4, ftmp5);
1036280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037238384Sjkim
1038280304Sjkim    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039280304Sjkim    felem_square(tmp2, ftmp3);
1040280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041238384Sjkim
1042280304Sjkim    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043280304Sjkim    felem_diff_128_64(tmp2, ftmp5);
1044280304Sjkim    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045238384Sjkim
1046280304Sjkim    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047280304Sjkim    felem_assign(ftmp5, ftmp2);
1048280304Sjkim    felem_scalar(ftmp5, 2);
1049280304Sjkim    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050238384Sjkim
1051280304Sjkim    /*-
1052280304Sjkim     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053280304Sjkim     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054280304Sjkim     */
1055280304Sjkim    felem_diff_128_64(tmp2, ftmp5);
1056280304Sjkim    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057280304Sjkim    felem_reduce(x_out, tmp2);
1058238384Sjkim
1059280304Sjkim    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060280304Sjkim    felem_diff(ftmp2, x_out);
1061280304Sjkim    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062238384Sjkim
1063280304Sjkim    /*
1064280304Sjkim     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065280304Sjkim     */
1066280304Sjkim    felem_mul(tmp2, ftmp3, ftmp2);
1067280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068238384Sjkim
1069280304Sjkim    /*-
1070280304Sjkim     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071280304Sjkim     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072280304Sjkim     */
1073280304Sjkim    widefelem_diff(tmp2, tmp);
1074280304Sjkim    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075280304Sjkim    felem_reduce(y_out, tmp2);
1076238384Sjkim
1077280304Sjkim    /*
1078280304Sjkim     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079280304Sjkim     * the point at infinity, so we need to check for this separately
1080280304Sjkim     */
1081238384Sjkim
1082280304Sjkim    /*
1083280304Sjkim     * if point 1 is at infinity, copy point 2 to output, and vice versa
1084280304Sjkim     */
1085280304Sjkim    copy_conditional(x_out, x2, z1_is_zero);
1086280304Sjkim    copy_conditional(x_out, x1, z2_is_zero);
1087280304Sjkim    copy_conditional(y_out, y2, z1_is_zero);
1088280304Sjkim    copy_conditional(y_out, y1, z2_is_zero);
1089280304Sjkim    copy_conditional(z_out, z2, z1_is_zero);
1090280304Sjkim    copy_conditional(z_out, z1, z2_is_zero);
1091280304Sjkim    felem_assign(x3, x_out);
1092280304Sjkim    felem_assign(y3, y_out);
1093280304Sjkim    felem_assign(z3, z_out);
1094280304Sjkim}
1095238384Sjkim
1096280304Sjkim/*
1097280304Sjkim * select_point selects the |idx|th point from a precomputation table and
1098280304Sjkim * copies it to out.
1099280304Sjkim * The pre_comp array argument should be size of |size| argument
1100280304Sjkim */
1101280304Sjkimstatic void select_point(const u64 idx, unsigned int size,
1102280304Sjkim                         const felem pre_comp[][3], felem out[3])
1103280304Sjkim{
1104280304Sjkim    unsigned i, j;
1105280304Sjkim    limb *outlimbs = &out[0][0];
1106280304Sjkim    memset(outlimbs, 0, 3 * sizeof(felem));
1107238384Sjkim
1108280304Sjkim    for (i = 0; i < size; i++) {
1109280304Sjkim        const limb *inlimbs = &pre_comp[i][0][0];
1110280304Sjkim        u64 mask = i ^ idx;
1111280304Sjkim        mask |= mask >> 4;
1112280304Sjkim        mask |= mask >> 2;
1113280304Sjkim        mask |= mask >> 1;
1114280304Sjkim        mask &= 1;
1115280304Sjkim        mask--;
1116280304Sjkim        for (j = 0; j < 4 * 3; j++)
1117280304Sjkim            outlimbs[j] |= inlimbs[j] & mask;
1118280304Sjkim    }
1119280304Sjkim}
1120238384Sjkim
1121238384Sjkim/* get_bit returns the |i|th bit in |in| */
1122238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i)
1123280304Sjkim{
1124280304Sjkim    if (i >= 224)
1125280304Sjkim        return 0;
1126280304Sjkim    return (in[i >> 3] >> (i & 7)) & 1;
1127280304Sjkim}
1128238384Sjkim
1129280304Sjkim/*
1130280304Sjkim * Interleaved point multiplication using precomputed point multiples: The
1131280304Sjkim * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132280304Sjkim * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133280304Sjkim * generator, using certain (large) precomputed multiples in g_pre_comp.
1134280304Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135280304Sjkim */
1136238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out,
1137280304Sjkim                      const felem_bytearray scalars[],
1138280304Sjkim                      const unsigned num_points, const u8 *g_scalar,
1139280304Sjkim                      const int mixed, const felem pre_comp[][17][3],
1140280304Sjkim                      const felem g_pre_comp[2][16][3])
1141280304Sjkim{
1142280304Sjkim    int i, skip;
1143280304Sjkim    unsigned num;
1144280304Sjkim    unsigned gen_mul = (g_scalar != NULL);
1145280304Sjkim    felem nq[3], tmp[4];
1146280304Sjkim    u64 bits;
1147280304Sjkim    u8 sign, digit;
1148238384Sjkim
1149280304Sjkim    /* set nq to the point at infinity */
1150280304Sjkim    memset(nq, 0, 3 * sizeof(felem));
1151238384Sjkim
1152280304Sjkim    /*
1153280304Sjkim     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154280304Sjkim     * of the generator (two in each of the last 28 rounds) and additions of
1155280304Sjkim     * other points multiples (every 5th round).
1156280304Sjkim     */
1157280304Sjkim    skip = 1;                   /* save two point operations in the first
1158280304Sjkim                                 * round */
1159280304Sjkim    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160280304Sjkim        /* double */
1161280304Sjkim        if (!skip)
1162280304Sjkim            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163238384Sjkim
1164280304Sjkim        /* add multiples of the generator */
1165280304Sjkim        if (gen_mul && (i <= 27)) {
1166280304Sjkim            /* first, look 28 bits upwards */
1167280304Sjkim            bits = get_bit(g_scalar, i + 196) << 3;
1168280304Sjkim            bits |= get_bit(g_scalar, i + 140) << 2;
1169280304Sjkim            bits |= get_bit(g_scalar, i + 84) << 1;
1170280304Sjkim            bits |= get_bit(g_scalar, i + 28);
1171280304Sjkim            /* select the point to add, in constant time */
1172280304Sjkim            select_point(bits, 16, g_pre_comp[1], tmp);
1173238384Sjkim
1174280304Sjkim            if (!skip) {
1175280304Sjkim                /* value 1 below is argument for "mixed" */
1176280304Sjkim                point_add(nq[0], nq[1], nq[2],
1177280304Sjkim                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178280304Sjkim            } else {
1179280304Sjkim                memcpy(nq, tmp, 3 * sizeof(felem));
1180280304Sjkim                skip = 0;
1181280304Sjkim            }
1182238384Sjkim
1183280304Sjkim            /* second, look at the current position */
1184280304Sjkim            bits = get_bit(g_scalar, i + 168) << 3;
1185280304Sjkim            bits |= get_bit(g_scalar, i + 112) << 2;
1186280304Sjkim            bits |= get_bit(g_scalar, i + 56) << 1;
1187280304Sjkim            bits |= get_bit(g_scalar, i);
1188280304Sjkim            /* select the point to add, in constant time */
1189280304Sjkim            select_point(bits, 16, g_pre_comp[0], tmp);
1190280304Sjkim            point_add(nq[0], nq[1], nq[2],
1191280304Sjkim                      nq[0], nq[1], nq[2],
1192280304Sjkim                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193280304Sjkim        }
1194238384Sjkim
1195280304Sjkim        /* do other additions every 5 doublings */
1196280304Sjkim        if (num_points && (i % 5 == 0)) {
1197280304Sjkim            /* loop over all scalars */
1198280304Sjkim            for (num = 0; num < num_points; ++num) {
1199280304Sjkim                bits = get_bit(scalars[num], i + 4) << 5;
1200280304Sjkim                bits |= get_bit(scalars[num], i + 3) << 4;
1201280304Sjkim                bits |= get_bit(scalars[num], i + 2) << 3;
1202280304Sjkim                bits |= get_bit(scalars[num], i + 1) << 2;
1203280304Sjkim                bits |= get_bit(scalars[num], i) << 1;
1204280304Sjkim                bits |= get_bit(scalars[num], i - 1);
1205280304Sjkim                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206238384Sjkim
1207280304Sjkim                /* select the point to add or subtract */
1208280304Sjkim                select_point(digit, 17, pre_comp[num], tmp);
1209280304Sjkim                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210280304Sjkim                                            * point */
1211280304Sjkim                copy_conditional(tmp[1], tmp[3], sign);
1212238384Sjkim
1213280304Sjkim                if (!skip) {
1214280304Sjkim                    point_add(nq[0], nq[1], nq[2],
1215280304Sjkim                              nq[0], nq[1], nq[2],
1216280304Sjkim                              mixed, tmp[0], tmp[1], tmp[2]);
1217280304Sjkim                } else {
1218280304Sjkim                    memcpy(nq, tmp, 3 * sizeof(felem));
1219280304Sjkim                    skip = 0;
1220280304Sjkim                }
1221280304Sjkim            }
1222280304Sjkim        }
1223280304Sjkim    }
1224280304Sjkim    felem_assign(x_out, nq[0]);
1225280304Sjkim    felem_assign(y_out, nq[1]);
1226280304Sjkim    felem_assign(z_out, nq[2]);
1227280304Sjkim}
1228238384Sjkim
1229238384Sjkim/******************************************************************************/
1230280304Sjkim/*
1231280304Sjkim * FUNCTIONS TO MANAGE PRECOMPUTATION
1232238384Sjkim */
1233238384Sjkim
1234238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235280304Sjkim{
1236280304Sjkim    NISTP224_PRE_COMP *ret = NULL;
1237280304Sjkim    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238280304Sjkim    if (!ret) {
1239280304Sjkim        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240280304Sjkim        return ret;
1241280304Sjkim    }
1242280304Sjkim    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243280304Sjkim    ret->references = 1;
1244280304Sjkim    return ret;
1245280304Sjkim}
1246238384Sjkim
1247238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_)
1248280304Sjkim{
1249280304Sjkim    NISTP224_PRE_COMP *src = src_;
1250238384Sjkim
1251280304Sjkim    /* no need to actually copy, these objects never change! */
1252280304Sjkim    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253238384Sjkim
1254280304Sjkim    return src_;
1255280304Sjkim}
1256238384Sjkim
1257238384Sjkimstatic void nistp224_pre_comp_free(void *pre_)
1258280304Sjkim{
1259280304Sjkim    int i;
1260280304Sjkim    NISTP224_PRE_COMP *pre = pre_;
1261238384Sjkim
1262280304Sjkim    if (!pre)
1263280304Sjkim        return;
1264238384Sjkim
1265280304Sjkim    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266280304Sjkim    if (i > 0)
1267280304Sjkim        return;
1268238384Sjkim
1269280304Sjkim    OPENSSL_free(pre);
1270280304Sjkim}
1271238384Sjkim
1272238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_)
1273280304Sjkim{
1274280304Sjkim    int i;
1275280304Sjkim    NISTP224_PRE_COMP *pre = pre_;
1276238384Sjkim
1277280304Sjkim    if (!pre)
1278280304Sjkim        return;
1279238384Sjkim
1280280304Sjkim    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281280304Sjkim    if (i > 0)
1282280304Sjkim        return;
1283238384Sjkim
1284280304Sjkim    OPENSSL_cleanse(pre, sizeof *pre);
1285280304Sjkim    OPENSSL_free(pre);
1286280304Sjkim}
1287238384Sjkim
1288238384Sjkim/******************************************************************************/
1289280304Sjkim/*
1290280304Sjkim * OPENSSL EC_METHOD FUNCTIONS
1291238384Sjkim */
1292238384Sjkim
1293238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group)
1294280304Sjkim{
1295280304Sjkim    int ret;
1296280304Sjkim    ret = ec_GFp_simple_group_init(group);
1297280304Sjkim    group->a_is_minus3 = 1;
1298280304Sjkim    return ret;
1299280304Sjkim}
1300238384Sjkim
1301238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302280304Sjkim                                    const BIGNUM *a, const BIGNUM *b,
1303280304Sjkim                                    BN_CTX *ctx)
1304280304Sjkim{
1305280304Sjkim    int ret = 0;
1306280304Sjkim    BN_CTX *new_ctx = NULL;
1307280304Sjkim    BIGNUM *curve_p, *curve_a, *curve_b;
1308238384Sjkim
1309280304Sjkim    if (ctx == NULL)
1310280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311280304Sjkim            return 0;
1312280304Sjkim    BN_CTX_start(ctx);
1313280304Sjkim    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314280304Sjkim        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315280304Sjkim        ((curve_b = BN_CTX_get(ctx)) == NULL))
1316280304Sjkim        goto err;
1317280304Sjkim    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318280304Sjkim    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319280304Sjkim    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320280304Sjkim    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322280304Sjkim              EC_R_WRONG_CURVE_PARAMETERS);
1323280304Sjkim        goto err;
1324280304Sjkim    }
1325280304Sjkim    group->field_mod_func = BN_nist_mod_224;
1326280304Sjkim    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327280304Sjkim err:
1328280304Sjkim    BN_CTX_end(ctx);
1329280304Sjkim    if (new_ctx != NULL)
1330280304Sjkim        BN_CTX_free(new_ctx);
1331280304Sjkim    return ret;
1332280304Sjkim}
1333238384Sjkim
1334280304Sjkim/*
1335280304Sjkim * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336280304Sjkim * (X/Z^2, Y/Z^3)
1337280304Sjkim */
1338238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339280304Sjkim                                                 const EC_POINT *point,
1340280304Sjkim                                                 BIGNUM *x, BIGNUM *y,
1341280304Sjkim                                                 BN_CTX *ctx)
1342280304Sjkim{
1343280304Sjkim    felem z1, z2, x_in, y_in, x_out, y_out;
1344280304Sjkim    widefelem tmp;
1345238384Sjkim
1346280304Sjkim    if (EC_POINT_is_at_infinity(group, point)) {
1347280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348280304Sjkim              EC_R_POINT_AT_INFINITY);
1349280304Sjkim        return 0;
1350280304Sjkim    }
1351280304Sjkim    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352280304Sjkim        (!BN_to_felem(z1, &point->Z)))
1353280304Sjkim        return 0;
1354280304Sjkim    felem_inv(z2, z1);
1355280304Sjkim    felem_square(tmp, z2);
1356280304Sjkim    felem_reduce(z1, tmp);
1357280304Sjkim    felem_mul(tmp, x_in, z1);
1358280304Sjkim    felem_reduce(x_in, tmp);
1359280304Sjkim    felem_contract(x_out, x_in);
1360280304Sjkim    if (x != NULL) {
1361280304Sjkim        if (!felem_to_BN(x, x_out)) {
1362280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363280304Sjkim                  ERR_R_BN_LIB);
1364280304Sjkim            return 0;
1365280304Sjkim        }
1366280304Sjkim    }
1367280304Sjkim    felem_mul(tmp, z1, z2);
1368280304Sjkim    felem_reduce(z1, tmp);
1369280304Sjkim    felem_mul(tmp, y_in, z1);
1370280304Sjkim    felem_reduce(y_in, tmp);
1371280304Sjkim    felem_contract(y_out, y_in);
1372280304Sjkim    if (y != NULL) {
1373280304Sjkim        if (!felem_to_BN(y, y_out)) {
1374280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375280304Sjkim                  ERR_R_BN_LIB);
1376280304Sjkim            return 0;
1377280304Sjkim        }
1378280304Sjkim    }
1379280304Sjkim    return 1;
1380280304Sjkim}
1381238384Sjkim
1382280304Sjkimstatic void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383280304Sjkim                               felem tmp_felems[ /* num+1 */ ])
1384280304Sjkim{
1385280304Sjkim    /*
1386280304Sjkim     * Runs in constant time, unless an input is the point at infinity (which
1387280304Sjkim     * normally shouldn't happen).
1388280304Sjkim     */
1389280304Sjkim    ec_GFp_nistp_points_make_affine_internal(num,
1390280304Sjkim                                             points,
1391280304Sjkim                                             sizeof(felem),
1392280304Sjkim                                             tmp_felems,
1393280304Sjkim                                             (void (*)(void *))felem_one,
1394280304Sjkim                                             (int (*)(const void *))
1395280304Sjkim                                             felem_is_zero_int,
1396280304Sjkim                                             (void (*)(void *, const void *))
1397280304Sjkim                                             felem_assign,
1398280304Sjkim                                             (void (*)(void *, const void *))
1399280304Sjkim                                             felem_square_reduce, (void (*)
1400280304Sjkim                                                                   (void *,
1401280304Sjkim                                                                    const void
1402280304Sjkim                                                                    *,
1403280304Sjkim                                                                    const void
1404280304Sjkim                                                                    *))
1405280304Sjkim                                             felem_mul_reduce,
1406280304Sjkim                                             (void (*)(void *, const void *))
1407280304Sjkim                                             felem_inv,
1408280304Sjkim                                             (void (*)(void *, const void *))
1409280304Sjkim                                             felem_contract);
1410280304Sjkim}
1411238384Sjkim
1412280304Sjkim/*
1413280304Sjkim * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414280304Sjkim * values Result is stored in r (r can equal one of the inputs).
1415280304Sjkim */
1416238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417280304Sjkim                               const BIGNUM *scalar, size_t num,
1418280304Sjkim                               const EC_POINT *points[],
1419280304Sjkim                               const BIGNUM *scalars[], BN_CTX *ctx)
1420280304Sjkim{
1421280304Sjkim    int ret = 0;
1422280304Sjkim    int j;
1423280304Sjkim    unsigned i;
1424280304Sjkim    int mixed = 0;
1425280304Sjkim    BN_CTX *new_ctx = NULL;
1426280304Sjkim    BIGNUM *x, *y, *z, *tmp_scalar;
1427280304Sjkim    felem_bytearray g_secret;
1428280304Sjkim    felem_bytearray *secrets = NULL;
1429280304Sjkim    felem(*pre_comp)[17][3] = NULL;
1430280304Sjkim    felem *tmp_felems = NULL;
1431280304Sjkim    felem_bytearray tmp;
1432280304Sjkim    unsigned num_bytes;
1433280304Sjkim    int have_pre_comp = 0;
1434280304Sjkim    size_t num_points = num;
1435280304Sjkim    felem x_in, y_in, z_in, x_out, y_out, z_out;
1436280304Sjkim    NISTP224_PRE_COMP *pre = NULL;
1437280304Sjkim    const felem(*g_pre_comp)[16][3] = NULL;
1438280304Sjkim    EC_POINT *generator = NULL;
1439280304Sjkim    const EC_POINT *p = NULL;
1440280304Sjkim    const BIGNUM *p_scalar = NULL;
1441238384Sjkim
1442280304Sjkim    if (ctx == NULL)
1443280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444280304Sjkim            return 0;
1445280304Sjkim    BN_CTX_start(ctx);
1446280304Sjkim    if (((x = BN_CTX_get(ctx)) == NULL) ||
1447280304Sjkim        ((y = BN_CTX_get(ctx)) == NULL) ||
1448280304Sjkim        ((z = BN_CTX_get(ctx)) == NULL) ||
1449280304Sjkim        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1450280304Sjkim        goto err;
1451238384Sjkim
1452280304Sjkim    if (scalar != NULL) {
1453280304Sjkim        pre = EC_EX_DATA_get_data(group->extra_data,
1454280304Sjkim                                  nistp224_pre_comp_dup,
1455280304Sjkim                                  nistp224_pre_comp_free,
1456280304Sjkim                                  nistp224_pre_comp_clear_free);
1457280304Sjkim        if (pre)
1458280304Sjkim            /* we have precomputation, try to use it */
1459280304Sjkim            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1460280304Sjkim        else
1461280304Sjkim            /* try to use the standard precomputation */
1462280304Sjkim            g_pre_comp = &gmul[0];
1463280304Sjkim        generator = EC_POINT_new(group);
1464280304Sjkim        if (generator == NULL)
1465280304Sjkim            goto err;
1466280304Sjkim        /* get the generator from precomputation */
1467280304Sjkim        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1468280304Sjkim            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1469280304Sjkim            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1470280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471280304Sjkim            goto err;
1472280304Sjkim        }
1473280304Sjkim        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1474280304Sjkim                                                      generator, x, y, z,
1475280304Sjkim                                                      ctx))
1476280304Sjkim            goto err;
1477280304Sjkim        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1478280304Sjkim            /* precomputation matches generator */
1479280304Sjkim            have_pre_comp = 1;
1480280304Sjkim        else
1481280304Sjkim            /*
1482280304Sjkim             * we don't have valid precomputation: treat the generator as a
1483280304Sjkim             * random point
1484280304Sjkim             */
1485280304Sjkim            num_points = num_points + 1;
1486280304Sjkim    }
1487238384Sjkim
1488280304Sjkim    if (num_points > 0) {
1489280304Sjkim        if (num_points >= 3) {
1490280304Sjkim            /*
1491280304Sjkim             * unless we precompute multiples for just one or two points,
1492280304Sjkim             * converting those into affine form is time well spent
1493280304Sjkim             */
1494280304Sjkim            mixed = 1;
1495280304Sjkim        }
1496280304Sjkim        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1497280304Sjkim        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498280304Sjkim        if (mixed)
1499280304Sjkim            tmp_felems =
1500280304Sjkim                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1501280304Sjkim        if ((secrets == NULL) || (pre_comp == NULL)
1502280304Sjkim            || (mixed && (tmp_felems == NULL))) {
1503280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1504280304Sjkim            goto err;
1505280304Sjkim        }
1506238384Sjkim
1507280304Sjkim        /*
1508280304Sjkim         * we treat NULL scalars as 0, and NULL points as points at infinity,
1509280304Sjkim         * i.e., they contribute nothing to the linear combination
1510280304Sjkim         */
1511280304Sjkim        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1512280304Sjkim        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1513280304Sjkim        for (i = 0; i < num_points; ++i) {
1514280304Sjkim            if (i == num)
1515280304Sjkim                /* the generator */
1516280304Sjkim            {
1517280304Sjkim                p = EC_GROUP_get0_generator(group);
1518280304Sjkim                p_scalar = scalar;
1519280304Sjkim            } else
1520280304Sjkim                /* the i^th point */
1521280304Sjkim            {
1522280304Sjkim                p = points[i];
1523280304Sjkim                p_scalar = scalars[i];
1524280304Sjkim            }
1525280304Sjkim            if ((p_scalar != NULL) && (p != NULL)) {
1526280304Sjkim                /* reduce scalar to 0 <= scalar < 2^224 */
1527280304Sjkim                if ((BN_num_bits(p_scalar) > 224)
1528280304Sjkim                    || (BN_is_negative(p_scalar))) {
1529280304Sjkim                    /*
1530280304Sjkim                     * this is an unusual input, and we don't guarantee
1531280304Sjkim                     * constant-timeness
1532280304Sjkim                     */
1533280304Sjkim                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1534280304Sjkim                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535280304Sjkim                        goto err;
1536280304Sjkim                    }
1537280304Sjkim                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1538280304Sjkim                } else
1539280304Sjkim                    num_bytes = BN_bn2bin(p_scalar, tmp);
1540280304Sjkim                flip_endian(secrets[i], tmp, num_bytes);
1541280304Sjkim                /* precompute multiples */
1542280304Sjkim                if ((!BN_to_felem(x_out, &p->X)) ||
1543280304Sjkim                    (!BN_to_felem(y_out, &p->Y)) ||
1544280304Sjkim                    (!BN_to_felem(z_out, &p->Z)))
1545280304Sjkim                    goto err;
1546280304Sjkim                felem_assign(pre_comp[i][1][0], x_out);
1547280304Sjkim                felem_assign(pre_comp[i][1][1], y_out);
1548280304Sjkim                felem_assign(pre_comp[i][1][2], z_out);
1549280304Sjkim                for (j = 2; j <= 16; ++j) {
1550280304Sjkim                    if (j & 1) {
1551280304Sjkim                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1552280304Sjkim                                  pre_comp[i][j][2], pre_comp[i][1][0],
1553280304Sjkim                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1554280304Sjkim                                  pre_comp[i][j - 1][0],
1555280304Sjkim                                  pre_comp[i][j - 1][1],
1556280304Sjkim                                  pre_comp[i][j - 1][2]);
1557280304Sjkim                    } else {
1558280304Sjkim                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1559280304Sjkim                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1560280304Sjkim                                     pre_comp[i][j / 2][1],
1561280304Sjkim                                     pre_comp[i][j / 2][2]);
1562280304Sjkim                    }
1563280304Sjkim                }
1564280304Sjkim            }
1565280304Sjkim        }
1566280304Sjkim        if (mixed)
1567280304Sjkim            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1568280304Sjkim    }
1569238384Sjkim
1570280304Sjkim    /* the scalar for the generator */
1571280304Sjkim    if ((scalar != NULL) && (have_pre_comp)) {
1572280304Sjkim        memset(g_secret, 0, sizeof g_secret);
1573280304Sjkim        /* reduce scalar to 0 <= scalar < 2^224 */
1574280304Sjkim        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1575280304Sjkim            /*
1576280304Sjkim             * this is an unusual input, and we don't guarantee
1577280304Sjkim             * constant-timeness
1578280304Sjkim             */
1579280304Sjkim            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1580280304Sjkim                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581280304Sjkim                goto err;
1582280304Sjkim            }
1583280304Sjkim            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1584280304Sjkim        } else
1585280304Sjkim            num_bytes = BN_bn2bin(scalar, tmp);
1586280304Sjkim        flip_endian(g_secret, tmp, num_bytes);
1587280304Sjkim        /* do the multiplication with generator precomputation */
1588280304Sjkim        batch_mul(x_out, y_out, z_out,
1589280304Sjkim                  (const felem_bytearray(*))secrets, num_points,
1590280304Sjkim                  g_secret,
1591280304Sjkim                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1592280304Sjkim    } else
1593280304Sjkim        /* do the multiplication without generator precomputation */
1594280304Sjkim        batch_mul(x_out, y_out, z_out,
1595280304Sjkim                  (const felem_bytearray(*))secrets, num_points,
1596280304Sjkim                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1597280304Sjkim    /* reduce the output to its unique minimal representation */
1598280304Sjkim    felem_contract(x_in, x_out);
1599280304Sjkim    felem_contract(y_in, y_out);
1600280304Sjkim    felem_contract(z_in, z_out);
1601280304Sjkim    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1602280304Sjkim        (!felem_to_BN(z, z_in))) {
1603280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604280304Sjkim        goto err;
1605280304Sjkim    }
1606280304Sjkim    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1607238384Sjkim
1608280304Sjkim err:
1609280304Sjkim    BN_CTX_end(ctx);
1610280304Sjkim    if (generator != NULL)
1611280304Sjkim        EC_POINT_free(generator);
1612280304Sjkim    if (new_ctx != NULL)
1613280304Sjkim        BN_CTX_free(new_ctx);
1614280304Sjkim    if (secrets != NULL)
1615280304Sjkim        OPENSSL_free(secrets);
1616280304Sjkim    if (pre_comp != NULL)
1617280304Sjkim        OPENSSL_free(pre_comp);
1618280304Sjkim    if (tmp_felems != NULL)
1619280304Sjkim        OPENSSL_free(tmp_felems);
1620280304Sjkim    return ret;
1621280304Sjkim}
1622238384Sjkim
1623238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1624280304Sjkim{
1625280304Sjkim    int ret = 0;
1626280304Sjkim    NISTP224_PRE_COMP *pre = NULL;
1627280304Sjkim    int i, j;
1628280304Sjkim    BN_CTX *new_ctx = NULL;
1629280304Sjkim    BIGNUM *x, *y;
1630280304Sjkim    EC_POINT *generator = NULL;
1631280304Sjkim    felem tmp_felems[32];
1632238384Sjkim
1633280304Sjkim    /* throw away old precomputation */
1634280304Sjkim    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1635280304Sjkim                         nistp224_pre_comp_free,
1636280304Sjkim                         nistp224_pre_comp_clear_free);
1637280304Sjkim    if (ctx == NULL)
1638280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639280304Sjkim            return 0;
1640280304Sjkim    BN_CTX_start(ctx);
1641280304Sjkim    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1642280304Sjkim        goto err;
1643280304Sjkim    /* get the generator */
1644280304Sjkim    if (group->generator == NULL)
1645280304Sjkim        goto err;
1646280304Sjkim    generator = EC_POINT_new(group);
1647280304Sjkim    if (generator == NULL)
1648280304Sjkim        goto err;
1649280304Sjkim    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1650280304Sjkim    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1651280304Sjkim    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1652280304Sjkim        goto err;
1653280304Sjkim    if ((pre = nistp224_pre_comp_new()) == NULL)
1654280304Sjkim        goto err;
1655280304Sjkim    /*
1656280304Sjkim     * if the generator is the standard one, use built-in precomputation
1657280304Sjkim     */
1658280304Sjkim    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1659280304Sjkim        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1660280304Sjkim        ret = 1;
1661280304Sjkim        goto err;
1662280304Sjkim    }
1663280304Sjkim    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1664280304Sjkim        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1665280304Sjkim        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1666280304Sjkim        goto err;
1667280304Sjkim    /*
1668280304Sjkim     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1669280304Sjkim     * 2^140*G, 2^196*G for the second one
1670280304Sjkim     */
1671280304Sjkim    for (i = 1; i <= 8; i <<= 1) {
1672280304Sjkim        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673280304Sjkim                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1674280304Sjkim                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1675280304Sjkim        for (j = 0; j < 27; ++j) {
1676280304Sjkim            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1677280304Sjkim                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1678280304Sjkim                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1679280304Sjkim        }
1680280304Sjkim        if (i == 8)
1681280304Sjkim            break;
1682280304Sjkim        point_double(pre->g_pre_comp[0][2 * i][0],
1683280304Sjkim                     pre->g_pre_comp[0][2 * i][1],
1684280304Sjkim                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1685280304Sjkim                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1686280304Sjkim        for (j = 0; j < 27; ++j) {
1687280304Sjkim            point_double(pre->g_pre_comp[0][2 * i][0],
1688280304Sjkim                         pre->g_pre_comp[0][2 * i][1],
1689280304Sjkim                         pre->g_pre_comp[0][2 * i][2],
1690280304Sjkim                         pre->g_pre_comp[0][2 * i][0],
1691280304Sjkim                         pre->g_pre_comp[0][2 * i][1],
1692280304Sjkim                         pre->g_pre_comp[0][2 * i][2]);
1693280304Sjkim        }
1694280304Sjkim    }
1695280304Sjkim    for (i = 0; i < 2; i++) {
1696280304Sjkim        /* g_pre_comp[i][0] is the point at infinity */
1697280304Sjkim        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1698280304Sjkim        /* the remaining multiples */
1699280304Sjkim        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1700280304Sjkim        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1701280304Sjkim                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1702280304Sjkim                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1703280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704280304Sjkim                  pre->g_pre_comp[i][2][2]);
1705280304Sjkim        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1706280304Sjkim        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1707280304Sjkim                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1708280304Sjkim                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1710280304Sjkim                  pre->g_pre_comp[i][2][2]);
1711280304Sjkim        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1712280304Sjkim        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1713280304Sjkim                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1714280304Sjkim                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1715280304Sjkim                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1716280304Sjkim                  pre->g_pre_comp[i][4][2]);
1717280304Sjkim        /*
1718280304Sjkim         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1719280304Sjkim         */
1720280304Sjkim        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1721280304Sjkim                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1722280304Sjkim                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1723280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1724280304Sjkim                  pre->g_pre_comp[i][2][2]);
1725280304Sjkim        for (j = 1; j < 8; ++j) {
1726280304Sjkim            /* odd multiples: add G resp. 2^28*G */
1727280304Sjkim            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1728280304Sjkim                      pre->g_pre_comp[i][2 * j + 1][1],
1729280304Sjkim                      pre->g_pre_comp[i][2 * j + 1][2],
1730280304Sjkim                      pre->g_pre_comp[i][2 * j][0],
1731280304Sjkim                      pre->g_pre_comp[i][2 * j][1],
1732280304Sjkim                      pre->g_pre_comp[i][2 * j][2], 0,
1733280304Sjkim                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1734280304Sjkim                      pre->g_pre_comp[i][1][2]);
1735280304Sjkim        }
1736280304Sjkim    }
1737280304Sjkim    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1738238384Sjkim
1739280304Sjkim    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1740280304Sjkim                             nistp224_pre_comp_free,
1741280304Sjkim                             nistp224_pre_comp_clear_free))
1742280304Sjkim        goto err;
1743280304Sjkim    ret = 1;
1744280304Sjkim    pre = NULL;
1745238384Sjkim err:
1746280304Sjkim    BN_CTX_end(ctx);
1747280304Sjkim    if (generator != NULL)
1748280304Sjkim        EC_POINT_free(generator);
1749280304Sjkim    if (new_ctx != NULL)
1750280304Sjkim        BN_CTX_free(new_ctx);
1751280304Sjkim    if (pre)
1752280304Sjkim        nistp224_pre_comp_free(pre);
1753280304Sjkim    return ret;
1754280304Sjkim}
1755238384Sjkim
1756238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1757280304Sjkim{
1758280304Sjkim    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1759280304Sjkim                            nistp224_pre_comp_free,
1760280304Sjkim                            nistp224_pre_comp_clear_free)
1761280304Sjkim        != NULL)
1762280304Sjkim        return 1;
1763280304Sjkim    else
1764280304Sjkim        return 0;
1765280304Sjkim}
1766238384Sjkim
1767238384Sjkim#else
1768280304Sjkimstatic void *dummy = &dummy;
1769238384Sjkim#endif
1770