1238384Sjkim/* crypto/ec/ecp_nistp224.c */
2238384Sjkim/*
3238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project.
4238384Sjkim */
5238384Sjkim/* Copyright 2011 Google Inc.
6238384Sjkim *
7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License");
8238384Sjkim *
9238384Sjkim * you may not use this file except in compliance with the License.
10238384Sjkim * You may obtain a copy of the License at
11238384Sjkim *
12238384Sjkim *     http://www.apache.org/licenses/LICENSE-2.0
13238384Sjkim *
14238384Sjkim *  Unless required by applicable law or agreed to in writing, software
15238384Sjkim *  distributed under the License is distributed on an "AS IS" BASIS,
16238384Sjkim *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17238384Sjkim *  See the License for the specific language governing permissions and
18238384Sjkim *  limitations under the License.
19238384Sjkim */
20238384Sjkim
21238384Sjkim/*
22238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23238384Sjkim *
24238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519
26238384Sjkim */
27238384Sjkim
28238384Sjkim#include <openssl/opensslconf.h>
29238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30238384Sjkim
31296341Sdelphij# ifndef OPENSSL_SYS_VMS
32296341Sdelphij#  include <stdint.h>
33296341Sdelphij# else
34296341Sdelphij#  include <inttypes.h>
35296341Sdelphij# endif
36238384Sjkim
37296341Sdelphij# include <string.h>
38296341Sdelphij# include <openssl/err.h>
39296341Sdelphij# include "ec_lcl.h"
40238384Sjkim
41296341Sdelphij# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42238384Sjkim  /* even with gcc, the typedef won't work for 32-bit platforms */
43296341Sdelphijtypedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44296341Sdelphij                                 * platforms */
45296341Sdelphij# else
46296341Sdelphij#  error "Need GCC 3.1 or later to define type uint128_t"
47296341Sdelphij# endif
48238384Sjkim
49238384Sjkimtypedef uint8_t u8;
50238384Sjkimtypedef uint64_t u64;
51238384Sjkimtypedef int64_t s64;
52238384Sjkim
53238384Sjkim/******************************************************************************/
54296341Sdelphij/*-
55296341Sdelphij * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56238384Sjkim *
57238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58238384Sjkim * using 64-bit coefficients called 'limbs',
59238384Sjkim * and sometimes (for multiplication results) as
60238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61238384Sjkim * using 128-bit coefficients called 'widelimbs'.
62238384Sjkim * A 4-limb representation is an 'felem';
63238384Sjkim * a 7-widelimb representation is a 'widefelem'.
64238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always
65238384Sjkim * reduce the representations: we ensure that inputs to each felem
66238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then
68238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57.
69238384Sjkim * We only reduce to the unique minimal representation at the end of the
70238384Sjkim * computation.
71238384Sjkim */
72238384Sjkim
73238384Sjkimtypedef uint64_t limb;
74238384Sjkimtypedef uint128_t widelimb;
75238384Sjkim
76238384Sjkimtypedef limb felem[4];
77238384Sjkimtypedef widelimb widefelem[7];
78238384Sjkim
79296341Sdelphij/*
80296341Sdelphij * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81296341Sdelphij * group order size for the elliptic curve, and we also use this type for
82296341Sdelphij * scalars for point multiplication.
83296341Sdelphij */
84238384Sjkimtypedef u8 felem_bytearray[28];
85238384Sjkim
86238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = {
87296341Sdelphij    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88296341Sdelphij     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89296341Sdelphij     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90296341Sdelphij    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91296341Sdelphij     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92296341Sdelphij     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93296341Sdelphij    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94296341Sdelphij     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95296341Sdelphij     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96296341Sdelphij    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97296341Sdelphij     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98296341Sdelphij     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99296341Sdelphij    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100296341Sdelphij     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101296341Sdelphij     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102238384Sjkim};
103238384Sjkim
104296341Sdelphij/*-
105296341Sdelphij * Precomputed multiples of the standard generator
106238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1
107238384Sjkim * (0 for the point at infinity).
108238384Sjkim * For each field element, slice a_0 is word 0, etc.
109238384Sjkim *
110238384Sjkim * The table has 2 * 16 elements, starting with the following:
111238384Sjkim * index | bits    | point
112238384Sjkim * ------+---------+------------------------------
113238384Sjkim *     0 | 0 0 0 0 | 0G
114238384Sjkim *     1 | 0 0 0 1 | 1G
115238384Sjkim *     2 | 0 0 1 0 | 2^56G
116238384Sjkim *     3 | 0 0 1 1 | (2^56 + 1)G
117238384Sjkim *     4 | 0 1 0 0 | 2^112G
118238384Sjkim *     5 | 0 1 0 1 | (2^112 + 1)G
119238384Sjkim *     6 | 0 1 1 0 | (2^112 + 2^56)G
120238384Sjkim *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121238384Sjkim *     8 | 1 0 0 0 | 2^168G
122238384Sjkim *     9 | 1 0 0 1 | (2^168 + 1)G
123238384Sjkim *    10 | 1 0 1 0 | (2^168 + 2^56)G
124238384Sjkim *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125238384Sjkim *    12 | 1 1 0 0 | (2^168 + 2^112)G
126238384Sjkim *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127238384Sjkim *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128238384Sjkim *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129238384Sjkim * followed by a copy of this with each element multiplied by 2^28.
130238384Sjkim *
131238384Sjkim * The reason for this is so that we can clock bits into four different
132238384Sjkim * locations when doing simple scalar multiplies against the base point,
133238384Sjkim * and then another four locations using the second 16 elements.
134238384Sjkim */
135296341Sdelphijstatic const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136296341Sdelphij                                        {0, 0, 0, 0},
137296341Sdelphij                                        {0, 0, 0, 0}},
138296341Sdelphij                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139296341Sdelphij                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140296341Sdelphij                                        {0xd5819985007e34, 0x75a05a07476444,
141296341Sdelphij                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142296341Sdelphij                                        {1, 0, 0, 0}},
143296341Sdelphij                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144296341Sdelphij                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145296341Sdelphij                                        {0x29e0b892dc9c43, 0xece8608436e662,
146296341Sdelphij                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147296341Sdelphij                                        {1, 0, 0, 0}},
148296341Sdelphij                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149296341Sdelphij                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150296341Sdelphij                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151296341Sdelphij                                         0x313865468fafec, 0x5cb379ba910a17},
152296341Sdelphij                                        {1, 0, 0, 0}},
153296341Sdelphij                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154296341Sdelphij                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155296341Sdelphij                                        {0x7510407766af5d, 0x84d929610d5450,
156296341Sdelphij                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157296341Sdelphij                                        {1, 0, 0, 0}},
158296341Sdelphij                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159296341Sdelphij                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160296341Sdelphij                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161296341Sdelphij                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162296341Sdelphij                                        {1, 0, 0, 0}},
163296341Sdelphij                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164296341Sdelphij                                         0x363f37347c1ba4, 0xc391b45c84725c},
165296341Sdelphij                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166296341Sdelphij                                         0xc393da7e222a7f, 0x1efb7890ede244},
167296341Sdelphij                                        {1, 0, 0, 0}},
168296341Sdelphij                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169296341Sdelphij                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170296341Sdelphij                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171296341Sdelphij                                         0x966c079b753c89, 0xfe67e4e820b112},
172296341Sdelphij                                        {1, 0, 0, 0}},
173296341Sdelphij                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174296341Sdelphij                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175296341Sdelphij                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176296341Sdelphij                                         0x641fab2116fd56, 0x72855882b08394},
177296341Sdelphij                                        {1, 0, 0, 0}},
178296341Sdelphij                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179296341Sdelphij                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180296341Sdelphij                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181296341Sdelphij                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182296341Sdelphij                                        {1, 0, 0, 0}},
183296341Sdelphij                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184296341Sdelphij                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185296341Sdelphij                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186296341Sdelphij                                         0x9281d688bcbe50, 0xc40331df893881},
187296341Sdelphij                                        {1, 0, 0, 0}},
188296341Sdelphij                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189296341Sdelphij                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190296341Sdelphij                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191296341Sdelphij                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192296341Sdelphij                                        {1, 0, 0, 0}},
193296341Sdelphij                                       {{0x571e509fb5efb3, 0xade88696410552,
194296341Sdelphij                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195296341Sdelphij                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196296341Sdelphij                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197296341Sdelphij                                        {1, 0, 0, 0}},
198296341Sdelphij                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199296341Sdelphij                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200296341Sdelphij                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201296341Sdelphij                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202296341Sdelphij                                        {1, 0, 0, 0}},
203296341Sdelphij                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204296341Sdelphij                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205296341Sdelphij                                        {0x2891425503b11f, 0x08781030579fea,
206296341Sdelphij                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207296341Sdelphij                                        {1, 0, 0, 0}},
208296341Sdelphij                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209296341Sdelphij                                         0xff17036691a973, 0xf1aef351497c58},
210296341Sdelphij                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211296341Sdelphij                                         0x74a684435bd693, 0xeea7471f962558},
212296341Sdelphij                                        {1, 0, 0, 0}}},
213296341Sdelphij{{{0, 0, 0, 0},
214296341Sdelphij  {0, 0, 0, 0},
215296341Sdelphij  {0, 0, 0, 0}},
216296341Sdelphij {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217296341Sdelphij  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218296341Sdelphij  {1, 0, 0, 0}},
219296341Sdelphij {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220296341Sdelphij  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221296341Sdelphij  {1, 0, 0, 0}},
222296341Sdelphij {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223296341Sdelphij  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224296341Sdelphij  {1, 0, 0, 0}},
225296341Sdelphij {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226296341Sdelphij  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227296341Sdelphij  {1, 0, 0, 0}},
228296341Sdelphij {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229296341Sdelphij  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230296341Sdelphij  {1, 0, 0, 0}},
231296341Sdelphij {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232296341Sdelphij  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233296341Sdelphij  {1, 0, 0, 0}},
234296341Sdelphij {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235296341Sdelphij  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236296341Sdelphij  {1, 0, 0, 0}},
237296341Sdelphij {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238296341Sdelphij  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239296341Sdelphij  {1, 0, 0, 0}},
240296341Sdelphij {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241296341Sdelphij  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242296341Sdelphij  {1, 0, 0, 0}},
243296341Sdelphij {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244296341Sdelphij  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245296341Sdelphij  {1, 0, 0, 0}},
246296341Sdelphij {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247296341Sdelphij  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248296341Sdelphij  {1, 0, 0, 0}},
249296341Sdelphij {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250296341Sdelphij  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251296341Sdelphij  {1, 0, 0, 0}},
252296341Sdelphij {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253296341Sdelphij  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254296341Sdelphij  {1, 0, 0, 0}},
255296341Sdelphij {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256296341Sdelphij  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257296341Sdelphij  {1, 0, 0, 0}},
258296341Sdelphij {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259296341Sdelphij  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260296341Sdelphij  {1, 0, 0, 0}}}
261296341Sdelphij};
262238384Sjkim
263238384Sjkim/* Precomputation for the group generator. */
264238384Sjkimtypedef struct {
265296341Sdelphij    felem g_pre_comp[2][16][3];
266296341Sdelphij    int references;
267238384Sjkim} NISTP224_PRE_COMP;
268238384Sjkim
269238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void)
270296341Sdelphij{
271296341Sdelphij    static const EC_METHOD ret = {
272296341Sdelphij        EC_FLAGS_DEFAULT_OCT,
273296341Sdelphij        NID_X9_62_prime_field,
274296341Sdelphij        ec_GFp_nistp224_group_init,
275296341Sdelphij        ec_GFp_simple_group_finish,
276296341Sdelphij        ec_GFp_simple_group_clear_finish,
277296341Sdelphij        ec_GFp_nist_group_copy,
278296341Sdelphij        ec_GFp_nistp224_group_set_curve,
279296341Sdelphij        ec_GFp_simple_group_get_curve,
280296341Sdelphij        ec_GFp_simple_group_get_degree,
281296341Sdelphij        ec_GFp_simple_group_check_discriminant,
282296341Sdelphij        ec_GFp_simple_point_init,
283296341Sdelphij        ec_GFp_simple_point_finish,
284296341Sdelphij        ec_GFp_simple_point_clear_finish,
285296341Sdelphij        ec_GFp_simple_point_copy,
286296341Sdelphij        ec_GFp_simple_point_set_to_infinity,
287296341Sdelphij        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288296341Sdelphij        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289296341Sdelphij        ec_GFp_simple_point_set_affine_coordinates,
290296341Sdelphij        ec_GFp_nistp224_point_get_affine_coordinates,
291296341Sdelphij        0 /* point_set_compressed_coordinates */ ,
292296341Sdelphij        0 /* point2oct */ ,
293296341Sdelphij        0 /* oct2point */ ,
294296341Sdelphij        ec_GFp_simple_add,
295296341Sdelphij        ec_GFp_simple_dbl,
296296341Sdelphij        ec_GFp_simple_invert,
297296341Sdelphij        ec_GFp_simple_is_at_infinity,
298296341Sdelphij        ec_GFp_simple_is_on_curve,
299296341Sdelphij        ec_GFp_simple_cmp,
300296341Sdelphij        ec_GFp_simple_make_affine,
301296341Sdelphij        ec_GFp_simple_points_make_affine,
302296341Sdelphij        ec_GFp_nistp224_points_mul,
303296341Sdelphij        ec_GFp_nistp224_precompute_mult,
304296341Sdelphij        ec_GFp_nistp224_have_precompute_mult,
305296341Sdelphij        ec_GFp_nist_field_mul,
306296341Sdelphij        ec_GFp_nist_field_sqr,
307296341Sdelphij        0 /* field_div */ ,
308296341Sdelphij        0 /* field_encode */ ,
309296341Sdelphij        0 /* field_decode */ ,
310296341Sdelphij        0                       /* field_set_to_one */
311296341Sdelphij    };
312238384Sjkim
313296341Sdelphij    return &ret;
314296341Sdelphij}
315238384Sjkim
316296341Sdelphij/*
317296341Sdelphij * Helper functions to convert field elements to/from internal representation
318296341Sdelphij */
319238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28])
320296341Sdelphij{
321296341Sdelphij    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322296341Sdelphij    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323296341Sdelphij    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324296341Sdelphij    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325296341Sdelphij}
326238384Sjkim
327238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in)
328296341Sdelphij{
329296341Sdelphij    unsigned i;
330296341Sdelphij    for (i = 0; i < 7; ++i) {
331296341Sdelphij        out[i] = in[0] >> (8 * i);
332296341Sdelphij        out[i + 7] = in[1] >> (8 * i);
333296341Sdelphij        out[i + 14] = in[2] >> (8 * i);
334296341Sdelphij        out[i + 21] = in[3] >> (8 * i);
335296341Sdelphij    }
336296341Sdelphij}
337238384Sjkim
338238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len)
340296341Sdelphij{
341296341Sdelphij    unsigned i;
342296341Sdelphij    for (i = 0; i < len; ++i)
343296341Sdelphij        out[i] = in[len - 1 - i];
344296341Sdelphij}
345238384Sjkim
346238384Sjkim/* From OpenSSL BIGNUM to internal representation */
347238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn)
348296341Sdelphij{
349296341Sdelphij    felem_bytearray b_in;
350296341Sdelphij    felem_bytearray b_out;
351296341Sdelphij    unsigned num_bytes;
352238384Sjkim
353296341Sdelphij    /* BN_bn2bin eats leading zeroes */
354296341Sdelphij    memset(b_out, 0, sizeof b_out);
355296341Sdelphij    num_bytes = BN_num_bytes(bn);
356296341Sdelphij    if (num_bytes > sizeof b_out) {
357296341Sdelphij        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358296341Sdelphij        return 0;
359296341Sdelphij    }
360296341Sdelphij    if (BN_is_negative(bn)) {
361296341Sdelphij        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362296341Sdelphij        return 0;
363296341Sdelphij    }
364296341Sdelphij    num_bytes = BN_bn2bin(bn, b_in);
365296341Sdelphij    flip_endian(b_out, b_in, num_bytes);
366296341Sdelphij    bin28_to_felem(out, b_out);
367296341Sdelphij    return 1;
368296341Sdelphij}
369238384Sjkim
370238384Sjkim/* From internal representation to OpenSSL BIGNUM */
371238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372296341Sdelphij{
373296341Sdelphij    felem_bytearray b_in, b_out;
374296341Sdelphij    felem_to_bin28(b_in, in);
375296341Sdelphij    flip_endian(b_out, b_in, sizeof b_out);
376296341Sdelphij    return BN_bin2bn(b_out, sizeof b_out, out);
377296341Sdelphij}
378238384Sjkim
379238384Sjkim/******************************************************************************/
380296341Sdelphij/*-
381296341Sdelphij *                              FIELD OPERATIONS
382238384Sjkim *
383238384Sjkim * Field operations, using the internal representation of field elements.
384238384Sjkim * NB! These operations are specific to our point multiplication and cannot be
385238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar
386238384Sjkim * will cause an overflow.
387238384Sjkim *
388238384Sjkim */
389238384Sjkim
390238384Sjkimstatic void felem_one(felem out)
391296341Sdelphij{
392296341Sdelphij    out[0] = 1;
393296341Sdelphij    out[1] = 0;
394296341Sdelphij    out[2] = 0;
395296341Sdelphij    out[3] = 0;
396296341Sdelphij}
397238384Sjkim
398238384Sjkimstatic void felem_assign(felem out, const felem in)
399296341Sdelphij{
400296341Sdelphij    out[0] = in[0];
401296341Sdelphij    out[1] = in[1];
402296341Sdelphij    out[2] = in[2];
403296341Sdelphij    out[3] = in[3];
404296341Sdelphij}
405238384Sjkim
406238384Sjkim/* Sum two field elements: out += in */
407238384Sjkimstatic void felem_sum(felem out, const felem in)
408296341Sdelphij{
409296341Sdelphij    out[0] += in[0];
410296341Sdelphij    out[1] += in[1];
411296341Sdelphij    out[2] += in[2];
412296341Sdelphij    out[3] += in[3];
413296341Sdelphij}
414238384Sjkim
415238384Sjkim/* Get negative value: out = -in */
416238384Sjkim/* Assumes in[i] < 2^57 */
417238384Sjkimstatic void felem_neg(felem out, const felem in)
418296341Sdelphij{
419296341Sdelphij    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420296341Sdelphij    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421296341Sdelphij    static const limb two58m42m2 = (((limb) 1) << 58) -
422296341Sdelphij        (((limb) 1) << 42) - (((limb) 1) << 2);
423238384Sjkim
424296341Sdelphij    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425296341Sdelphij    out[0] = two58p2 - in[0];
426296341Sdelphij    out[1] = two58m42m2 - in[1];
427296341Sdelphij    out[2] = two58m2 - in[2];
428296341Sdelphij    out[3] = two58m2 - in[3];
429296341Sdelphij}
430238384Sjkim
431238384Sjkim/* Subtract field elements: out -= in */
432238384Sjkim/* Assumes in[i] < 2^57 */
433238384Sjkimstatic void felem_diff(felem out, const felem in)
434296341Sdelphij{
435296341Sdelphij    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436296341Sdelphij    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437296341Sdelphij    static const limb two58m42m2 = (((limb) 1) << 58) -
438296341Sdelphij        (((limb) 1) << 42) - (((limb) 1) << 2);
439238384Sjkim
440296341Sdelphij    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441296341Sdelphij    out[0] += two58p2;
442296341Sdelphij    out[1] += two58m42m2;
443296341Sdelphij    out[2] += two58m2;
444296341Sdelphij    out[3] += two58m2;
445238384Sjkim
446296341Sdelphij    out[0] -= in[0];
447296341Sdelphij    out[1] -= in[1];
448296341Sdelphij    out[2] -= in[2];
449296341Sdelphij    out[3] -= in[3];
450296341Sdelphij}
451238384Sjkim
452238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */
453238384Sjkim/* Assumes in[i] < 2^119 */
454238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in)
455296341Sdelphij{
456296341Sdelphij    static const widelimb two120 = ((widelimb) 1) << 120;
457296341Sdelphij    static const widelimb two120m64 = (((widelimb) 1) << 120) -
458296341Sdelphij        (((widelimb) 1) << 64);
459296341Sdelphij    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460296341Sdelphij        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461238384Sjkim
462296341Sdelphij    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463296341Sdelphij    out[0] += two120;
464296341Sdelphij    out[1] += two120m64;
465296341Sdelphij    out[2] += two120m64;
466296341Sdelphij    out[3] += two120;
467296341Sdelphij    out[4] += two120m104m64;
468296341Sdelphij    out[5] += two120m64;
469296341Sdelphij    out[6] += two120m64;
470238384Sjkim
471296341Sdelphij    out[0] -= in[0];
472296341Sdelphij    out[1] -= in[1];
473296341Sdelphij    out[2] -= in[2];
474296341Sdelphij    out[3] -= in[3];
475296341Sdelphij    out[4] -= in[4];
476296341Sdelphij    out[5] -= in[5];
477296341Sdelphij    out[6] -= in[6];
478296341Sdelphij}
479238384Sjkim
480238384Sjkim/* Subtract in mixed mode: out128 -= in64 */
481238384Sjkim/* in[i] < 2^63 */
482238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in)
483296341Sdelphij{
484296341Sdelphij    static const widelimb two64p8 = (((widelimb) 1) << 64) +
485296341Sdelphij        (((widelimb) 1) << 8);
486296341Sdelphij    static const widelimb two64m8 = (((widelimb) 1) << 64) -
487296341Sdelphij        (((widelimb) 1) << 8);
488296341Sdelphij    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489296341Sdelphij        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490238384Sjkim
491296341Sdelphij    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492296341Sdelphij    out[0] += two64p8;
493296341Sdelphij    out[1] += two64m48m8;
494296341Sdelphij    out[2] += two64m8;
495296341Sdelphij    out[3] += two64m8;
496238384Sjkim
497296341Sdelphij    out[0] -= in[0];
498296341Sdelphij    out[1] -= in[1];
499296341Sdelphij    out[2] -= in[2];
500296341Sdelphij    out[3] -= in[3];
501296341Sdelphij}
502238384Sjkim
503296341Sdelphij/*
504296341Sdelphij * Multiply a field element by a scalar: out = out * scalar The scalars we
505296341Sdelphij * actually use are small, so results fit without overflow
506296341Sdelphij */
507238384Sjkimstatic void felem_scalar(felem out, const limb scalar)
508296341Sdelphij{
509296341Sdelphij    out[0] *= scalar;
510296341Sdelphij    out[1] *= scalar;
511296341Sdelphij    out[2] *= scalar;
512296341Sdelphij    out[3] *= scalar;
513296341Sdelphij}
514238384Sjkim
515296341Sdelphij/*
516296341Sdelphij * Multiply an unreduced field element by a scalar: out = out * scalar The
517296341Sdelphij * scalars we actually use are small, so results fit without overflow
518296341Sdelphij */
519238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar)
520296341Sdelphij{
521296341Sdelphij    out[0] *= scalar;
522296341Sdelphij    out[1] *= scalar;
523296341Sdelphij    out[2] *= scalar;
524296341Sdelphij    out[3] *= scalar;
525296341Sdelphij    out[4] *= scalar;
526296341Sdelphij    out[5] *= scalar;
527296341Sdelphij    out[6] *= scalar;
528296341Sdelphij}
529238384Sjkim
530238384Sjkim/* Square a field element: out = in^2 */
531238384Sjkimstatic void felem_square(widefelem out, const felem in)
532296341Sdelphij{
533296341Sdelphij    limb tmp0, tmp1, tmp2;
534296341Sdelphij    tmp0 = 2 * in[0];
535296341Sdelphij    tmp1 = 2 * in[1];
536296341Sdelphij    tmp2 = 2 * in[2];
537296341Sdelphij    out[0] = ((widelimb) in[0]) * in[0];
538296341Sdelphij    out[1] = ((widelimb) in[0]) * tmp1;
539296341Sdelphij    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540296341Sdelphij    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541296341Sdelphij    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542296341Sdelphij    out[5] = ((widelimb) in[3]) * tmp2;
543296341Sdelphij    out[6] = ((widelimb) in[3]) * in[3];
544296341Sdelphij}
545238384Sjkim
546238384Sjkim/* Multiply two field elements: out = in1 * in2 */
547238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2)
548296341Sdelphij{
549296341Sdelphij    out[0] = ((widelimb) in1[0]) * in2[0];
550296341Sdelphij    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551296341Sdelphij    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552296341Sdelphij        ((widelimb) in1[2]) * in2[0];
553296341Sdelphij    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554296341Sdelphij        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555296341Sdelphij    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556296341Sdelphij        ((widelimb) in1[3]) * in2[1];
557296341Sdelphij    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558296341Sdelphij    out[6] = ((widelimb) in1[3]) * in2[3];
559296341Sdelphij}
560238384Sjkim
561296341Sdelphij/*-
562296341Sdelphij * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563238384Sjkim * Requires in[i] < 2^126,
564238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565238384Sjkimstatic void felem_reduce(felem out, const widefelem in)
566296341Sdelphij{
567296341Sdelphij    static const widelimb two127p15 = (((widelimb) 1) << 127) +
568296341Sdelphij        (((widelimb) 1) << 15);
569296341Sdelphij    static const widelimb two127m71 = (((widelimb) 1) << 127) -
570296341Sdelphij        (((widelimb) 1) << 71);
571296341Sdelphij    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572296341Sdelphij        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573296341Sdelphij    widelimb output[5];
574238384Sjkim
575296341Sdelphij    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576296341Sdelphij    output[0] = in[0] + two127p15;
577296341Sdelphij    output[1] = in[1] + two127m71m55;
578296341Sdelphij    output[2] = in[2] + two127m71;
579296341Sdelphij    output[3] = in[3];
580296341Sdelphij    output[4] = in[4];
581238384Sjkim
582296341Sdelphij    /* Eliminate in[4], in[5], in[6] */
583296341Sdelphij    output[4] += in[6] >> 16;
584296341Sdelphij    output[3] += (in[6] & 0xffff) << 40;
585296341Sdelphij    output[2] -= in[6];
586238384Sjkim
587296341Sdelphij    output[3] += in[5] >> 16;
588296341Sdelphij    output[2] += (in[5] & 0xffff) << 40;
589296341Sdelphij    output[1] -= in[5];
590238384Sjkim
591296341Sdelphij    output[2] += output[4] >> 16;
592296341Sdelphij    output[1] += (output[4] & 0xffff) << 40;
593296341Sdelphij    output[0] -= output[4];
594238384Sjkim
595296341Sdelphij    /* Carry 2 -> 3 -> 4 */
596296341Sdelphij    output[3] += output[2] >> 56;
597296341Sdelphij    output[2] &= 0x00ffffffffffffff;
598238384Sjkim
599296341Sdelphij    output[4] = output[3] >> 56;
600296341Sdelphij    output[3] &= 0x00ffffffffffffff;
601238384Sjkim
602296341Sdelphij    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603238384Sjkim
604296341Sdelphij    /* Eliminate output[4] */
605296341Sdelphij    output[2] += output[4] >> 16;
606296341Sdelphij    /* output[2] < 2^56 + 2^56 = 2^57 */
607296341Sdelphij    output[1] += (output[4] & 0xffff) << 40;
608296341Sdelphij    output[0] -= output[4];
609238384Sjkim
610296341Sdelphij    /* Carry 0 -> 1 -> 2 -> 3 */
611296341Sdelphij    output[1] += output[0] >> 56;
612296341Sdelphij    out[0] = output[0] & 0x00ffffffffffffff;
613238384Sjkim
614296341Sdelphij    output[2] += output[1] >> 56;
615296341Sdelphij    /* output[2] < 2^57 + 2^72 */
616296341Sdelphij    out[1] = output[1] & 0x00ffffffffffffff;
617296341Sdelphij    output[3] += output[2] >> 56;
618296341Sdelphij    /* output[3] <= 2^56 + 2^16 */
619296341Sdelphij    out[2] = output[2] & 0x00ffffffffffffff;
620238384Sjkim
621296341Sdelphij    /*-
622296341Sdelphij     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623296341Sdelphij     * out[3] <= 2^56 + 2^16 (due to final carry),
624296341Sdelphij     * so out < 2*p
625296341Sdelphij     */
626296341Sdelphij    out[3] = output[3];
627296341Sdelphij}
628238384Sjkim
629238384Sjkimstatic void felem_square_reduce(felem out, const felem in)
630296341Sdelphij{
631296341Sdelphij    widefelem tmp;
632296341Sdelphij    felem_square(tmp, in);
633296341Sdelphij    felem_reduce(out, tmp);
634296341Sdelphij}
635238384Sjkim
636238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2)
637296341Sdelphij{
638296341Sdelphij    widefelem tmp;
639296341Sdelphij    felem_mul(tmp, in1, in2);
640296341Sdelphij    felem_reduce(out, tmp);
641296341Sdelphij}
642238384Sjkim
643296341Sdelphij/*
644296341Sdelphij * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645296341Sdelphij * call felem_reduce first)
646296341Sdelphij */
647238384Sjkimstatic void felem_contract(felem out, const felem in)
648296341Sdelphij{
649296341Sdelphij    static const int64_t two56 = ((limb) 1) << 56;
650296341Sdelphij    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651296341Sdelphij    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652296341Sdelphij    int64_t tmp[4], a;
653296341Sdelphij    tmp[0] = in[0];
654296341Sdelphij    tmp[1] = in[1];
655296341Sdelphij    tmp[2] = in[2];
656296341Sdelphij    tmp[3] = in[3];
657296341Sdelphij    /* Case 1: a = 1 iff in >= 2^224 */
658296341Sdelphij    a = (in[3] >> 56);
659296341Sdelphij    tmp[0] -= a;
660296341Sdelphij    tmp[1] += a << 40;
661296341Sdelphij    tmp[3] &= 0x00ffffffffffffff;
662296341Sdelphij    /*
663296341Sdelphij     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664296341Sdelphij     * and the lower part is non-zero
665296341Sdelphij     */
666296341Sdelphij    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667296341Sdelphij        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668296341Sdelphij    a &= 0x00ffffffffffffff;
669296341Sdelphij    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670296341Sdelphij    a = (a - 1) >> 63;
671296341Sdelphij    /* subtract 2^224 - 2^96 + 1 if a is all-one */
672296341Sdelphij    tmp[3] &= a ^ 0xffffffffffffffff;
673296341Sdelphij    tmp[2] &= a ^ 0xffffffffffffffff;
674296341Sdelphij    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675296341Sdelphij    tmp[0] -= 1 & a;
676238384Sjkim
677296341Sdelphij    /*
678296341Sdelphij     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679296341Sdelphij     * non-zero, so we only need one step
680296341Sdelphij     */
681296341Sdelphij    a = tmp[0] >> 63;
682296341Sdelphij    tmp[0] += two56 & a;
683296341Sdelphij    tmp[1] -= 1 & a;
684238384Sjkim
685296341Sdelphij    /* carry 1 -> 2 -> 3 */
686296341Sdelphij    tmp[2] += tmp[1] >> 56;
687296341Sdelphij    tmp[1] &= 0x00ffffffffffffff;
688238384Sjkim
689296341Sdelphij    tmp[3] += tmp[2] >> 56;
690296341Sdelphij    tmp[2] &= 0x00ffffffffffffff;
691238384Sjkim
692296341Sdelphij    /* Now 0 <= out < p */
693296341Sdelphij    out[0] = tmp[0];
694296341Sdelphij    out[1] = tmp[1];
695296341Sdelphij    out[2] = tmp[2];
696296341Sdelphij    out[3] = tmp[3];
697296341Sdelphij}
698238384Sjkim
699296341Sdelphij/*
700296341Sdelphij * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701296341Sdelphij * elements are reduced to in < 2^225, so we only need to check three cases:
702296341Sdelphij * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703296341Sdelphij */
704238384Sjkimstatic limb felem_is_zero(const felem in)
705296341Sdelphij{
706296341Sdelphij    limb zero, two224m96p1, two225m97p2;
707238384Sjkim
708296341Sdelphij    zero = in[0] | in[1] | in[2] | in[3];
709296341Sdelphij    zero = (((int64_t) (zero) - 1) >> 63) & 1;
710296341Sdelphij    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711296341Sdelphij        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712296341Sdelphij    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713296341Sdelphij    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714296341Sdelphij        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715296341Sdelphij    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716296341Sdelphij    return (zero | two224m96p1 | two225m97p2);
717296341Sdelphij}
718238384Sjkim
719238384Sjkimstatic limb felem_is_zero_int(const felem in)
720296341Sdelphij{
721296341Sdelphij    return (int)(felem_is_zero(in) & ((limb) 1));
722296341Sdelphij}
723238384Sjkim
724238384Sjkim/* Invert a field element */
725238384Sjkim/* Computation chain copied from djb's code */
726238384Sjkimstatic void felem_inv(felem out, const felem in)
727296341Sdelphij{
728296341Sdelphij    felem ftmp, ftmp2, ftmp3, ftmp4;
729296341Sdelphij    widefelem tmp;
730296341Sdelphij    unsigned i;
731238384Sjkim
732296341Sdelphij    felem_square(tmp, in);
733296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2 */
734296341Sdelphij    felem_mul(tmp, in, ftmp);
735296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
736296341Sdelphij    felem_square(tmp, ftmp);
737296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
738296341Sdelphij    felem_mul(tmp, in, ftmp);
739296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
740296341Sdelphij    felem_square(tmp, ftmp);
741296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
742296341Sdelphij    felem_square(tmp, ftmp2);
743296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
744296341Sdelphij    felem_square(tmp, ftmp2);
745296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
746296341Sdelphij    felem_mul(tmp, ftmp2, ftmp);
747296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
748296341Sdelphij    felem_square(tmp, ftmp);
749296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
750296341Sdelphij    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
751296341Sdelphij        felem_square(tmp, ftmp2);
752296341Sdelphij        felem_reduce(ftmp2, tmp);
753296341Sdelphij    }
754296341Sdelphij    felem_mul(tmp, ftmp2, ftmp);
755296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
756296341Sdelphij    felem_square(tmp, ftmp2);
757296341Sdelphij    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
758296341Sdelphij    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
759296341Sdelphij        felem_square(tmp, ftmp3);
760296341Sdelphij        felem_reduce(ftmp3, tmp);
761296341Sdelphij    }
762296341Sdelphij    felem_mul(tmp, ftmp3, ftmp2);
763296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
764296341Sdelphij    felem_square(tmp, ftmp2);
765296341Sdelphij    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
766296341Sdelphij    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
767296341Sdelphij        felem_square(tmp, ftmp3);
768296341Sdelphij        felem_reduce(ftmp3, tmp);
769296341Sdelphij    }
770296341Sdelphij    felem_mul(tmp, ftmp3, ftmp2);
771296341Sdelphij    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
772296341Sdelphij    felem_square(tmp, ftmp3);
773296341Sdelphij    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
774296341Sdelphij    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
775296341Sdelphij        felem_square(tmp, ftmp4);
776296341Sdelphij        felem_reduce(ftmp4, tmp);
777296341Sdelphij    }
778296341Sdelphij    felem_mul(tmp, ftmp3, ftmp4);
779296341Sdelphij    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
780296341Sdelphij    felem_square(tmp, ftmp3);
781296341Sdelphij    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
782296341Sdelphij    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
783296341Sdelphij        felem_square(tmp, ftmp4);
784296341Sdelphij        felem_reduce(ftmp4, tmp);
785296341Sdelphij    }
786296341Sdelphij    felem_mul(tmp, ftmp2, ftmp4);
787296341Sdelphij    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
788296341Sdelphij    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
789296341Sdelphij        felem_square(tmp, ftmp2);
790296341Sdelphij        felem_reduce(ftmp2, tmp);
791296341Sdelphij    }
792296341Sdelphij    felem_mul(tmp, ftmp2, ftmp);
793296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
794296341Sdelphij    felem_square(tmp, ftmp);
795296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
796296341Sdelphij    felem_mul(tmp, ftmp, in);
797296341Sdelphij    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
798296341Sdelphij    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
799296341Sdelphij        felem_square(tmp, ftmp);
800296341Sdelphij        felem_reduce(ftmp, tmp);
801296341Sdelphij    }
802296341Sdelphij    felem_mul(tmp, ftmp, ftmp3);
803296341Sdelphij    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
804296341Sdelphij}
805238384Sjkim
806296341Sdelphij/*
807296341Sdelphij * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808296341Sdelphij * out to itself.
809296341Sdelphij */
810296341Sdelphijstatic void copy_conditional(felem out, const felem in, limb icopy)
811296341Sdelphij{
812296341Sdelphij    unsigned i;
813296341Sdelphij    /*
814296341Sdelphij     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815296341Sdelphij     */
816296341Sdelphij    const limb copy = -icopy;
817296341Sdelphij    for (i = 0; i < 4; ++i) {
818296341Sdelphij        const limb tmp = copy & (in[i] ^ out[i]);
819296341Sdelphij        out[i] ^= tmp;
820296341Sdelphij    }
821296341Sdelphij}
822238384Sjkim
823238384Sjkim/******************************************************************************/
824296341Sdelphij/*-
825296341Sdelphij *                       ELLIPTIC CURVE POINT OPERATIONS
826238384Sjkim *
827238384Sjkim * Points are represented in Jacobian projective coordinates:
828238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829238384Sjkim * or to the point at infinity if Z == 0.
830238384Sjkim *
831238384Sjkim */
832238384Sjkim
833296341Sdelphij/*-
834296341Sdelphij * Double an elliptic curve point:
835238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where
836238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840296341Sdelphij * while x_out == y_in is not (maybe this works, but it's not tested).
841296341Sdelphij */
842238384Sjkimstatic void
843238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out,
844238384Sjkim             const felem x_in, const felem y_in, const felem z_in)
845296341Sdelphij{
846296341Sdelphij    widefelem tmp, tmp2;
847296341Sdelphij    felem delta, gamma, beta, alpha, ftmp, ftmp2;
848238384Sjkim
849296341Sdelphij    felem_assign(ftmp, x_in);
850296341Sdelphij    felem_assign(ftmp2, x_in);
851238384Sjkim
852296341Sdelphij    /* delta = z^2 */
853296341Sdelphij    felem_square(tmp, z_in);
854296341Sdelphij    felem_reduce(delta, tmp);
855238384Sjkim
856296341Sdelphij    /* gamma = y^2 */
857296341Sdelphij    felem_square(tmp, y_in);
858296341Sdelphij    felem_reduce(gamma, tmp);
859238384Sjkim
860296341Sdelphij    /* beta = x*gamma */
861296341Sdelphij    felem_mul(tmp, x_in, gamma);
862296341Sdelphij    felem_reduce(beta, tmp);
863238384Sjkim
864296341Sdelphij    /* alpha = 3*(x-delta)*(x+delta) */
865296341Sdelphij    felem_diff(ftmp, delta);
866296341Sdelphij    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867296341Sdelphij    felem_sum(ftmp2, delta);
868296341Sdelphij    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869296341Sdelphij    felem_scalar(ftmp2, 3);
870296341Sdelphij    /* ftmp2[i] < 3 * 2^58 < 2^60 */
871296341Sdelphij    felem_mul(tmp, ftmp, ftmp2);
872296341Sdelphij    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873296341Sdelphij    felem_reduce(alpha, tmp);
874238384Sjkim
875296341Sdelphij    /* x' = alpha^2 - 8*beta */
876296341Sdelphij    felem_square(tmp, alpha);
877296341Sdelphij    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878296341Sdelphij    felem_assign(ftmp, beta);
879296341Sdelphij    felem_scalar(ftmp, 8);
880296341Sdelphij    /* ftmp[i] < 8 * 2^57 = 2^60 */
881296341Sdelphij    felem_diff_128_64(tmp, ftmp);
882296341Sdelphij    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883296341Sdelphij    felem_reduce(x_out, tmp);
884238384Sjkim
885296341Sdelphij    /* z' = (y + z)^2 - gamma - delta */
886296341Sdelphij    felem_sum(delta, gamma);
887296341Sdelphij    /* delta[i] < 2^57 + 2^57 = 2^58 */
888296341Sdelphij    felem_assign(ftmp, y_in);
889296341Sdelphij    felem_sum(ftmp, z_in);
890296341Sdelphij    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891296341Sdelphij    felem_square(tmp, ftmp);
892296341Sdelphij    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893296341Sdelphij    felem_diff_128_64(tmp, delta);
894296341Sdelphij    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895296341Sdelphij    felem_reduce(z_out, tmp);
896238384Sjkim
897296341Sdelphij    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898296341Sdelphij    felem_scalar(beta, 4);
899296341Sdelphij    /* beta[i] < 4 * 2^57 = 2^59 */
900296341Sdelphij    felem_diff(beta, x_out);
901296341Sdelphij    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902296341Sdelphij    felem_mul(tmp, alpha, beta);
903296341Sdelphij    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904296341Sdelphij    felem_square(tmp2, gamma);
905296341Sdelphij    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906296341Sdelphij    widefelem_scalar(tmp2, 8);
907296341Sdelphij    /* tmp2[i] < 8 * 2^116 = 2^119 */
908296341Sdelphij    widefelem_diff(tmp, tmp2);
909296341Sdelphij    /* tmp[i] < 2^119 + 2^120 < 2^121 */
910296341Sdelphij    felem_reduce(y_out, tmp);
911296341Sdelphij}
912238384Sjkim
913296341Sdelphij/*-
914296341Sdelphij * Add two elliptic curve points:
915238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919238384Sjkim *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921238384Sjkim *
922238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
923238384Sjkim */
924238384Sjkim
925296341Sdelphij/*
926296341Sdelphij * This function is not entirely constant-time: it includes a branch for
927296341Sdelphij * checking whether the two input points are equal, (while not equal to the
928296341Sdelphij * point at infinity). This case never happens during single point
929296341Sdelphij * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930296341Sdelphij */
931238384Sjkimstatic void point_add(felem x3, felem y3, felem z3,
932296341Sdelphij                      const felem x1, const felem y1, const felem z1,
933296341Sdelphij                      const int mixed, const felem x2, const felem y2,
934296341Sdelphij                      const felem z2)
935296341Sdelphij{
936296341Sdelphij    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937296341Sdelphij    widefelem tmp, tmp2;
938296341Sdelphij    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
939238384Sjkim
940296341Sdelphij    if (!mixed) {
941296341Sdelphij        /* ftmp2 = z2^2 */
942296341Sdelphij        felem_square(tmp, z2);
943296341Sdelphij        felem_reduce(ftmp2, tmp);
944238384Sjkim
945296341Sdelphij        /* ftmp4 = z2^3 */
946296341Sdelphij        felem_mul(tmp, ftmp2, z2);
947296341Sdelphij        felem_reduce(ftmp4, tmp);
948238384Sjkim
949296341Sdelphij        /* ftmp4 = z2^3*y1 */
950296341Sdelphij        felem_mul(tmp2, ftmp4, y1);
951296341Sdelphij        felem_reduce(ftmp4, tmp2);
952238384Sjkim
953296341Sdelphij        /* ftmp2 = z2^2*x1 */
954296341Sdelphij        felem_mul(tmp2, ftmp2, x1);
955296341Sdelphij        felem_reduce(ftmp2, tmp2);
956296341Sdelphij    } else {
957296341Sdelphij        /*
958296341Sdelphij         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
959296341Sdelphij         */
960238384Sjkim
961296341Sdelphij        /* ftmp4 = z2^3*y1 */
962296341Sdelphij        felem_assign(ftmp4, y1);
963238384Sjkim
964296341Sdelphij        /* ftmp2 = z2^2*x1 */
965296341Sdelphij        felem_assign(ftmp2, x1);
966296341Sdelphij    }
967238384Sjkim
968296341Sdelphij    /* ftmp = z1^2 */
969296341Sdelphij    felem_square(tmp, z1);
970296341Sdelphij    felem_reduce(ftmp, tmp);
971238384Sjkim
972296341Sdelphij    /* ftmp3 = z1^3 */
973296341Sdelphij    felem_mul(tmp, ftmp, z1);
974296341Sdelphij    felem_reduce(ftmp3, tmp);
975238384Sjkim
976296341Sdelphij    /* tmp = z1^3*y2 */
977296341Sdelphij    felem_mul(tmp, ftmp3, y2);
978296341Sdelphij    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979238384Sjkim
980296341Sdelphij    /* ftmp3 = z1^3*y2 - z2^3*y1 */
981296341Sdelphij    felem_diff_128_64(tmp, ftmp4);
982296341Sdelphij    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983296341Sdelphij    felem_reduce(ftmp3, tmp);
984238384Sjkim
985296341Sdelphij    /* tmp = z1^2*x2 */
986296341Sdelphij    felem_mul(tmp, ftmp, x2);
987296341Sdelphij    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988238384Sjkim
989296341Sdelphij    /* ftmp = z1^2*x2 - z2^2*x1 */
990296341Sdelphij    felem_diff_128_64(tmp, ftmp2);
991296341Sdelphij    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992296341Sdelphij    felem_reduce(ftmp, tmp);
993238384Sjkim
994296341Sdelphij    /*
995296341Sdelphij     * the formulae are incorrect if the points are equal so we check for
996296341Sdelphij     * this and do doubling if this happens
997296341Sdelphij     */
998296341Sdelphij    x_equal = felem_is_zero(ftmp);
999296341Sdelphij    y_equal = felem_is_zero(ftmp3);
1000296341Sdelphij    z1_is_zero = felem_is_zero(z1);
1001296341Sdelphij    z2_is_zero = felem_is_zero(z2);
1002296341Sdelphij    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003296341Sdelphij    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004296341Sdelphij        point_double(x3, y3, z3, x1, y1, z1);
1005296341Sdelphij        return;
1006296341Sdelphij    }
1007238384Sjkim
1008296341Sdelphij    /* ftmp5 = z1*z2 */
1009296341Sdelphij    if (!mixed) {
1010296341Sdelphij        felem_mul(tmp, z1, z2);
1011296341Sdelphij        felem_reduce(ftmp5, tmp);
1012296341Sdelphij    } else {
1013296341Sdelphij        /* special case z2 = 0 is handled later */
1014296341Sdelphij        felem_assign(ftmp5, z1);
1015296341Sdelphij    }
1016238384Sjkim
1017296341Sdelphij    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018296341Sdelphij    felem_mul(tmp, ftmp, ftmp5);
1019296341Sdelphij    felem_reduce(z_out, tmp);
1020238384Sjkim
1021296341Sdelphij    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022296341Sdelphij    felem_assign(ftmp5, ftmp);
1023296341Sdelphij    felem_square(tmp, ftmp);
1024296341Sdelphij    felem_reduce(ftmp, tmp);
1025238384Sjkim
1026296341Sdelphij    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027296341Sdelphij    felem_mul(tmp, ftmp, ftmp5);
1028296341Sdelphij    felem_reduce(ftmp5, tmp);
1029238384Sjkim
1030296341Sdelphij    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031296341Sdelphij    felem_mul(tmp, ftmp2, ftmp);
1032296341Sdelphij    felem_reduce(ftmp2, tmp);
1033238384Sjkim
1034296341Sdelphij    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035296341Sdelphij    felem_mul(tmp, ftmp4, ftmp5);
1036296341Sdelphij    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037238384Sjkim
1038296341Sdelphij    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039296341Sdelphij    felem_square(tmp2, ftmp3);
1040296341Sdelphij    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041238384Sjkim
1042296341Sdelphij    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043296341Sdelphij    felem_diff_128_64(tmp2, ftmp5);
1044296341Sdelphij    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045238384Sjkim
1046296341Sdelphij    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047296341Sdelphij    felem_assign(ftmp5, ftmp2);
1048296341Sdelphij    felem_scalar(ftmp5, 2);
1049296341Sdelphij    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050238384Sjkim
1051296341Sdelphij    /*-
1052296341Sdelphij     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053296341Sdelphij     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054296341Sdelphij     */
1055296341Sdelphij    felem_diff_128_64(tmp2, ftmp5);
1056296341Sdelphij    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057296341Sdelphij    felem_reduce(x_out, tmp2);
1058238384Sjkim
1059296341Sdelphij    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060296341Sdelphij    felem_diff(ftmp2, x_out);
1061296341Sdelphij    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062238384Sjkim
1063296341Sdelphij    /*
1064296341Sdelphij     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065296341Sdelphij     */
1066296341Sdelphij    felem_mul(tmp2, ftmp3, ftmp2);
1067296341Sdelphij    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068238384Sjkim
1069296341Sdelphij    /*-
1070296341Sdelphij     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071296341Sdelphij     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072296341Sdelphij     */
1073296341Sdelphij    widefelem_diff(tmp2, tmp);
1074296341Sdelphij    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075296341Sdelphij    felem_reduce(y_out, tmp2);
1076238384Sjkim
1077296341Sdelphij    /*
1078296341Sdelphij     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079296341Sdelphij     * the point at infinity, so we need to check for this separately
1080296341Sdelphij     */
1081238384Sjkim
1082296341Sdelphij    /*
1083296341Sdelphij     * if point 1 is at infinity, copy point 2 to output, and vice versa
1084296341Sdelphij     */
1085296341Sdelphij    copy_conditional(x_out, x2, z1_is_zero);
1086296341Sdelphij    copy_conditional(x_out, x1, z2_is_zero);
1087296341Sdelphij    copy_conditional(y_out, y2, z1_is_zero);
1088296341Sdelphij    copy_conditional(y_out, y1, z2_is_zero);
1089296341Sdelphij    copy_conditional(z_out, z2, z1_is_zero);
1090296341Sdelphij    copy_conditional(z_out, z1, z2_is_zero);
1091296341Sdelphij    felem_assign(x3, x_out);
1092296341Sdelphij    felem_assign(y3, y_out);
1093296341Sdelphij    felem_assign(z3, z_out);
1094296341Sdelphij}
1095238384Sjkim
1096296341Sdelphij/*
1097296341Sdelphij * select_point selects the |idx|th point from a precomputation table and
1098296341Sdelphij * copies it to out.
1099296341Sdelphij * The pre_comp array argument should be size of |size| argument
1100296341Sdelphij */
1101296341Sdelphijstatic void select_point(const u64 idx, unsigned int size,
1102296341Sdelphij                         const felem pre_comp[][3], felem out[3])
1103296341Sdelphij{
1104296341Sdelphij    unsigned i, j;
1105296341Sdelphij    limb *outlimbs = &out[0][0];
1106296341Sdelphij    memset(outlimbs, 0, 3 * sizeof(felem));
1107238384Sjkim
1108296341Sdelphij    for (i = 0; i < size; i++) {
1109296341Sdelphij        const limb *inlimbs = &pre_comp[i][0][0];
1110296341Sdelphij        u64 mask = i ^ idx;
1111296341Sdelphij        mask |= mask >> 4;
1112296341Sdelphij        mask |= mask >> 2;
1113296341Sdelphij        mask |= mask >> 1;
1114296341Sdelphij        mask &= 1;
1115296341Sdelphij        mask--;
1116296341Sdelphij        for (j = 0; j < 4 * 3; j++)
1117296341Sdelphij            outlimbs[j] |= inlimbs[j] & mask;
1118296341Sdelphij    }
1119296341Sdelphij}
1120238384Sjkim
1121238384Sjkim/* get_bit returns the |i|th bit in |in| */
1122238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i)
1123296341Sdelphij{
1124296341Sdelphij    if (i >= 224)
1125296341Sdelphij        return 0;
1126296341Sdelphij    return (in[i >> 3] >> (i & 7)) & 1;
1127296341Sdelphij}
1128238384Sjkim
1129296341Sdelphij/*
1130296341Sdelphij * Interleaved point multiplication using precomputed point multiples: The
1131296341Sdelphij * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132296341Sdelphij * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133296341Sdelphij * generator, using certain (large) precomputed multiples in g_pre_comp.
1134296341Sdelphij * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135296341Sdelphij */
1136238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out,
1137296341Sdelphij                      const felem_bytearray scalars[],
1138296341Sdelphij                      const unsigned num_points, const u8 *g_scalar,
1139296341Sdelphij                      const int mixed, const felem pre_comp[][17][3],
1140296341Sdelphij                      const felem g_pre_comp[2][16][3])
1141296341Sdelphij{
1142296341Sdelphij    int i, skip;
1143296341Sdelphij    unsigned num;
1144296341Sdelphij    unsigned gen_mul = (g_scalar != NULL);
1145296341Sdelphij    felem nq[3], tmp[4];
1146296341Sdelphij    u64 bits;
1147296341Sdelphij    u8 sign, digit;
1148238384Sjkim
1149296341Sdelphij    /* set nq to the point at infinity */
1150296341Sdelphij    memset(nq, 0, 3 * sizeof(felem));
1151238384Sjkim
1152296341Sdelphij    /*
1153296341Sdelphij     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154296341Sdelphij     * of the generator (two in each of the last 28 rounds) and additions of
1155296341Sdelphij     * other points multiples (every 5th round).
1156296341Sdelphij     */
1157296341Sdelphij    skip = 1;                   /* save two point operations in the first
1158296341Sdelphij                                 * round */
1159296341Sdelphij    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160296341Sdelphij        /* double */
1161296341Sdelphij        if (!skip)
1162296341Sdelphij            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163238384Sjkim
1164296341Sdelphij        /* add multiples of the generator */
1165296341Sdelphij        if (gen_mul && (i <= 27)) {
1166296341Sdelphij            /* first, look 28 bits upwards */
1167296341Sdelphij            bits = get_bit(g_scalar, i + 196) << 3;
1168296341Sdelphij            bits |= get_bit(g_scalar, i + 140) << 2;
1169296341Sdelphij            bits |= get_bit(g_scalar, i + 84) << 1;
1170296341Sdelphij            bits |= get_bit(g_scalar, i + 28);
1171296341Sdelphij            /* select the point to add, in constant time */
1172296341Sdelphij            select_point(bits, 16, g_pre_comp[1], tmp);
1173238384Sjkim
1174296341Sdelphij            if (!skip) {
1175296341Sdelphij                /* value 1 below is argument for "mixed" */
1176296341Sdelphij                point_add(nq[0], nq[1], nq[2],
1177296341Sdelphij                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178296341Sdelphij            } else {
1179296341Sdelphij                memcpy(nq, tmp, 3 * sizeof(felem));
1180296341Sdelphij                skip = 0;
1181296341Sdelphij            }
1182238384Sjkim
1183296341Sdelphij            /* second, look at the current position */
1184296341Sdelphij            bits = get_bit(g_scalar, i + 168) << 3;
1185296341Sdelphij            bits |= get_bit(g_scalar, i + 112) << 2;
1186296341Sdelphij            bits |= get_bit(g_scalar, i + 56) << 1;
1187296341Sdelphij            bits |= get_bit(g_scalar, i);
1188296341Sdelphij            /* select the point to add, in constant time */
1189296341Sdelphij            select_point(bits, 16, g_pre_comp[0], tmp);
1190296341Sdelphij            point_add(nq[0], nq[1], nq[2],
1191296341Sdelphij                      nq[0], nq[1], nq[2],
1192296341Sdelphij                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193296341Sdelphij        }
1194238384Sjkim
1195296341Sdelphij        /* do other additions every 5 doublings */
1196296341Sdelphij        if (num_points && (i % 5 == 0)) {
1197296341Sdelphij            /* loop over all scalars */
1198296341Sdelphij            for (num = 0; num < num_points; ++num) {
1199296341Sdelphij                bits = get_bit(scalars[num], i + 4) << 5;
1200296341Sdelphij                bits |= get_bit(scalars[num], i + 3) << 4;
1201296341Sdelphij                bits |= get_bit(scalars[num], i + 2) << 3;
1202296341Sdelphij                bits |= get_bit(scalars[num], i + 1) << 2;
1203296341Sdelphij                bits |= get_bit(scalars[num], i) << 1;
1204296341Sdelphij                bits |= get_bit(scalars[num], i - 1);
1205296341Sdelphij                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206238384Sjkim
1207296341Sdelphij                /* select the point to add or subtract */
1208296341Sdelphij                select_point(digit, 17, pre_comp[num], tmp);
1209296341Sdelphij                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210296341Sdelphij                                            * point */
1211296341Sdelphij                copy_conditional(tmp[1], tmp[3], sign);
1212238384Sjkim
1213296341Sdelphij                if (!skip) {
1214296341Sdelphij                    point_add(nq[0], nq[1], nq[2],
1215296341Sdelphij                              nq[0], nq[1], nq[2],
1216296341Sdelphij                              mixed, tmp[0], tmp[1], tmp[2]);
1217296341Sdelphij                } else {
1218296341Sdelphij                    memcpy(nq, tmp, 3 * sizeof(felem));
1219296341Sdelphij                    skip = 0;
1220296341Sdelphij                }
1221296341Sdelphij            }
1222296341Sdelphij        }
1223296341Sdelphij    }
1224296341Sdelphij    felem_assign(x_out, nq[0]);
1225296341Sdelphij    felem_assign(y_out, nq[1]);
1226296341Sdelphij    felem_assign(z_out, nq[2]);
1227296341Sdelphij}
1228238384Sjkim
1229238384Sjkim/******************************************************************************/
1230296341Sdelphij/*
1231296341Sdelphij * FUNCTIONS TO MANAGE PRECOMPUTATION
1232238384Sjkim */
1233238384Sjkim
1234238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235296341Sdelphij{
1236296341Sdelphij    NISTP224_PRE_COMP *ret = NULL;
1237296341Sdelphij    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238296341Sdelphij    if (!ret) {
1239296341Sdelphij        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240296341Sdelphij        return ret;
1241296341Sdelphij    }
1242296341Sdelphij    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243296341Sdelphij    ret->references = 1;
1244296341Sdelphij    return ret;
1245296341Sdelphij}
1246238384Sjkim
1247238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_)
1248296341Sdelphij{
1249296341Sdelphij    NISTP224_PRE_COMP *src = src_;
1250238384Sjkim
1251296341Sdelphij    /* no need to actually copy, these objects never change! */
1252296341Sdelphij    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253238384Sjkim
1254296341Sdelphij    return src_;
1255296341Sdelphij}
1256238384Sjkim
1257238384Sjkimstatic void nistp224_pre_comp_free(void *pre_)
1258296341Sdelphij{
1259296341Sdelphij    int i;
1260296341Sdelphij    NISTP224_PRE_COMP *pre = pre_;
1261238384Sjkim
1262296341Sdelphij    if (!pre)
1263296341Sdelphij        return;
1264238384Sjkim
1265296341Sdelphij    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266296341Sdelphij    if (i > 0)
1267296341Sdelphij        return;
1268238384Sjkim
1269296341Sdelphij    OPENSSL_free(pre);
1270296341Sdelphij}
1271238384Sjkim
1272238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_)
1273296341Sdelphij{
1274296341Sdelphij    int i;
1275296341Sdelphij    NISTP224_PRE_COMP *pre = pre_;
1276238384Sjkim
1277296341Sdelphij    if (!pre)
1278296341Sdelphij        return;
1279238384Sjkim
1280296341Sdelphij    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281296341Sdelphij    if (i > 0)
1282296341Sdelphij        return;
1283238384Sjkim
1284296341Sdelphij    OPENSSL_cleanse(pre, sizeof *pre);
1285296341Sdelphij    OPENSSL_free(pre);
1286296341Sdelphij}
1287238384Sjkim
1288238384Sjkim/******************************************************************************/
1289296341Sdelphij/*
1290296341Sdelphij * OPENSSL EC_METHOD FUNCTIONS
1291238384Sjkim */
1292238384Sjkim
1293238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group)
1294296341Sdelphij{
1295296341Sdelphij    int ret;
1296296341Sdelphij    ret = ec_GFp_simple_group_init(group);
1297296341Sdelphij    group->a_is_minus3 = 1;
1298296341Sdelphij    return ret;
1299296341Sdelphij}
1300238384Sjkim
1301238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302296341Sdelphij                                    const BIGNUM *a, const BIGNUM *b,
1303296341Sdelphij                                    BN_CTX *ctx)
1304296341Sdelphij{
1305296341Sdelphij    int ret = 0;
1306296341Sdelphij    BN_CTX *new_ctx = NULL;
1307296341Sdelphij    BIGNUM *curve_p, *curve_a, *curve_b;
1308238384Sjkim
1309296341Sdelphij    if (ctx == NULL)
1310296341Sdelphij        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311296341Sdelphij            return 0;
1312296341Sdelphij    BN_CTX_start(ctx);
1313296341Sdelphij    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314296341Sdelphij        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315296341Sdelphij        ((curve_b = BN_CTX_get(ctx)) == NULL))
1316296341Sdelphij        goto err;
1317296341Sdelphij    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318296341Sdelphij    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319296341Sdelphij    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320296341Sdelphij    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321296341Sdelphij        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322296341Sdelphij              EC_R_WRONG_CURVE_PARAMETERS);
1323296341Sdelphij        goto err;
1324296341Sdelphij    }
1325296341Sdelphij    group->field_mod_func = BN_nist_mod_224;
1326296341Sdelphij    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327296341Sdelphij err:
1328296341Sdelphij    BN_CTX_end(ctx);
1329296341Sdelphij    if (new_ctx != NULL)
1330296341Sdelphij        BN_CTX_free(new_ctx);
1331296341Sdelphij    return ret;
1332296341Sdelphij}
1333238384Sjkim
1334296341Sdelphij/*
1335296341Sdelphij * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336296341Sdelphij * (X/Z^2, Y/Z^3)
1337296341Sdelphij */
1338238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339296341Sdelphij                                                 const EC_POINT *point,
1340296341Sdelphij                                                 BIGNUM *x, BIGNUM *y,
1341296341Sdelphij                                                 BN_CTX *ctx)
1342296341Sdelphij{
1343296341Sdelphij    felem z1, z2, x_in, y_in, x_out, y_out;
1344296341Sdelphij    widefelem tmp;
1345238384Sjkim
1346296341Sdelphij    if (EC_POINT_is_at_infinity(group, point)) {
1347296341Sdelphij        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348296341Sdelphij              EC_R_POINT_AT_INFINITY);
1349296341Sdelphij        return 0;
1350296341Sdelphij    }
1351296341Sdelphij    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352296341Sdelphij        (!BN_to_felem(z1, &point->Z)))
1353296341Sdelphij        return 0;
1354296341Sdelphij    felem_inv(z2, z1);
1355296341Sdelphij    felem_square(tmp, z2);
1356296341Sdelphij    felem_reduce(z1, tmp);
1357296341Sdelphij    felem_mul(tmp, x_in, z1);
1358296341Sdelphij    felem_reduce(x_in, tmp);
1359296341Sdelphij    felem_contract(x_out, x_in);
1360296341Sdelphij    if (x != NULL) {
1361296341Sdelphij        if (!felem_to_BN(x, x_out)) {
1362296341Sdelphij            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363296341Sdelphij                  ERR_R_BN_LIB);
1364296341Sdelphij            return 0;
1365296341Sdelphij        }
1366296341Sdelphij    }
1367296341Sdelphij    felem_mul(tmp, z1, z2);
1368296341Sdelphij    felem_reduce(z1, tmp);
1369296341Sdelphij    felem_mul(tmp, y_in, z1);
1370296341Sdelphij    felem_reduce(y_in, tmp);
1371296341Sdelphij    felem_contract(y_out, y_in);
1372296341Sdelphij    if (y != NULL) {
1373296341Sdelphij        if (!felem_to_BN(y, y_out)) {
1374296341Sdelphij            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375296341Sdelphij                  ERR_R_BN_LIB);
1376296341Sdelphij            return 0;
1377296341Sdelphij        }
1378296341Sdelphij    }
1379296341Sdelphij    return 1;
1380296341Sdelphij}
1381238384Sjkim
1382296341Sdelphijstatic void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383296341Sdelphij                               felem tmp_felems[ /* num+1 */ ])
1384296341Sdelphij{
1385296341Sdelphij    /*
1386296341Sdelphij     * Runs in constant time, unless an input is the point at infinity (which
1387296341Sdelphij     * normally shouldn't happen).
1388296341Sdelphij     */
1389296341Sdelphij    ec_GFp_nistp_points_make_affine_internal(num,
1390296341Sdelphij                                             points,
1391296341Sdelphij                                             sizeof(felem),
1392296341Sdelphij                                             tmp_felems,
1393296341Sdelphij                                             (void (*)(void *))felem_one,
1394296341Sdelphij                                             (int (*)(const void *))
1395296341Sdelphij                                             felem_is_zero_int,
1396296341Sdelphij                                             (void (*)(void *, const void *))
1397296341Sdelphij                                             felem_assign,
1398296341Sdelphij                                             (void (*)(void *, const void *))
1399296341Sdelphij                                             felem_square_reduce, (void (*)
1400296341Sdelphij                                                                   (void *,
1401296341Sdelphij                                                                    const void
1402296341Sdelphij                                                                    *,
1403296341Sdelphij                                                                    const void
1404296341Sdelphij                                                                    *))
1405296341Sdelphij                                             felem_mul_reduce,
1406296341Sdelphij                                             (void (*)(void *, const void *))
1407296341Sdelphij                                             felem_inv,
1408296341Sdelphij                                             (void (*)(void *, const void *))
1409296341Sdelphij                                             felem_contract);
1410296341Sdelphij}
1411238384Sjkim
1412296341Sdelphij/*
1413296341Sdelphij * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414296341Sdelphij * values Result is stored in r (r can equal one of the inputs).
1415296341Sdelphij */
1416238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417296341Sdelphij                               const BIGNUM *scalar, size_t num,
1418296341Sdelphij                               const EC_POINT *points[],
1419296341Sdelphij                               const BIGNUM *scalars[], BN_CTX *ctx)
1420296341Sdelphij{
1421296341Sdelphij    int ret = 0;
1422296341Sdelphij    int j;
1423296341Sdelphij    unsigned i;
1424296341Sdelphij    int mixed = 0;
1425296341Sdelphij    BN_CTX *new_ctx = NULL;
1426296341Sdelphij    BIGNUM *x, *y, *z, *tmp_scalar;
1427296341Sdelphij    felem_bytearray g_secret;
1428296341Sdelphij    felem_bytearray *secrets = NULL;
1429296341Sdelphij    felem(*pre_comp)[17][3] = NULL;
1430296341Sdelphij    felem *tmp_felems = NULL;
1431296341Sdelphij    felem_bytearray tmp;
1432296341Sdelphij    unsigned num_bytes;
1433296341Sdelphij    int have_pre_comp = 0;
1434296341Sdelphij    size_t num_points = num;
1435296341Sdelphij    felem x_in, y_in, z_in, x_out, y_out, z_out;
1436296341Sdelphij    NISTP224_PRE_COMP *pre = NULL;
1437296341Sdelphij    const felem(*g_pre_comp)[16][3] = NULL;
1438296341Sdelphij    EC_POINT *generator = NULL;
1439296341Sdelphij    const EC_POINT *p = NULL;
1440296341Sdelphij    const BIGNUM *p_scalar = NULL;
1441238384Sjkim
1442296341Sdelphij    if (ctx == NULL)
1443296341Sdelphij        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444296341Sdelphij            return 0;
1445296341Sdelphij    BN_CTX_start(ctx);
1446296341Sdelphij    if (((x = BN_CTX_get(ctx)) == NULL) ||
1447296341Sdelphij        ((y = BN_CTX_get(ctx)) == NULL) ||
1448296341Sdelphij        ((z = BN_CTX_get(ctx)) == NULL) ||
1449296341Sdelphij        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1450296341Sdelphij        goto err;
1451238384Sjkim
1452296341Sdelphij    if (scalar != NULL) {
1453296341Sdelphij        pre = EC_EX_DATA_get_data(group->extra_data,
1454296341Sdelphij                                  nistp224_pre_comp_dup,
1455296341Sdelphij                                  nistp224_pre_comp_free,
1456296341Sdelphij                                  nistp224_pre_comp_clear_free);
1457296341Sdelphij        if (pre)
1458296341Sdelphij            /* we have precomputation, try to use it */
1459296341Sdelphij            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1460296341Sdelphij        else
1461296341Sdelphij            /* try to use the standard precomputation */
1462296341Sdelphij            g_pre_comp = &gmul[0];
1463296341Sdelphij        generator = EC_POINT_new(group);
1464296341Sdelphij        if (generator == NULL)
1465296341Sdelphij            goto err;
1466296341Sdelphij        /* get the generator from precomputation */
1467296341Sdelphij        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1468296341Sdelphij            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1469296341Sdelphij            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1470296341Sdelphij            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471296341Sdelphij            goto err;
1472296341Sdelphij        }
1473296341Sdelphij        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1474296341Sdelphij                                                      generator, x, y, z,
1475296341Sdelphij                                                      ctx))
1476296341Sdelphij            goto err;
1477296341Sdelphij        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1478296341Sdelphij            /* precomputation matches generator */
1479296341Sdelphij            have_pre_comp = 1;
1480296341Sdelphij        else
1481296341Sdelphij            /*
1482296341Sdelphij             * we don't have valid precomputation: treat the generator as a
1483296341Sdelphij             * random point
1484296341Sdelphij             */
1485296341Sdelphij            num_points = num_points + 1;
1486296341Sdelphij    }
1487238384Sjkim
1488296341Sdelphij    if (num_points > 0) {
1489296341Sdelphij        if (num_points >= 3) {
1490296341Sdelphij            /*
1491296341Sdelphij             * unless we precompute multiples for just one or two points,
1492296341Sdelphij             * converting those into affine form is time well spent
1493296341Sdelphij             */
1494296341Sdelphij            mixed = 1;
1495296341Sdelphij        }
1496296341Sdelphij        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1497296341Sdelphij        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498296341Sdelphij        if (mixed)
1499296341Sdelphij            tmp_felems =
1500296341Sdelphij                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1501296341Sdelphij        if ((secrets == NULL) || (pre_comp == NULL)
1502296341Sdelphij            || (mixed && (tmp_felems == NULL))) {
1503296341Sdelphij            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1504296341Sdelphij            goto err;
1505296341Sdelphij        }
1506238384Sjkim
1507296341Sdelphij        /*
1508296341Sdelphij         * we treat NULL scalars as 0, and NULL points as points at infinity,
1509296341Sdelphij         * i.e., they contribute nothing to the linear combination
1510296341Sdelphij         */
1511296341Sdelphij        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1512296341Sdelphij        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1513296341Sdelphij        for (i = 0; i < num_points; ++i) {
1514296341Sdelphij            if (i == num)
1515296341Sdelphij                /* the generator */
1516296341Sdelphij            {
1517296341Sdelphij                p = EC_GROUP_get0_generator(group);
1518296341Sdelphij                p_scalar = scalar;
1519296341Sdelphij            } else
1520296341Sdelphij                /* the i^th point */
1521296341Sdelphij            {
1522296341Sdelphij                p = points[i];
1523296341Sdelphij                p_scalar = scalars[i];
1524296341Sdelphij            }
1525296341Sdelphij            if ((p_scalar != NULL) && (p != NULL)) {
1526296341Sdelphij                /* reduce scalar to 0 <= scalar < 2^224 */
1527296341Sdelphij                if ((BN_num_bits(p_scalar) > 224)
1528296341Sdelphij                    || (BN_is_negative(p_scalar))) {
1529296341Sdelphij                    /*
1530296341Sdelphij                     * this is an unusual input, and we don't guarantee
1531296341Sdelphij                     * constant-timeness
1532296341Sdelphij                     */
1533296341Sdelphij                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1534296341Sdelphij                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535296341Sdelphij                        goto err;
1536296341Sdelphij                    }
1537296341Sdelphij                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1538296341Sdelphij                } else
1539296341Sdelphij                    num_bytes = BN_bn2bin(p_scalar, tmp);
1540296341Sdelphij                flip_endian(secrets[i], tmp, num_bytes);
1541296341Sdelphij                /* precompute multiples */
1542296341Sdelphij                if ((!BN_to_felem(x_out, &p->X)) ||
1543296341Sdelphij                    (!BN_to_felem(y_out, &p->Y)) ||
1544296341Sdelphij                    (!BN_to_felem(z_out, &p->Z)))
1545296341Sdelphij                    goto err;
1546296341Sdelphij                felem_assign(pre_comp[i][1][0], x_out);
1547296341Sdelphij                felem_assign(pre_comp[i][1][1], y_out);
1548296341Sdelphij                felem_assign(pre_comp[i][1][2], z_out);
1549296341Sdelphij                for (j = 2; j <= 16; ++j) {
1550296341Sdelphij                    if (j & 1) {
1551296341Sdelphij                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1552296341Sdelphij                                  pre_comp[i][j][2], pre_comp[i][1][0],
1553296341Sdelphij                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1554296341Sdelphij                                  pre_comp[i][j - 1][0],
1555296341Sdelphij                                  pre_comp[i][j - 1][1],
1556296341Sdelphij                                  pre_comp[i][j - 1][2]);
1557296341Sdelphij                    } else {
1558296341Sdelphij                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1559296341Sdelphij                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1560296341Sdelphij                                     pre_comp[i][j / 2][1],
1561296341Sdelphij                                     pre_comp[i][j / 2][2]);
1562296341Sdelphij                    }
1563296341Sdelphij                }
1564296341Sdelphij            }
1565296341Sdelphij        }
1566296341Sdelphij        if (mixed)
1567296341Sdelphij            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1568296341Sdelphij    }
1569238384Sjkim
1570296341Sdelphij    /* the scalar for the generator */
1571296341Sdelphij    if ((scalar != NULL) && (have_pre_comp)) {
1572296341Sdelphij        memset(g_secret, 0, sizeof g_secret);
1573296341Sdelphij        /* reduce scalar to 0 <= scalar < 2^224 */
1574296341Sdelphij        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1575296341Sdelphij            /*
1576296341Sdelphij             * this is an unusual input, and we don't guarantee
1577296341Sdelphij             * constant-timeness
1578296341Sdelphij             */
1579296341Sdelphij            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1580296341Sdelphij                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581296341Sdelphij                goto err;
1582296341Sdelphij            }
1583296341Sdelphij            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1584296341Sdelphij        } else
1585296341Sdelphij            num_bytes = BN_bn2bin(scalar, tmp);
1586296341Sdelphij        flip_endian(g_secret, tmp, num_bytes);
1587296341Sdelphij        /* do the multiplication with generator precomputation */
1588296341Sdelphij        batch_mul(x_out, y_out, z_out,
1589296341Sdelphij                  (const felem_bytearray(*))secrets, num_points,
1590296341Sdelphij                  g_secret,
1591296341Sdelphij                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1592296341Sdelphij    } else
1593296341Sdelphij        /* do the multiplication without generator precomputation */
1594296341Sdelphij        batch_mul(x_out, y_out, z_out,
1595296341Sdelphij                  (const felem_bytearray(*))secrets, num_points,
1596296341Sdelphij                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1597296341Sdelphij    /* reduce the output to its unique minimal representation */
1598296341Sdelphij    felem_contract(x_in, x_out);
1599296341Sdelphij    felem_contract(y_in, y_out);
1600296341Sdelphij    felem_contract(z_in, z_out);
1601296341Sdelphij    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1602296341Sdelphij        (!felem_to_BN(z, z_in))) {
1603296341Sdelphij        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604296341Sdelphij        goto err;
1605296341Sdelphij    }
1606296341Sdelphij    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1607238384Sjkim
1608296341Sdelphij err:
1609296341Sdelphij    BN_CTX_end(ctx);
1610296341Sdelphij    if (generator != NULL)
1611296341Sdelphij        EC_POINT_free(generator);
1612296341Sdelphij    if (new_ctx != NULL)
1613296341Sdelphij        BN_CTX_free(new_ctx);
1614296341Sdelphij    if (secrets != NULL)
1615296341Sdelphij        OPENSSL_free(secrets);
1616296341Sdelphij    if (pre_comp != NULL)
1617296341Sdelphij        OPENSSL_free(pre_comp);
1618296341Sdelphij    if (tmp_felems != NULL)
1619296341Sdelphij        OPENSSL_free(tmp_felems);
1620296341Sdelphij    return ret;
1621296341Sdelphij}
1622238384Sjkim
1623238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1624296341Sdelphij{
1625296341Sdelphij    int ret = 0;
1626296341Sdelphij    NISTP224_PRE_COMP *pre = NULL;
1627296341Sdelphij    int i, j;
1628296341Sdelphij    BN_CTX *new_ctx = NULL;
1629296341Sdelphij    BIGNUM *x, *y;
1630296341Sdelphij    EC_POINT *generator = NULL;
1631296341Sdelphij    felem tmp_felems[32];
1632238384Sjkim
1633296341Sdelphij    /* throw away old precomputation */
1634296341Sdelphij    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1635296341Sdelphij                         nistp224_pre_comp_free,
1636296341Sdelphij                         nistp224_pre_comp_clear_free);
1637296341Sdelphij    if (ctx == NULL)
1638296341Sdelphij        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639296341Sdelphij            return 0;
1640296341Sdelphij    BN_CTX_start(ctx);
1641296341Sdelphij    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1642296341Sdelphij        goto err;
1643296341Sdelphij    /* get the generator */
1644296341Sdelphij    if (group->generator == NULL)
1645296341Sdelphij        goto err;
1646296341Sdelphij    generator = EC_POINT_new(group);
1647296341Sdelphij    if (generator == NULL)
1648296341Sdelphij        goto err;
1649296341Sdelphij    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1650296341Sdelphij    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1651296341Sdelphij    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1652296341Sdelphij        goto err;
1653296341Sdelphij    if ((pre = nistp224_pre_comp_new()) == NULL)
1654296341Sdelphij        goto err;
1655296341Sdelphij    /*
1656296341Sdelphij     * if the generator is the standard one, use built-in precomputation
1657296341Sdelphij     */
1658296341Sdelphij    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1659296341Sdelphij        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1660296341Sdelphij        ret = 1;
1661296341Sdelphij        goto err;
1662296341Sdelphij    }
1663296341Sdelphij    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1664296341Sdelphij        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1665296341Sdelphij        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1666296341Sdelphij        goto err;
1667296341Sdelphij    /*
1668296341Sdelphij     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1669296341Sdelphij     * 2^140*G, 2^196*G for the second one
1670296341Sdelphij     */
1671296341Sdelphij    for (i = 1; i <= 8; i <<= 1) {
1672296341Sdelphij        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673296341Sdelphij                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1674296341Sdelphij                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1675296341Sdelphij        for (j = 0; j < 27; ++j) {
1676296341Sdelphij            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1677296341Sdelphij                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1678296341Sdelphij                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1679296341Sdelphij        }
1680296341Sdelphij        if (i == 8)
1681296341Sdelphij            break;
1682296341Sdelphij        point_double(pre->g_pre_comp[0][2 * i][0],
1683296341Sdelphij                     pre->g_pre_comp[0][2 * i][1],
1684296341Sdelphij                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1685296341Sdelphij                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1686296341Sdelphij        for (j = 0; j < 27; ++j) {
1687296341Sdelphij            point_double(pre->g_pre_comp[0][2 * i][0],
1688296341Sdelphij                         pre->g_pre_comp[0][2 * i][1],
1689296341Sdelphij                         pre->g_pre_comp[0][2 * i][2],
1690296341Sdelphij                         pre->g_pre_comp[0][2 * i][0],
1691296341Sdelphij                         pre->g_pre_comp[0][2 * i][1],
1692296341Sdelphij                         pre->g_pre_comp[0][2 * i][2]);
1693296341Sdelphij        }
1694296341Sdelphij    }
1695296341Sdelphij    for (i = 0; i < 2; i++) {
1696296341Sdelphij        /* g_pre_comp[i][0] is the point at infinity */
1697296341Sdelphij        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1698296341Sdelphij        /* the remaining multiples */
1699296341Sdelphij        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1700296341Sdelphij        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1701296341Sdelphij                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1702296341Sdelphij                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1703296341Sdelphij                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704296341Sdelphij                  pre->g_pre_comp[i][2][2]);
1705296341Sdelphij        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1706296341Sdelphij        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1707296341Sdelphij                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1708296341Sdelphij                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709296341Sdelphij                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1710296341Sdelphij                  pre->g_pre_comp[i][2][2]);
1711296341Sdelphij        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1712296341Sdelphij        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1713296341Sdelphij                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1714296341Sdelphij                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1715296341Sdelphij                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1716296341Sdelphij                  pre->g_pre_comp[i][4][2]);
1717296341Sdelphij        /*
1718296341Sdelphij         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1719296341Sdelphij         */
1720296341Sdelphij        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1721296341Sdelphij                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1722296341Sdelphij                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1723296341Sdelphij                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1724296341Sdelphij                  pre->g_pre_comp[i][2][2]);
1725296341Sdelphij        for (j = 1; j < 8; ++j) {
1726296341Sdelphij            /* odd multiples: add G resp. 2^28*G */
1727296341Sdelphij            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1728296341Sdelphij                      pre->g_pre_comp[i][2 * j + 1][1],
1729296341Sdelphij                      pre->g_pre_comp[i][2 * j + 1][2],
1730296341Sdelphij                      pre->g_pre_comp[i][2 * j][0],
1731296341Sdelphij                      pre->g_pre_comp[i][2 * j][1],
1732296341Sdelphij                      pre->g_pre_comp[i][2 * j][2], 0,
1733296341Sdelphij                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1734296341Sdelphij                      pre->g_pre_comp[i][1][2]);
1735296341Sdelphij        }
1736296341Sdelphij    }
1737296341Sdelphij    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1738238384Sjkim
1739296341Sdelphij    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1740296341Sdelphij                             nistp224_pre_comp_free,
1741296341Sdelphij                             nistp224_pre_comp_clear_free))
1742296341Sdelphij        goto err;
1743296341Sdelphij    ret = 1;
1744296341Sdelphij    pre = NULL;
1745238384Sjkim err:
1746296341Sdelphij    BN_CTX_end(ctx);
1747296341Sdelphij    if (generator != NULL)
1748296341Sdelphij        EC_POINT_free(generator);
1749296341Sdelphij    if (new_ctx != NULL)
1750296341Sdelphij        BN_CTX_free(new_ctx);
1751296341Sdelphij    if (pre)
1752296341Sdelphij        nistp224_pre_comp_free(pre);
1753296341Sdelphij    return ret;
1754296341Sdelphij}
1755238384Sjkim
1756238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1757296341Sdelphij{
1758296341Sdelphij    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1759296341Sdelphij                            nistp224_pre_comp_free,
1760296341Sdelphij                            nistp224_pre_comp_clear_free)
1761296341Sdelphij        != NULL)
1762296341Sdelphij        return 1;
1763296341Sdelphij    else
1764296341Sdelphij        return 0;
1765296341Sdelphij}
1766238384Sjkim
1767238384Sjkim#else
1768296341Sdelphijstatic void *dummy = &dummy;
1769238384Sjkim#endif
1770