crypto/ec/ecp_nistp224.c

238384Sjkim/* crypto/ec/ecp_nistp224.c */
238384Sjkim/*
238384Sjkim * Written by Emilia Kasper (Google) for the OpenSSL project.
238384Sjkim */
238384Sjkim/* Copyright 2011 Google Inc.
238384Sjkim *
238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License");
238384Sjkim *
238384Sjkim * you may not use this file except in compliance with the License.
238384Sjkim * You may obtain a copy of the License at
238384Sjkim *
238384Sjkim *     http://www.apache.org/licenses/LICENSE-2.0
238384Sjkim *
238384Sjkim *  Unless required by applicable law or agreed to in writing, software
238384Sjkim *  distributed under the License is distributed on an "AS IS" BASIS,
238384Sjkim *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
238384Sjkim *  See the License for the specific language governing permissions and
238384Sjkim *  limitations under the License.
238384Sjkim */
238384Sjkim
238384Sjkim/*
238384Sjkim * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
238384Sjkim *
238384Sjkim * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
238384Sjkim * and Adam Langley's public domain 64-bit C implementation of curve25519
238384Sjkim */
238384Sjkim
238384Sjkim#include <openssl/opensslconf.h>
238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
238384Sjkim
280304Sjkim# ifndef OPENSSL_SYS_VMS
280304Sjkim#  include <stdint.h>
280304Sjkim# else
280304Sjkim#  include <inttypes.h>
280304Sjkim# endif
238384Sjkim
280304Sjkim# include <string.h>
280304Sjkim# include <openssl/err.h>
280304Sjkim# include "ec_lcl.h"
238384Sjkim
# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
/*
 * __uint128_t is nonstandard; gcc implements it on 64-bit platforms only,
 * so even with gcc this typedef won't work for 32-bit platforms.
 */
typedef __uint128_t uint128_t;
# else
#  error "Need GCC 3.1 or later to define type uint128_t"
# endif

/* Short aliases for the fixed-width integer types from <stdint.h>. */
typedef uint8_t u8;
typedef uint64_t u64;
typedef int64_t s64;
238384Sjkim
238384Sjkim/******************************************************************************/
280304Sjkim/*-
280304Sjkim * INTERNAL REPRESENTATION OF FIELD ELEMENTS
238384Sjkim *
238384Sjkim * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
238384Sjkim * using 64-bit coefficients called 'limbs',
238384Sjkim * and sometimes (for multiplication results) as
238384Sjkim * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
238384Sjkim * using 128-bit coefficients called 'widelimbs'.
238384Sjkim * A 4-limb representation is an 'felem';
238384Sjkim * a 7-widelimb representation is a 'widefelem'.
238384Sjkim * Even within felems, bits of adjacent limbs overlap, and we don't always
238384Sjkim * reduce the representations: we ensure that inputs to each felem
238384Sjkim * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
238384Sjkim * and fit into a 128-bit word without overflow. The coefficients are then
238384Sjkim * again partially reduced to obtain an felem satisfying a_i < 2^57.
238384Sjkim * We only reduce to the unique minimal representation at the end of the
238384Sjkim * computation.
238384Sjkim */
238384Sjkim
238384Sjkimtypedef uint64_t limb;
238384Sjkimtypedef uint128_t widelimb;
238384Sjkim
238384Sjkimtypedef limb felem[4];
238384Sjkimtypedef widelimb widefelem[7];
238384Sjkim
280304Sjkim/*
280304Sjkim * Field element represented as a byte arrary. 28*8 = 224 bits is also the
280304Sjkim * group order size for the elliptic curve, and we also use this type for
280304Sjkim * scalars for point multiplication.
280304Sjkim */
238384Sjkimtypedef u8 felem_bytearray[28];
238384Sjkim
238384Sjkimstatic const felem_bytearray nistp224_curve_params[5] = {
280304Sjkim    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
280304Sjkim     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
280304Sjkim    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
280304Sjkim     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
280304Sjkim    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
280304Sjkim     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
280304Sjkim     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
280304Sjkim    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
280304Sjkim     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
280304Sjkim     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
280304Sjkim    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
280304Sjkim     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
280304Sjkim     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
238384Sjkim};
238384Sjkim
280304Sjkim/*-
280304Sjkim * Precomputed multiples of the standard generator
238384Sjkim * Points are given in coordinates (X, Y, Z) where Z normally is 1
238384Sjkim * (0 for the point at infinity).
238384Sjkim * For each field element, slice a_0 is word 0, etc.
238384Sjkim *
238384Sjkim * The table has 2 * 16 elements, starting with the following:
238384Sjkim * index | bits    | point
238384Sjkim * ------+---------+------------------------------
238384Sjkim *     0 | 0 0 0 0 | 0G
238384Sjkim *     1 | 0 0 0 1 | 1G
238384Sjkim *     2 | 0 0 1 0 | 2^56G
238384Sjkim *     3 | 0 0 1 1 | (2^56 + 1)G
238384Sjkim *     4 | 0 1 0 0 | 2^112G
238384Sjkim *     5 | 0 1 0 1 | (2^112 + 1)G
238384Sjkim *     6 | 0 1 1 0 | (2^112 + 2^56)G
238384Sjkim *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
238384Sjkim *     8 | 1 0 0 0 | 2^168G
238384Sjkim *     9 | 1 0 0 1 | (2^168 + 1)G
238384Sjkim *    10 | 1 0 1 0 | (2^168 + 2^56)G
238384Sjkim *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
238384Sjkim *    12 | 1 1 0 0 | (2^168 + 2^112)G
238384Sjkim *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
238384Sjkim *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
238384Sjkim *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
238384Sjkim * followed by a copy of this with each element multiplied by 2^28.
238384Sjkim *
238384Sjkim * The reason for this is so that we can clock bits into four different
238384Sjkim * locations when doing simple scalar multiplies against the base point,
238384Sjkim * and then another four locations using the second 16 elements.
238384Sjkim */
280304Sjkimstatic const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
280304Sjkim                                        {0, 0, 0, 0},
280304Sjkim                                        {0, 0, 0, 0}},
280304Sjkim                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
280304Sjkim                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
280304Sjkim                                        {0xd5819985007e34, 0x75a05a07476444,
280304Sjkim                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
280304Sjkim                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
280304Sjkim                                        {0x29e0b892dc9c43, 0xece8608436e662,
280304Sjkim                                         0xdc858f185310d0, 0x9812dd4eb8d321},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
280304Sjkim                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
280304Sjkim                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
280304Sjkim                                         0x313865468fafec, 0x5cb379ba910a17},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
280304Sjkim                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
280304Sjkim                                        {0x7510407766af5d, 0x84d929610d5450,
280304Sjkim                                         0x81d77aae82f706, 0x6916f6d4338c5b},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
280304Sjkim                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
280304Sjkim                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
280304Sjkim                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
280304Sjkim                                         0x363f37347c1ba4, 0xc391b45c84725c},
280304Sjkim                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
280304Sjkim                                         0xc393da7e222a7f, 0x1efb7890ede244},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
280304Sjkim                                         0xff8d33c2c98b7c, 0x2610b39409f849},
280304Sjkim                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
280304Sjkim                                         0x966c079b753c89, 0xfe67e4e820b112},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
280304Sjkim                                         0x79b7619a3e7c4c, 0x05c73240899b47},
280304Sjkim                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
280304Sjkim                                         0x641fab2116fd56, 0x72855882b08394},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
280304Sjkim                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
280304Sjkim                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
280304Sjkim                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
280304Sjkim                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
280304Sjkim                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
280304Sjkim                                         0x9281d688bcbe50, 0xc40331df893881},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xb89530796f0f60, 0xade92bd26909a3,
280304Sjkim                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
280304Sjkim                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
280304Sjkim                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x571e509fb5efb3, 0xade88696410552,
280304Sjkim                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
280304Sjkim                                        {0xff9f51160f4652, 0xb47ce2495a6539,
280304Sjkim                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x40bbd5081a44af, 0x0995183b13926c,
280304Sjkim                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
280304Sjkim                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
280304Sjkim                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
280304Sjkim                                         0x1c29819435d2c6, 0xc813132f4c07e9},
280304Sjkim                                        {0x2891425503b11f, 0x08781030579fea,
280304Sjkim                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
280304Sjkim                                        {1, 0, 0, 0}},
280304Sjkim                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
280304Sjkim                                         0xff17036691a973, 0xf1aef351497c58},
280304Sjkim                                        {0xdd1f2d600564ff, 0xdead073b1402db,
280304Sjkim                                         0x74a684435bd693, 0xeea7471f962558},
280304Sjkim                                        {1, 0, 0, 0}}},
280304Sjkim{{{0, 0, 0, 0},
280304Sjkim  {0, 0, 0, 0},
280304Sjkim  {0, 0, 0, 0}},
280304Sjkim {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
280304Sjkim  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
280304Sjkim  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
280304Sjkim  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
280304Sjkim  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
280304Sjkim  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
280304Sjkim  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
280304Sjkim  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
280304Sjkim  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
280304Sjkim  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
280304Sjkim  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
280304Sjkim  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
280304Sjkim  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
280304Sjkim  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
280304Sjkim  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
280304Sjkim  {1, 0, 0, 0}},
280304Sjkim {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
280304Sjkim  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
280304Sjkim  {1, 0, 0, 0}}}
280304Sjkim};
238384Sjkim
238384Sjkim/* Precomputation for the group generator. */
238384Sjkimtypedef struct {
280304Sjkim    felem g_pre_comp[2][16][3];
280304Sjkim    int references;
238384Sjkim} NISTP224_PRE_COMP;
238384Sjkim
238384Sjkimconst EC_METHOD *EC_GFp_nistp224_method(void)
280304Sjkim{
280304Sjkim    static const EC_METHOD ret = {
280304Sjkim        EC_FLAGS_DEFAULT_OCT,
280304Sjkim        NID_X9_62_prime_field,
280304Sjkim        ec_GFp_nistp224_group_init,
280304Sjkim        ec_GFp_simple_group_finish,
280304Sjkim        ec_GFp_simple_group_clear_finish,
280304Sjkim        ec_GFp_nist_group_copy,
280304Sjkim        ec_GFp_nistp224_group_set_curve,
280304Sjkim        ec_GFp_simple_group_get_curve,
280304Sjkim        ec_GFp_simple_group_get_degree,
280304Sjkim        ec_GFp_simple_group_check_discriminant,
280304Sjkim        ec_GFp_simple_point_init,
280304Sjkim        ec_GFp_simple_point_finish,
280304Sjkim        ec_GFp_simple_point_clear_finish,
280304Sjkim        ec_GFp_simple_point_copy,
280304Sjkim        ec_GFp_simple_point_set_to_infinity,
280304Sjkim        ec_GFp_simple_set_Jprojective_coordinates_GFp,
280304Sjkim        ec_GFp_simple_get_Jprojective_coordinates_GFp,
280304Sjkim        ec_GFp_simple_point_set_affine_coordinates,
280304Sjkim        ec_GFp_nistp224_point_get_affine_coordinates,
280304Sjkim        0 /* point_set_compressed_coordinates */ ,
280304Sjkim        0 /* point2oct */ ,
280304Sjkim        0 /* oct2point */ ,
280304Sjkim        ec_GFp_simple_add,
280304Sjkim        ec_GFp_simple_dbl,
280304Sjkim        ec_GFp_simple_invert,
280304Sjkim        ec_GFp_simple_is_at_infinity,
280304Sjkim        ec_GFp_simple_is_on_curve,
280304Sjkim        ec_GFp_simple_cmp,
280304Sjkim        ec_GFp_simple_make_affine,
280304Sjkim        ec_GFp_simple_points_make_affine,
280304Sjkim        ec_GFp_nistp224_points_mul,
280304Sjkim        ec_GFp_nistp224_precompute_mult,
280304Sjkim        ec_GFp_nistp224_have_precompute_mult,
280304Sjkim        ec_GFp_nist_field_mul,
280304Sjkim        ec_GFp_nist_field_sqr,
280304Sjkim        0 /* field_div */ ,
280304Sjkim        0 /* field_encode */ ,
280304Sjkim        0 /* field_decode */ ,
280304Sjkim        0                       /* field_set_to_one */
280304Sjkim    };
238384Sjkim
280304Sjkim    return &ret;
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Helper functions to convert field elements to/from internal representation
280304Sjkim */
238384Sjkimstatic void bin28_to_felem(felem out, const u8 in[28])
280304Sjkim{
280304Sjkim    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
280304Sjkim    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
280304Sjkim    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
280304Sjkim    out[3] = (*((const uint64_t *)(in+20))) >> 8;
280304Sjkim}
238384Sjkim
238384Sjkimstatic void felem_to_bin28(u8 out[28], const felem in)
280304Sjkim{
280304Sjkim    unsigned i;
280304Sjkim    for (i = 0; i < 7; ++i) {
280304Sjkim        out[i] = in[0] >> (8 * i);
280304Sjkim        out[i + 7] = in[1] >> (8 * i);
280304Sjkim        out[i + 14] = in[2] >> (8 * i);
280304Sjkim        out[i + 21] = in[3] >> (8 * i);
280304Sjkim    }
280304Sjkim}
238384Sjkim
238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len)
280304Sjkim{
280304Sjkim    unsigned i;
280304Sjkim    for (i = 0; i < len; ++i)
280304Sjkim        out[i] = in[len - 1 - i];
280304Sjkim}
238384Sjkim
238384Sjkim/* From OpenSSL BIGNUM to internal representation */
238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn)
280304Sjkim{
280304Sjkim    felem_bytearray b_in;
280304Sjkim    felem_bytearray b_out;
280304Sjkim    unsigned num_bytes;
238384Sjkim
280304Sjkim    /* BN_bn2bin eats leading zeroes */
280304Sjkim    memset(b_out, 0, sizeof b_out);
280304Sjkim    num_bytes = BN_num_bytes(bn);
280304Sjkim    if (num_bytes > sizeof b_out) {
280304Sjkim        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
280304Sjkim        return 0;
280304Sjkim    }
280304Sjkim    if (BN_is_negative(bn)) {
280304Sjkim        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
280304Sjkim        return 0;
280304Sjkim    }
280304Sjkim    num_bytes = BN_bn2bin(bn, b_in);
280304Sjkim    flip_endian(b_out, b_in, num_bytes);
280304Sjkim    bin28_to_felem(out, b_out);
280304Sjkim    return 1;
280304Sjkim}
238384Sjkim
238384Sjkim/* From internal representation to OpenSSL BIGNUM */
238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
280304Sjkim{
280304Sjkim    felem_bytearray b_in, b_out;
280304Sjkim    felem_to_bin28(b_in, in);
280304Sjkim    flip_endian(b_out, b_in, sizeof b_out);
280304Sjkim    return BN_bin2bn(b_out, sizeof b_out, out);
280304Sjkim}
238384Sjkim
238384Sjkim/******************************************************************************/
280304Sjkim/*-
280304Sjkim *                              FIELD OPERATIONS
238384Sjkim *
238384Sjkim * Field operations, using the internal representation of field elements.
238384Sjkim * NB! These operations are specific to our point multiplication and cannot be
238384Sjkim * expected to be correct in general - e.g., multiplication with a large scalar
238384Sjkim * will cause an overflow.
238384Sjkim *
238384Sjkim */
238384Sjkim
238384Sjkimstatic void felem_one(felem out)
280304Sjkim{
280304Sjkim    out[0] = 1;
280304Sjkim    out[1] = 0;
280304Sjkim    out[2] = 0;
280304Sjkim    out[3] = 0;
280304Sjkim}
238384Sjkim
238384Sjkimstatic void felem_assign(felem out, const felem in)
280304Sjkim{
280304Sjkim    out[0] = in[0];
280304Sjkim    out[1] = in[1];
280304Sjkim    out[2] = in[2];
280304Sjkim    out[3] = in[3];
280304Sjkim}
238384Sjkim
238384Sjkim/* Sum two field elements: out += in */
238384Sjkimstatic void felem_sum(felem out, const felem in)
280304Sjkim{
280304Sjkim    out[0] += in[0];
280304Sjkim    out[1] += in[1];
280304Sjkim    out[2] += in[2];
280304Sjkim    out[3] += in[3];
280304Sjkim}
238384Sjkim
238384Sjkim/* Get negative value: out = -in */
238384Sjkim/* Assumes in[i] < 2^57 */
238384Sjkimstatic void felem_neg(felem out, const felem in)
280304Sjkim{
280304Sjkim    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
280304Sjkim    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
280304Sjkim    static const limb two58m42m2 = (((limb) 1) << 58) -
280304Sjkim        (((limb) 1) << 42) - (((limb) 1) << 2);
238384Sjkim
280304Sjkim    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
280304Sjkim    out[0] = two58p2 - in[0];
280304Sjkim    out[1] = two58m42m2 - in[1];
280304Sjkim    out[2] = two58m2 - in[2];
280304Sjkim    out[3] = two58m2 - in[3];
280304Sjkim}
238384Sjkim
238384Sjkim/* Subtract field elements: out -= in */
238384Sjkim/* Assumes in[i] < 2^57 */
238384Sjkimstatic void felem_diff(felem out, const felem in)
280304Sjkim{
280304Sjkim    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
280304Sjkim    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
280304Sjkim    static const limb two58m42m2 = (((limb) 1) << 58) -
280304Sjkim        (((limb) 1) << 42) - (((limb) 1) << 2);
238384Sjkim
280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
280304Sjkim    out[0] += two58p2;
280304Sjkim    out[1] += two58m42m2;
280304Sjkim    out[2] += two58m2;
280304Sjkim    out[3] += two58m2;
238384Sjkim
280304Sjkim    out[0] -= in[0];
280304Sjkim    out[1] -= in[1];
280304Sjkim    out[2] -= in[2];
280304Sjkim    out[3] -= in[3];
280304Sjkim}
238384Sjkim
238384Sjkim/* Subtract in unreduced 128-bit mode: out -= in */
238384Sjkim/* Assumes in[i] < 2^119 */
238384Sjkimstatic void widefelem_diff(widefelem out, const widefelem in)
280304Sjkim{
280304Sjkim    static const widelimb two120 = ((widelimb) 1) << 120;
280304Sjkim    static const widelimb two120m64 = (((widelimb) 1) << 120) -
280304Sjkim        (((widelimb) 1) << 64);
280304Sjkim    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
280304Sjkim        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
238384Sjkim
280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
280304Sjkim    out[0] += two120;
280304Sjkim    out[1] += two120m64;
280304Sjkim    out[2] += two120m64;
280304Sjkim    out[3] += two120;
280304Sjkim    out[4] += two120m104m64;
280304Sjkim    out[5] += two120m64;
280304Sjkim    out[6] += two120m64;
238384Sjkim
280304Sjkim    out[0] -= in[0];
280304Sjkim    out[1] -= in[1];
280304Sjkim    out[2] -= in[2];
280304Sjkim    out[3] -= in[3];
280304Sjkim    out[4] -= in[4];
280304Sjkim    out[5] -= in[5];
280304Sjkim    out[6] -= in[6];
280304Sjkim}
238384Sjkim
238384Sjkim/* Subtract in mixed mode: out128 -= in64 */
238384Sjkim/* in[i] < 2^63 */
238384Sjkimstatic void felem_diff_128_64(widefelem out, const felem in)
280304Sjkim{
280304Sjkim    static const widelimb two64p8 = (((widelimb) 1) << 64) +
280304Sjkim        (((widelimb) 1) << 8);
280304Sjkim    static const widelimb two64m8 = (((widelimb) 1) << 64) -
280304Sjkim        (((widelimb) 1) << 8);
280304Sjkim    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
280304Sjkim        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
238384Sjkim
280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
280304Sjkim    out[0] += two64p8;
280304Sjkim    out[1] += two64m48m8;
280304Sjkim    out[2] += two64m8;
280304Sjkim    out[3] += two64m8;
238384Sjkim
280304Sjkim    out[0] -= in[0];
280304Sjkim    out[1] -= in[1];
280304Sjkim    out[2] -= in[2];
280304Sjkim    out[3] -= in[3];
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Multiply a field element by a scalar: out = out * scalar The scalars we
280304Sjkim * actually use are small, so results fit without overflow
280304Sjkim */
238384Sjkimstatic void felem_scalar(felem out, const limb scalar)
280304Sjkim{
280304Sjkim    out[0] *= scalar;
280304Sjkim    out[1] *= scalar;
280304Sjkim    out[2] *= scalar;
280304Sjkim    out[3] *= scalar;
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Multiply an unreduced field element by a scalar: out = out * scalar The
280304Sjkim * scalars we actually use are small, so results fit without overflow
280304Sjkim */
238384Sjkimstatic void widefelem_scalar(widefelem out, const widelimb scalar)
280304Sjkim{
280304Sjkim    out[0] *= scalar;
280304Sjkim    out[1] *= scalar;
280304Sjkim    out[2] *= scalar;
280304Sjkim    out[3] *= scalar;
280304Sjkim    out[4] *= scalar;
280304Sjkim    out[5] *= scalar;
280304Sjkim    out[6] *= scalar;
280304Sjkim}
238384Sjkim
238384Sjkim/* Square a field element: out = in^2 */
238384Sjkimstatic void felem_square(widefelem out, const felem in)
280304Sjkim{
280304Sjkim    limb tmp0, tmp1, tmp2;
280304Sjkim    tmp0 = 2 * in[0];
280304Sjkim    tmp1 = 2 * in[1];
280304Sjkim    tmp2 = 2 * in[2];
280304Sjkim    out[0] = ((widelimb) in[0]) * in[0];
280304Sjkim    out[1] = ((widelimb) in[0]) * tmp1;
280304Sjkim    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
280304Sjkim    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
280304Sjkim    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
280304Sjkim    out[5] = ((widelimb) in[3]) * tmp2;
280304Sjkim    out[6] = ((widelimb) in[3]) * in[3];
280304Sjkim}
238384Sjkim
238384Sjkim/* Multiply two field elements: out = in1 * in2 */
238384Sjkimstatic void felem_mul(widefelem out, const felem in1, const felem in2)
280304Sjkim{
280304Sjkim    out[0] = ((widelimb) in1[0]) * in2[0];
280304Sjkim    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
280304Sjkim    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
280304Sjkim        ((widelimb) in1[2]) * in2[0];
280304Sjkim    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
280304Sjkim        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
280304Sjkim    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
280304Sjkim        ((widelimb) in1[3]) * in2[1];
280304Sjkim    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
280304Sjkim    out[6] = ((widelimb) in1[3]) * in2[3];
280304Sjkim}
238384Sjkim
280304Sjkim/*-
280304Sjkim * Reduce seven 128-bit coefficients to four 64-bit coefficients.
238384Sjkim * Requires in[i] < 2^126,
238384Sjkim * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
238384Sjkimstatic void felem_reduce(felem out, const widefelem in)
280304Sjkim{
280304Sjkim    static const widelimb two127p15 = (((widelimb) 1) << 127) +
280304Sjkim        (((widelimb) 1) << 15);
280304Sjkim    static const widelimb two127m71 = (((widelimb) 1) << 127) -
280304Sjkim        (((widelimb) 1) << 71);
280304Sjkim    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
280304Sjkim        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
280304Sjkim    widelimb output[5];
238384Sjkim
280304Sjkim    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
280304Sjkim    output[0] = in[0] + two127p15;
280304Sjkim    output[1] = in[1] + two127m71m55;
280304Sjkim    output[2] = in[2] + two127m71;
280304Sjkim    output[3] = in[3];
280304Sjkim    output[4] = in[4];
238384Sjkim
280304Sjkim    /* Eliminate in[4], in[5], in[6] */
280304Sjkim    output[4] += in[6] >> 16;
280304Sjkim    output[3] += (in[6] & 0xffff) << 40;
280304Sjkim    output[2] -= in[6];
238384Sjkim
280304Sjkim    output[3] += in[5] >> 16;
280304Sjkim    output[2] += (in[5] & 0xffff) << 40;
280304Sjkim    output[1] -= in[5];
238384Sjkim
280304Sjkim    output[2] += output[4] >> 16;
280304Sjkim    output[1] += (output[4] & 0xffff) << 40;
280304Sjkim    output[0] -= output[4];
238384Sjkim
280304Sjkim    /* Carry 2 -> 3 -> 4 */
280304Sjkim    output[3] += output[2] >> 56;
280304Sjkim    output[2] &= 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    output[4] = output[3] >> 56;
280304Sjkim    output[3] &= 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
238384Sjkim
280304Sjkim    /* Eliminate output[4] */
280304Sjkim    output[2] += output[4] >> 16;
280304Sjkim    /* output[2] < 2^56 + 2^56 = 2^57 */
280304Sjkim    output[1] += (output[4] & 0xffff) << 40;
280304Sjkim    output[0] -= output[4];
238384Sjkim
280304Sjkim    /* Carry 0 -> 1 -> 2 -> 3 */
280304Sjkim    output[1] += output[0] >> 56;
280304Sjkim    out[0] = output[0] & 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    output[2] += output[1] >> 56;
280304Sjkim    /* output[2] < 2^57 + 2^72 */
280304Sjkim    out[1] = output[1] & 0x00ffffffffffffff;
280304Sjkim    output[3] += output[2] >> 56;
280304Sjkim    /* output[3] <= 2^56 + 2^16 */
280304Sjkim    out[2] = output[2] & 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    /*-
280304Sjkim     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
280304Sjkim     * out[3] <= 2^56 + 2^16 (due to final carry),
280304Sjkim     * so out < 2*p
280304Sjkim     */
280304Sjkim    out[3] = output[3];
280304Sjkim}
238384Sjkim
238384Sjkimstatic void felem_square_reduce(felem out, const felem in)
280304Sjkim{
280304Sjkim    widefelem tmp;
280304Sjkim    felem_square(tmp, in);
280304Sjkim    felem_reduce(out, tmp);
280304Sjkim}
238384Sjkim
238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2)
280304Sjkim{
280304Sjkim    widefelem tmp;
280304Sjkim    felem_mul(tmp, in1, in2);
280304Sjkim    felem_reduce(out, tmp);
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
280304Sjkim * call felem_reduce first)
280304Sjkim */
238384Sjkimstatic void felem_contract(felem out, const felem in)
280304Sjkim{
280304Sjkim    static const int64_t two56 = ((limb) 1) << 56;
280304Sjkim    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
280304Sjkim    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
280304Sjkim    int64_t tmp[4], a;
280304Sjkim    tmp[0] = in[0];
280304Sjkim    tmp[1] = in[1];
280304Sjkim    tmp[2] = in[2];
280304Sjkim    tmp[3] = in[3];
280304Sjkim    /* Case 1: a = 1 iff in >= 2^224 */
280304Sjkim    a = (in[3] >> 56);
280304Sjkim    tmp[0] -= a;
280304Sjkim    tmp[1] += a << 40;
280304Sjkim    tmp[3] &= 0x00ffffffffffffff;
280304Sjkim    /*
280304Sjkim     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
280304Sjkim     * and the lower part is non-zero
280304Sjkim     */
280304Sjkim    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
280304Sjkim        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
280304Sjkim    a &= 0x00ffffffffffffff;
280304Sjkim    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
280304Sjkim    a = (a - 1) >> 63;
280304Sjkim    /* subtract 2^224 - 2^96 + 1 if a is all-one */
280304Sjkim    tmp[3] &= a ^ 0xffffffffffffffff;
280304Sjkim    tmp[2] &= a ^ 0xffffffffffffffff;
280304Sjkim    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
280304Sjkim    tmp[0] -= 1 & a;
238384Sjkim
280304Sjkim    /*
280304Sjkim     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
280304Sjkim     * non-zero, so we only need one step
280304Sjkim     */
280304Sjkim    a = tmp[0] >> 63;
280304Sjkim    tmp[0] += two56 & a;
280304Sjkim    tmp[1] -= 1 & a;
238384Sjkim
280304Sjkim    /* carry 1 -> 2 -> 3 */
280304Sjkim    tmp[2] += tmp[1] >> 56;
280304Sjkim    tmp[1] &= 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    tmp[3] += tmp[2] >> 56;
280304Sjkim    tmp[2] &= 0x00ffffffffffffff;
238384Sjkim
280304Sjkim    /* Now 0 <= out < p */
280304Sjkim    out[0] = tmp[0];
280304Sjkim    out[1] = tmp[1];
280304Sjkim    out[2] = tmp[2];
280304Sjkim    out[3] = tmp[3];
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
280304Sjkim * elements are reduced to in < 2^225, so we only need to check three cases:
280304Sjkim * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
280304Sjkim */
238384Sjkimstatic limb felem_is_zero(const felem in)
280304Sjkim{
280304Sjkim    limb zero, two224m96p1, two225m97p2;
238384Sjkim
280304Sjkim    zero = in[0] | in[1] | in[2] | in[3];
280304Sjkim    zero = (((int64_t) (zero) - 1) >> 63) & 1;
280304Sjkim    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
280304Sjkim        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
280304Sjkim    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
280304Sjkim    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
280304Sjkim        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
280304Sjkim    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
280304Sjkim    return (zero | two224m96p1 | two225m97p2);
280304Sjkim}
238384Sjkim
238384Sjkimstatic limb felem_is_zero_int(const felem in)
280304Sjkim{
280304Sjkim    return (int)(felem_is_zero(in) & ((limb) 1));
280304Sjkim}
238384Sjkim
238384Sjkim/* Invert a field element */
238384Sjkim/* Computation chain copied from djb's code */
238384Sjkimstatic void felem_inv(felem out, const felem in)
280304Sjkim{
280304Sjkim    felem ftmp, ftmp2, ftmp3, ftmp4;
280304Sjkim    widefelem tmp;
280304Sjkim    unsigned i;
238384Sjkim
280304Sjkim    felem_square(tmp, in);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2 */
280304Sjkim    felem_mul(tmp, in, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
280304Sjkim    felem_mul(tmp, in, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
280304Sjkim    felem_square(tmp, ftmp2);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
280304Sjkim    felem_square(tmp, ftmp2);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
280304Sjkim    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
280304Sjkim        felem_square(tmp, ftmp2);
280304Sjkim        felem_reduce(ftmp2, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
280304Sjkim    felem_square(tmp, ftmp2);
280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
280304Sjkim    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
280304Sjkim        felem_square(tmp, ftmp3);
280304Sjkim        felem_reduce(ftmp3, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp3, ftmp2);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
280304Sjkim    felem_square(tmp, ftmp2);
280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
280304Sjkim    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
280304Sjkim        felem_square(tmp, ftmp3);
280304Sjkim        felem_reduce(ftmp3, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp3, ftmp2);
280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
280304Sjkim    felem_square(tmp, ftmp3);
280304Sjkim    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
280304Sjkim    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
280304Sjkim        felem_square(tmp, ftmp4);
280304Sjkim        felem_reduce(ftmp4, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp3, ftmp4);
280304Sjkim    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
280304Sjkim    felem_square(tmp, ftmp3);
280304Sjkim    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
280304Sjkim    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
280304Sjkim        felem_square(tmp, ftmp4);
280304Sjkim        felem_reduce(ftmp4, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp2, ftmp4);
280304Sjkim    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
280304Sjkim    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
280304Sjkim        felem_square(tmp, ftmp2);
280304Sjkim        felem_reduce(ftmp2, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
280304Sjkim    felem_mul(tmp, ftmp, in);
280304Sjkim    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
280304Sjkim    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
280304Sjkim        felem_square(tmp, ftmp);
280304Sjkim        felem_reduce(ftmp, tmp);
280304Sjkim    }
280304Sjkim    felem_mul(tmp, ftmp, ftmp3);
280304Sjkim    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
280304Sjkim * out to itself.
280304Sjkim */
280304Sjkimstatic void copy_conditional(felem out, const felem in, limb icopy)
280304Sjkim{
280304Sjkim    unsigned i;
280304Sjkim    /*
280304Sjkim     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
280304Sjkim     */
280304Sjkim    const limb copy = -icopy;
280304Sjkim    for (i = 0; i < 4; ++i) {
280304Sjkim        const limb tmp = copy & (in[i] ^ out[i]);
280304Sjkim        out[i] ^= tmp;
280304Sjkim    }
280304Sjkim}
238384Sjkim
238384Sjkim/******************************************************************************/
280304Sjkim/*-
280304Sjkim *                       ELLIPTIC CURVE POINT OPERATIONS
238384Sjkim *
238384Sjkim * Points are represented in Jacobian projective coordinates:
238384Sjkim * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
238384Sjkim * or to the point at infinity if Z == 0.
238384Sjkim *
238384Sjkim */
238384Sjkim
280304Sjkim/*-
280304Sjkim * Double an elliptic curve point:
238384Sjkim * (X', Y', Z') = 2 * (X, Y, Z), where
238384Sjkim * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
238384Sjkim * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
238384Sjkim * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
280304Sjkim * while x_out == y_in is not (maybe this works, but it's not tested).
280304Sjkim */
238384Sjkimstatic void
238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out,
238384Sjkim             const felem x_in, const felem y_in, const felem z_in)
280304Sjkim{
280304Sjkim    widefelem tmp, tmp2;
280304Sjkim    felem delta, gamma, beta, alpha, ftmp, ftmp2;
238384Sjkim
280304Sjkim    felem_assign(ftmp, x_in);
280304Sjkim    felem_assign(ftmp2, x_in);
238384Sjkim
280304Sjkim    /* delta = z^2 */
280304Sjkim    felem_square(tmp, z_in);
280304Sjkim    felem_reduce(delta, tmp);
238384Sjkim
280304Sjkim    /* gamma = y^2 */
280304Sjkim    felem_square(tmp, y_in);
280304Sjkim    felem_reduce(gamma, tmp);
238384Sjkim
280304Sjkim    /* beta = x*gamma */
280304Sjkim    felem_mul(tmp, x_in, gamma);
280304Sjkim    felem_reduce(beta, tmp);
238384Sjkim
280304Sjkim    /* alpha = 3*(x-delta)*(x+delta) */
280304Sjkim    felem_diff(ftmp, delta);
280304Sjkim    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
280304Sjkim    felem_sum(ftmp2, delta);
280304Sjkim    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
280304Sjkim    felem_scalar(ftmp2, 3);
280304Sjkim    /* ftmp2[i] < 3 * 2^58 < 2^60 */
280304Sjkim    felem_mul(tmp, ftmp, ftmp2);
280304Sjkim    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
280304Sjkim    felem_reduce(alpha, tmp);
238384Sjkim
280304Sjkim    /* x' = alpha^2 - 8*beta */
280304Sjkim    felem_square(tmp, alpha);
280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
280304Sjkim    felem_assign(ftmp, beta);
280304Sjkim    felem_scalar(ftmp, 8);
280304Sjkim    /* ftmp[i] < 8 * 2^57 = 2^60 */
280304Sjkim    felem_diff_128_64(tmp, ftmp);
280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
280304Sjkim    felem_reduce(x_out, tmp);
238384Sjkim
280304Sjkim    /* z' = (y + z)^2 - gamma - delta */
280304Sjkim    felem_sum(delta, gamma);
280304Sjkim    /* delta[i] < 2^57 + 2^57 = 2^58 */
280304Sjkim    felem_assign(ftmp, y_in);
280304Sjkim    felem_sum(ftmp, z_in);
280304Sjkim    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
280304Sjkim    felem_diff_128_64(tmp, delta);
280304Sjkim    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
280304Sjkim    felem_reduce(z_out, tmp);
238384Sjkim
280304Sjkim    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
280304Sjkim    felem_scalar(beta, 4);
280304Sjkim    /* beta[i] < 4 * 2^57 = 2^59 */
280304Sjkim    felem_diff(beta, x_out);
280304Sjkim    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
280304Sjkim    felem_mul(tmp, alpha, beta);
280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
280304Sjkim    felem_square(tmp2, gamma);
280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
280304Sjkim    widefelem_scalar(tmp2, 8);
280304Sjkim    /* tmp2[i] < 8 * 2^116 = 2^119 */
280304Sjkim    widefelem_diff(tmp, tmp2);
280304Sjkim    /* tmp[i] < 2^119 + 2^120 < 2^121 */
280304Sjkim    felem_reduce(y_out, tmp);
280304Sjkim}
238384Sjkim
280304Sjkim/*-
280304Sjkim * Add two elliptic curve points:
238384Sjkim * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
238384Sjkim * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
238384Sjkim * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
238384Sjkim * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
238384Sjkim *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
238384Sjkim * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
238384Sjkim *
238384Sjkim * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
238384Sjkim */
238384Sjkim
280304Sjkim/*
280304Sjkim * This function is not entirely constant-time: it includes a branch for
280304Sjkim * checking whether the two input points are equal, (while not equal to the
280304Sjkim * point at infinity). This case never happens during single point
280304Sjkim * multiplication, so there is no timing leak for ECDH or ECDSA signing.
280304Sjkim */
238384Sjkimstatic void point_add(felem x3, felem y3, felem z3,
280304Sjkim                      const felem x1, const felem y1, const felem z1,
280304Sjkim                      const int mixed, const felem x2, const felem y2,
280304Sjkim                      const felem z2)
280304Sjkim{
280304Sjkim    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
280304Sjkim    widefelem tmp, tmp2;
280304Sjkim    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
238384Sjkim
280304Sjkim    if (!mixed) {
280304Sjkim        /* ftmp2 = z2^2 */
280304Sjkim        felem_square(tmp, z2);
280304Sjkim        felem_reduce(ftmp2, tmp);
238384Sjkim
280304Sjkim        /* ftmp4 = z2^3 */
280304Sjkim        felem_mul(tmp, ftmp2, z2);
280304Sjkim        felem_reduce(ftmp4, tmp);
238384Sjkim
280304Sjkim        /* ftmp4 = z2^3*y1 */
280304Sjkim        felem_mul(tmp2, ftmp4, y1);
280304Sjkim        felem_reduce(ftmp4, tmp2);
238384Sjkim
280304Sjkim        /* ftmp2 = z2^2*x1 */
280304Sjkim        felem_mul(tmp2, ftmp2, x1);
280304Sjkim        felem_reduce(ftmp2, tmp2);
280304Sjkim    } else {
280304Sjkim        /*
280304Sjkim         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
280304Sjkim         */
238384Sjkim
280304Sjkim        /* ftmp4 = z2^3*y1 */
280304Sjkim        felem_assign(ftmp4, y1);
238384Sjkim
280304Sjkim        /* ftmp2 = z2^2*x1 */
280304Sjkim        felem_assign(ftmp2, x1);
280304Sjkim    }
238384Sjkim
280304Sjkim    /* ftmp = z1^2 */
280304Sjkim    felem_square(tmp, z1);
280304Sjkim    felem_reduce(ftmp, tmp);
238384Sjkim
280304Sjkim    /* ftmp3 = z1^3 */
280304Sjkim    felem_mul(tmp, ftmp, z1);
280304Sjkim    felem_reduce(ftmp3, tmp);
238384Sjkim
280304Sjkim    /* tmp = z1^3*y2 */
280304Sjkim    felem_mul(tmp, ftmp3, y2);
280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
238384Sjkim
280304Sjkim    /* ftmp3 = z1^3*y2 - z2^3*y1 */
280304Sjkim    felem_diff_128_64(tmp, ftmp4);
280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
280304Sjkim    felem_reduce(ftmp3, tmp);
238384Sjkim
280304Sjkim    /* tmp = z1^2*x2 */
280304Sjkim    felem_mul(tmp, ftmp, x2);
280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
238384Sjkim
280304Sjkim    /* ftmp = z1^2*x2 - z2^2*x1 */
280304Sjkim    felem_diff_128_64(tmp, ftmp2);
280304Sjkim    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
280304Sjkim    felem_reduce(ftmp, tmp);
238384Sjkim
280304Sjkim    /*
280304Sjkim     * the formulae are incorrect if the points are equal so we check for
280304Sjkim     * this and do doubling if this happens
280304Sjkim     */
280304Sjkim    x_equal = felem_is_zero(ftmp);
280304Sjkim    y_equal = felem_is_zero(ftmp3);
280304Sjkim    z1_is_zero = felem_is_zero(z1);
280304Sjkim    z2_is_zero = felem_is_zero(z2);
280304Sjkim    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
280304Sjkim    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
280304Sjkim        point_double(x3, y3, z3, x1, y1, z1);
280304Sjkim        return;
280304Sjkim    }
238384Sjkim
280304Sjkim    /* ftmp5 = z1*z2 */
280304Sjkim    if (!mixed) {
280304Sjkim        felem_mul(tmp, z1, z2);
280304Sjkim        felem_reduce(ftmp5, tmp);
280304Sjkim    } else {
280304Sjkim        /* special case z2 = 0 is handled later */
280304Sjkim        felem_assign(ftmp5, z1);
280304Sjkim    }
238384Sjkim
280304Sjkim    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
280304Sjkim    felem_mul(tmp, ftmp, ftmp5);
280304Sjkim    felem_reduce(z_out, tmp);
238384Sjkim
280304Sjkim    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
280304Sjkim    felem_assign(ftmp5, ftmp);
280304Sjkim    felem_square(tmp, ftmp);
280304Sjkim    felem_reduce(ftmp, tmp);
238384Sjkim
280304Sjkim    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
280304Sjkim    felem_mul(tmp, ftmp, ftmp5);
280304Sjkim    felem_reduce(ftmp5, tmp);
238384Sjkim
280304Sjkim    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
280304Sjkim    felem_mul(tmp, ftmp2, ftmp);
280304Sjkim    felem_reduce(ftmp2, tmp);
238384Sjkim
280304Sjkim    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
280304Sjkim    felem_mul(tmp, ftmp4, ftmp5);
280304Sjkim    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
238384Sjkim
280304Sjkim    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
280304Sjkim    felem_square(tmp2, ftmp3);
280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
238384Sjkim
280304Sjkim    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
280304Sjkim    felem_diff_128_64(tmp2, ftmp5);
280304Sjkim    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
238384Sjkim
280304Sjkim    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
280304Sjkim    felem_assign(ftmp5, ftmp2);
280304Sjkim    felem_scalar(ftmp5, 2);
280304Sjkim    /* ftmp5[i] < 2 * 2^57 = 2^58 */
238384Sjkim
280304Sjkim    /*-
280304Sjkim     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
280304Sjkim     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
280304Sjkim     */
280304Sjkim    felem_diff_128_64(tmp2, ftmp5);
280304Sjkim    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
280304Sjkim    felem_reduce(x_out, tmp2);
238384Sjkim
280304Sjkim    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
280304Sjkim    felem_diff(ftmp2, x_out);
280304Sjkim    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
238384Sjkim
280304Sjkim    /*
280304Sjkim     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
280304Sjkim     */
280304Sjkim    felem_mul(tmp2, ftmp3, ftmp2);
280304Sjkim    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
238384Sjkim
280304Sjkim    /*-
280304Sjkim     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
280304Sjkim     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
280304Sjkim     */
280304Sjkim    widefelem_diff(tmp2, tmp);
280304Sjkim    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
280304Sjkim    felem_reduce(y_out, tmp2);
238384Sjkim
280304Sjkim    /*
280304Sjkim     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
280304Sjkim     * the point at infinity, so we need to check for this separately
280304Sjkim     */
238384Sjkim
280304Sjkim    /*
280304Sjkim     * if point 1 is at infinity, copy point 2 to output, and vice versa
280304Sjkim     */
280304Sjkim    copy_conditional(x_out, x2, z1_is_zero);
280304Sjkim    copy_conditional(x_out, x1, z2_is_zero);
280304Sjkim    copy_conditional(y_out, y2, z1_is_zero);
280304Sjkim    copy_conditional(y_out, y1, z2_is_zero);
280304Sjkim    copy_conditional(z_out, z2, z1_is_zero);
280304Sjkim    copy_conditional(z_out, z1, z2_is_zero);
280304Sjkim    felem_assign(x3, x_out);
280304Sjkim    felem_assign(y3, y_out);
280304Sjkim    felem_assign(z3, z_out);
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * select_point selects the |idx|th point from a precomputation table and
280304Sjkim * copies it to out.
280304Sjkim * The pre_comp array argument should be size of |size| argument
280304Sjkim */
280304Sjkimstatic void select_point(const u64 idx, unsigned int size,
280304Sjkim                         const felem pre_comp[][3], felem out[3])
280304Sjkim{
280304Sjkim    unsigned i, j;
280304Sjkim    limb *outlimbs = &out[0][0];
280304Sjkim    memset(outlimbs, 0, 3 * sizeof(felem));
238384Sjkim
280304Sjkim    for (i = 0; i < size; i++) {
280304Sjkim        const limb *inlimbs = &pre_comp[i][0][0];
280304Sjkim        u64 mask = i ^ idx;
280304Sjkim        mask |= mask >> 4;
280304Sjkim        mask |= mask >> 2;
280304Sjkim        mask |= mask >> 1;
280304Sjkim        mask &= 1;
280304Sjkim        mask--;
280304Sjkim        for (j = 0; j < 4 * 3; j++)
280304Sjkim            outlimbs[j] |= inlimbs[j] & mask;
280304Sjkim    }
280304Sjkim}
238384Sjkim
238384Sjkim/* get_bit returns the |i|th bit in |in| */
238384Sjkimstatic char get_bit(const felem_bytearray in, unsigned i)
280304Sjkim{
280304Sjkim    if (i >= 224)
280304Sjkim        return 0;
280304Sjkim    return (in[i >> 3] >> (i & 7)) & 1;
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Interleaved point multiplication using precomputed point multiples: The
280304Sjkim * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
280304Sjkim * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
280304Sjkim * generator, using certain (large) precomputed multiples in g_pre_comp.
280304Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out
280304Sjkim */
238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out,
280304Sjkim                      const felem_bytearray scalars[],
280304Sjkim                      const unsigned num_points, const u8 *g_scalar,
280304Sjkim                      const int mixed, const felem pre_comp[][17][3],
280304Sjkim                      const felem g_pre_comp[2][16][3])
280304Sjkim{
280304Sjkim    int i, skip;
280304Sjkim    unsigned num;
280304Sjkim    unsigned gen_mul = (g_scalar != NULL);
280304Sjkim    felem nq[3], tmp[4];
280304Sjkim    u64 bits;
280304Sjkim    u8 sign, digit;
238384Sjkim
280304Sjkim    /* set nq to the point at infinity */
280304Sjkim    memset(nq, 0, 3 * sizeof(felem));
238384Sjkim
280304Sjkim    /*
280304Sjkim     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
280304Sjkim     * of the generator (two in each of the last 28 rounds) and additions of
280304Sjkim     * other points multiples (every 5th round).
280304Sjkim     */
280304Sjkim    skip = 1;                   /* save two point operations in the first
280304Sjkim                                 * round */
280304Sjkim    for (i = (num_points ? 220 : 27); i >= 0; --i) {
280304Sjkim        /* double */
280304Sjkim        if (!skip)
280304Sjkim            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
238384Sjkim
280304Sjkim        /* add multiples of the generator */
280304Sjkim        if (gen_mul && (i <= 27)) {
280304Sjkim            /* first, look 28 bits upwards */
280304Sjkim            bits = get_bit(g_scalar, i + 196) << 3;
280304Sjkim            bits |= get_bit(g_scalar, i + 140) << 2;
280304Sjkim            bits |= get_bit(g_scalar, i + 84) << 1;
280304Sjkim            bits |= get_bit(g_scalar, i + 28);
280304Sjkim            /* select the point to add, in constant time */
280304Sjkim            select_point(bits, 16, g_pre_comp[1], tmp);
238384Sjkim
280304Sjkim            if (!skip) {
280304Sjkim                /* value 1 below is argument for "mixed" */
280304Sjkim                point_add(nq[0], nq[1], nq[2],
280304Sjkim                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
280304Sjkim            } else {
280304Sjkim                memcpy(nq, tmp, 3 * sizeof(felem));
280304Sjkim                skip = 0;
280304Sjkim            }
238384Sjkim
280304Sjkim            /* second, look at the current position */
280304Sjkim            bits = get_bit(g_scalar, i + 168) << 3;
280304Sjkim            bits |= get_bit(g_scalar, i + 112) << 2;
280304Sjkim            bits |= get_bit(g_scalar, i + 56) << 1;
280304Sjkim            bits |= get_bit(g_scalar, i);
280304Sjkim            /* select the point to add, in constant time */
280304Sjkim            select_point(bits, 16, g_pre_comp[0], tmp);
280304Sjkim            point_add(nq[0], nq[1], nq[2],
280304Sjkim                      nq[0], nq[1], nq[2],
280304Sjkim                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
280304Sjkim        }
238384Sjkim
280304Sjkim        /* do other additions every 5 doublings */
280304Sjkim        if (num_points && (i % 5 == 0)) {
280304Sjkim            /* loop over all scalars */
280304Sjkim            for (num = 0; num < num_points; ++num) {
280304Sjkim                bits = get_bit(scalars[num], i + 4) << 5;
280304Sjkim                bits |= get_bit(scalars[num], i + 3) << 4;
280304Sjkim                bits |= get_bit(scalars[num], i + 2) << 3;
280304Sjkim                bits |= get_bit(scalars[num], i + 1) << 2;
280304Sjkim                bits |= get_bit(scalars[num], i) << 1;
280304Sjkim                bits |= get_bit(scalars[num], i - 1);
280304Sjkim                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
238384Sjkim
280304Sjkim                /* select the point to add or subtract */
280304Sjkim                select_point(digit, 17, pre_comp[num], tmp);
280304Sjkim                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
280304Sjkim                                            * point */
280304Sjkim                copy_conditional(tmp[1], tmp[3], sign);
238384Sjkim
280304Sjkim                if (!skip) {
280304Sjkim                    point_add(nq[0], nq[1], nq[2],
280304Sjkim                              nq[0], nq[1], nq[2],
280304Sjkim                              mixed, tmp[0], tmp[1], tmp[2]);
280304Sjkim                } else {
280304Sjkim                    memcpy(nq, tmp, 3 * sizeof(felem));
280304Sjkim                    skip = 0;
280304Sjkim                }
280304Sjkim            }
280304Sjkim        }
280304Sjkim    }
280304Sjkim    felem_assign(x_out, nq[0]);
280304Sjkim    felem_assign(y_out, nq[1]);
280304Sjkim    felem_assign(z_out, nq[2]);
280304Sjkim}
238384Sjkim
238384Sjkim/******************************************************************************/
280304Sjkim/*
280304Sjkim * FUNCTIONS TO MANAGE PRECOMPUTATION
238384Sjkim */
238384Sjkim
238384Sjkimstatic NISTP224_PRE_COMP *nistp224_pre_comp_new()
280304Sjkim{
280304Sjkim    NISTP224_PRE_COMP *ret = NULL;
280304Sjkim    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
280304Sjkim    if (!ret) {
280304Sjkim        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
280304Sjkim        return ret;
280304Sjkim    }
280304Sjkim    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
280304Sjkim    ret->references = 1;
280304Sjkim    return ret;
280304Sjkim}
238384Sjkim
238384Sjkimstatic void *nistp224_pre_comp_dup(void *src_)
280304Sjkim{
280304Sjkim    NISTP224_PRE_COMP *src = src_;
238384Sjkim
280304Sjkim    /* no need to actually copy, these objects never change! */
280304Sjkim    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
238384Sjkim
280304Sjkim    return src_;
280304Sjkim}
238384Sjkim
238384Sjkimstatic void nistp224_pre_comp_free(void *pre_)
280304Sjkim{
280304Sjkim    int i;
280304Sjkim    NISTP224_PRE_COMP *pre = pre_;
238384Sjkim
280304Sjkim    if (!pre)
280304Sjkim        return;
238384Sjkim
280304Sjkim    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
280304Sjkim    if (i > 0)
280304Sjkim        return;
238384Sjkim
280304Sjkim    OPENSSL_free(pre);
280304Sjkim}
238384Sjkim
238384Sjkimstatic void nistp224_pre_comp_clear_free(void *pre_)
280304Sjkim{
280304Sjkim    int i;
280304Sjkim    NISTP224_PRE_COMP *pre = pre_;
238384Sjkim
280304Sjkim    if (!pre)
280304Sjkim        return;
238384Sjkim
280304Sjkim    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
280304Sjkim    if (i > 0)
280304Sjkim        return;
238384Sjkim
280304Sjkim    OPENSSL_cleanse(pre, sizeof *pre);
280304Sjkim    OPENSSL_free(pre);
280304Sjkim}
238384Sjkim
238384Sjkim/******************************************************************************/
280304Sjkim/*
280304Sjkim * OPENSSL EC_METHOD FUNCTIONS
238384Sjkim */
238384Sjkim
238384Sjkimint ec_GFp_nistp224_group_init(EC_GROUP *group)
280304Sjkim{
280304Sjkim    int ret;
280304Sjkim    ret = ec_GFp_simple_group_init(group);
280304Sjkim    group->a_is_minus3 = 1;
280304Sjkim    return ret;
280304Sjkim}
238384Sjkim
238384Sjkimint ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
280304Sjkim                                    const BIGNUM *a, const BIGNUM *b,
280304Sjkim                                    BN_CTX *ctx)
280304Sjkim{
280304Sjkim    int ret = 0;
280304Sjkim    BN_CTX *new_ctx = NULL;
280304Sjkim    BIGNUM *curve_p, *curve_a, *curve_b;
238384Sjkim
280304Sjkim    if (ctx == NULL)
280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
280304Sjkim            return 0;
280304Sjkim    BN_CTX_start(ctx);
280304Sjkim    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
280304Sjkim        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
280304Sjkim        ((curve_b = BN_CTX_get(ctx)) == NULL))
280304Sjkim        goto err;
280304Sjkim    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
280304Sjkim    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
280304Sjkim    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
280304Sjkim    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
280304Sjkim              EC_R_WRONG_CURVE_PARAMETERS);
280304Sjkim        goto err;
280304Sjkim    }
280304Sjkim    group->field_mod_func = BN_nist_mod_224;
280304Sjkim    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
280304Sjkim err:
280304Sjkim    BN_CTX_end(ctx);
280304Sjkim    if (new_ctx != NULL)
280304Sjkim        BN_CTX_free(new_ctx);
280304Sjkim    return ret;
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
280304Sjkim * (X/Z^2, Y/Z^3)
280304Sjkim */
238384Sjkimint ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
280304Sjkim                                                 const EC_POINT *point,
280304Sjkim                                                 BIGNUM *x, BIGNUM *y,
280304Sjkim                                                 BN_CTX *ctx)
280304Sjkim{
280304Sjkim    felem z1, z2, x_in, y_in, x_out, y_out;
280304Sjkim    widefelem tmp;
238384Sjkim
280304Sjkim    if (EC_POINT_is_at_infinity(group, point)) {
280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
280304Sjkim              EC_R_POINT_AT_INFINITY);
280304Sjkim        return 0;
280304Sjkim    }
280304Sjkim    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
280304Sjkim        (!BN_to_felem(z1, &point->Z)))
280304Sjkim        return 0;
280304Sjkim    felem_inv(z2, z1);
280304Sjkim    felem_square(tmp, z2);
280304Sjkim    felem_reduce(z1, tmp);
280304Sjkim    felem_mul(tmp, x_in, z1);
280304Sjkim    felem_reduce(x_in, tmp);
280304Sjkim    felem_contract(x_out, x_in);
280304Sjkim    if (x != NULL) {
280304Sjkim        if (!felem_to_BN(x, x_out)) {
280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
280304Sjkim                  ERR_R_BN_LIB);
280304Sjkim            return 0;
280304Sjkim        }
280304Sjkim    }
280304Sjkim    felem_mul(tmp, z1, z2);
280304Sjkim    felem_reduce(z1, tmp);
280304Sjkim    felem_mul(tmp, y_in, z1);
280304Sjkim    felem_reduce(y_in, tmp);
280304Sjkim    felem_contract(y_out, y_in);
280304Sjkim    if (y != NULL) {
280304Sjkim        if (!felem_to_BN(y, y_out)) {
280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
280304Sjkim                  ERR_R_BN_LIB);
280304Sjkim            return 0;
280304Sjkim        }
280304Sjkim    }
280304Sjkim    return 1;
280304Sjkim}
238384Sjkim
280304Sjkimstatic void make_points_affine(size_t num, felem points[ /* num */ ][3],
280304Sjkim                               felem tmp_felems[ /* num+1 */ ])
280304Sjkim{
280304Sjkim    /*
280304Sjkim     * Runs in constant time, unless an input is the point at infinity (which
280304Sjkim     * normally shouldn't happen).
280304Sjkim     */
280304Sjkim    ec_GFp_nistp_points_make_affine_internal(num,
280304Sjkim                                             points,
280304Sjkim                                             sizeof(felem),
280304Sjkim                                             tmp_felems,
280304Sjkim                                             (void (*)(void *))felem_one,
280304Sjkim                                             (int (*)(const void *))
280304Sjkim                                             felem_is_zero_int,
280304Sjkim                                             (void (*)(void *, const void *))
280304Sjkim                                             felem_assign,
280304Sjkim                                             (void (*)(void *, const void *))
280304Sjkim                                             felem_square_reduce, (void (*)
280304Sjkim                                                                   (void *,
280304Sjkim                                                                    const void
280304Sjkim                                                                    *,
280304Sjkim                                                                    const void
280304Sjkim                                                                    *))
280304Sjkim                                             felem_mul_reduce,
280304Sjkim                                             (void (*)(void *, const void *))
280304Sjkim                                             felem_inv,
280304Sjkim                                             (void (*)(void *, const void *))
280304Sjkim                                             felem_contract);
280304Sjkim}
238384Sjkim
280304Sjkim/*
280304Sjkim * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
280304Sjkim * values Result is stored in r (r can equal one of the inputs).
280304Sjkim */
238384Sjkimint ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
280304Sjkim                               const BIGNUM *scalar, size_t num,
280304Sjkim                               const EC_POINT *points[],
280304Sjkim                               const BIGNUM *scalars[], BN_CTX *ctx)
280304Sjkim{
280304Sjkim    int ret = 0;
280304Sjkim    int j;
280304Sjkim    unsigned i;
280304Sjkim    int mixed = 0;
280304Sjkim    BN_CTX *new_ctx = NULL;
280304Sjkim    BIGNUM *x, *y, *z, *tmp_scalar;
280304Sjkim    felem_bytearray g_secret;
280304Sjkim    felem_bytearray *secrets = NULL;
280304Sjkim    felem(*pre_comp)[17][3] = NULL;
280304Sjkim    felem *tmp_felems = NULL;
280304Sjkim    felem_bytearray tmp;
280304Sjkim    unsigned num_bytes;
280304Sjkim    int have_pre_comp = 0;
280304Sjkim    size_t num_points = num;
280304Sjkim    felem x_in, y_in, z_in, x_out, y_out, z_out;
280304Sjkim    NISTP224_PRE_COMP *pre = NULL;
280304Sjkim    const felem(*g_pre_comp)[16][3] = NULL;
280304Sjkim    EC_POINT *generator = NULL;
280304Sjkim    const EC_POINT *p = NULL;
280304Sjkim    const BIGNUM *p_scalar = NULL;
238384Sjkim
280304Sjkim    if (ctx == NULL)
280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
280304Sjkim            return 0;
280304Sjkim    BN_CTX_start(ctx);
280304Sjkim    if (((x = BN_CTX_get(ctx)) == NULL) ||
280304Sjkim        ((y = BN_CTX_get(ctx)) == NULL) ||
280304Sjkim        ((z = BN_CTX_get(ctx)) == NULL) ||
280304Sjkim        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
280304Sjkim        goto err;
238384Sjkim
280304Sjkim    if (scalar != NULL) {
280304Sjkim        pre = EC_EX_DATA_get_data(group->extra_data,
280304Sjkim                                  nistp224_pre_comp_dup,
280304Sjkim                                  nistp224_pre_comp_free,
280304Sjkim                                  nistp224_pre_comp_clear_free);
280304Sjkim        if (pre)
280304Sjkim            /* we have precomputation, try to use it */
280304Sjkim            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
280304Sjkim        else
280304Sjkim            /* try to use the standard precomputation */
280304Sjkim            g_pre_comp = &gmul[0];
280304Sjkim        generator = EC_POINT_new(group);
280304Sjkim        if (generator == NULL)
280304Sjkim            goto err;
280304Sjkim        /* get the generator from precomputation */
280304Sjkim        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
280304Sjkim            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
280304Sjkim            !felem_to_BN(z, g_pre_comp[0][1][2])) {
280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
280304Sjkim            goto err;
280304Sjkim        }
280304Sjkim        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
280304Sjkim                                                      generator, x, y, z,
280304Sjkim                                                      ctx))
280304Sjkim            goto err;
280304Sjkim        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
280304Sjkim            /* precomputation matches generator */
280304Sjkim            have_pre_comp = 1;
280304Sjkim        else
280304Sjkim            /*
280304Sjkim             * we don't have valid precomputation: treat the generator as a
280304Sjkim             * random point
280304Sjkim             */
280304Sjkim            num_points = num_points + 1;
280304Sjkim    }
238384Sjkim
280304Sjkim    if (num_points > 0) {
280304Sjkim        if (num_points >= 3) {
280304Sjkim            /*
280304Sjkim             * unless we precompute multiples for just one or two points,
280304Sjkim             * converting those into affine form is time well spent
280304Sjkim             */
280304Sjkim            mixed = 1;
280304Sjkim        }
280304Sjkim        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
280304Sjkim        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
280304Sjkim        if (mixed)
280304Sjkim            tmp_felems =
280304Sjkim                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
280304Sjkim        if ((secrets == NULL) || (pre_comp == NULL)
280304Sjkim            || (mixed && (tmp_felems == NULL))) {
280304Sjkim            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
280304Sjkim            goto err;
280304Sjkim        }
238384Sjkim
280304Sjkim        /*
280304Sjkim         * we treat NULL scalars as 0, and NULL points as points at infinity,
280304Sjkim         * i.e., they contribute nothing to the linear combination
280304Sjkim         */
280304Sjkim        memset(secrets, 0, num_points * sizeof(felem_bytearray));
280304Sjkim        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
280304Sjkim        for (i = 0; i < num_points; ++i) {
280304Sjkim            if (i == num)
280304Sjkim                /* the generator */
280304Sjkim            {
280304Sjkim                p = EC_GROUP_get0_generator(group);
280304Sjkim                p_scalar = scalar;
280304Sjkim            } else
280304Sjkim                /* the i^th point */
280304Sjkim            {
280304Sjkim                p = points[i];
280304Sjkim                p_scalar = scalars[i];
280304Sjkim            }
280304Sjkim            if ((p_scalar != NULL) && (p != NULL)) {
280304Sjkim                /* reduce scalar to 0 <= scalar < 2^224 */
280304Sjkim                if ((BN_num_bits(p_scalar) > 224)
280304Sjkim                    || (BN_is_negative(p_scalar))) {
280304Sjkim                    /*
280304Sjkim                     * this is an unusual input, and we don't guarantee
280304Sjkim                     * constant-timeness
280304Sjkim                     */
280304Sjkim                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
280304Sjkim                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
280304Sjkim                        goto err;
280304Sjkim                    }
280304Sjkim                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
280304Sjkim                } else
280304Sjkim                    num_bytes = BN_bn2bin(p_scalar, tmp);
280304Sjkim                flip_endian(secrets[i], tmp, num_bytes);
280304Sjkim                /* precompute multiples */
280304Sjkim                if ((!BN_to_felem(x_out, &p->X)) ||
280304Sjkim                    (!BN_to_felem(y_out, &p->Y)) ||
280304Sjkim                    (!BN_to_felem(z_out, &p->Z)))
280304Sjkim                    goto err;
280304Sjkim                felem_assign(pre_comp[i][1][0], x_out);
280304Sjkim                felem_assign(pre_comp[i][1][1], y_out);
280304Sjkim                felem_assign(pre_comp[i][1][2], z_out);
280304Sjkim                for (j = 2; j <= 16; ++j) {
280304Sjkim                    if (j & 1) {
280304Sjkim                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
280304Sjkim                                  pre_comp[i][j][2], pre_comp[i][1][0],
280304Sjkim                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
280304Sjkim                                  pre_comp[i][j - 1][0],
280304Sjkim                                  pre_comp[i][j - 1][1],
280304Sjkim                                  pre_comp[i][j - 1][2]);
280304Sjkim                    } else {
280304Sjkim                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
280304Sjkim                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
280304Sjkim                                     pre_comp[i][j / 2][1],
280304Sjkim                                     pre_comp[i][j / 2][2]);
280304Sjkim                    }
280304Sjkim                }
280304Sjkim            }
280304Sjkim        }
280304Sjkim        if (mixed)
280304Sjkim            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
280304Sjkim    }
238384Sjkim
280304Sjkim    /* the scalar for the generator */
280304Sjkim    if ((scalar != NULL) && (have_pre_comp)) {
280304Sjkim        memset(g_secret, 0, sizeof g_secret);
280304Sjkim        /* reduce scalar to 0 <= scalar < 2^224 */
280304Sjkim        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
280304Sjkim            /*
280304Sjkim             * this is an unusual input, and we don't guarantee
280304Sjkim             * constant-timeness
280304Sjkim             */
280304Sjkim            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
280304Sjkim                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
280304Sjkim                goto err;
280304Sjkim            }
280304Sjkim            num_bytes = BN_bn2bin(tmp_scalar, tmp);
280304Sjkim        } else
280304Sjkim            num_bytes = BN_bn2bin(scalar, tmp);
280304Sjkim        flip_endian(g_secret, tmp, num_bytes);
280304Sjkim        /* do the multiplication with generator precomputation */
280304Sjkim        batch_mul(x_out, y_out, z_out,
280304Sjkim                  (const felem_bytearray(*))secrets, num_points,
280304Sjkim                  g_secret,
280304Sjkim                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
280304Sjkim    } else
280304Sjkim        /* do the multiplication without generator precomputation */
280304Sjkim        batch_mul(x_out, y_out, z_out,
280304Sjkim                  (const felem_bytearray(*))secrets, num_points,
280304Sjkim                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
280304Sjkim    /* reduce the output to its unique minimal representation */
280304Sjkim    felem_contract(x_in, x_out);
280304Sjkim    felem_contract(y_in, y_out);
280304Sjkim    felem_contract(z_in, z_out);
280304Sjkim    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
280304Sjkim        (!felem_to_BN(z, z_in))) {
280304Sjkim        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
280304Sjkim        goto err;
280304Sjkim    }
280304Sjkim    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
238384Sjkim
280304Sjkim err:
280304Sjkim    BN_CTX_end(ctx);
280304Sjkim    if (generator != NULL)
280304Sjkim        EC_POINT_free(generator);
280304Sjkim    if (new_ctx != NULL)
280304Sjkim        BN_CTX_free(new_ctx);
280304Sjkim    if (secrets != NULL)
280304Sjkim        OPENSSL_free(secrets);
280304Sjkim    if (pre_comp != NULL)
280304Sjkim        OPENSSL_free(pre_comp);
280304Sjkim    if (tmp_felems != NULL)
280304Sjkim        OPENSSL_free(tmp_felems);
280304Sjkim    return ret;
280304Sjkim}
238384Sjkim
238384Sjkimint ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
280304Sjkim{
280304Sjkim    int ret = 0;
280304Sjkim    NISTP224_PRE_COMP *pre = NULL;
280304Sjkim    int i, j;
280304Sjkim    BN_CTX *new_ctx = NULL;
280304Sjkim    BIGNUM *x, *y;
280304Sjkim    EC_POINT *generator = NULL;
280304Sjkim    felem tmp_felems[32];
238384Sjkim
280304Sjkim    /* throw away old precomputation */
280304Sjkim    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
280304Sjkim                         nistp224_pre_comp_free,
280304Sjkim                         nistp224_pre_comp_clear_free);
280304Sjkim    if (ctx == NULL)
280304Sjkim        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
280304Sjkim            return 0;
280304Sjkim    BN_CTX_start(ctx);
280304Sjkim    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
280304Sjkim        goto err;
280304Sjkim    /* get the generator */
280304Sjkim    if (group->generator == NULL)
280304Sjkim        goto err;
280304Sjkim    generator = EC_POINT_new(group);
280304Sjkim    if (generator == NULL)
280304Sjkim        goto err;
280304Sjkim    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
280304Sjkim    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
280304Sjkim    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
280304Sjkim        goto err;
280304Sjkim    if ((pre = nistp224_pre_comp_new()) == NULL)
280304Sjkim        goto err;
280304Sjkim    /*
280304Sjkim     * if the generator is the standard one, use built-in precomputation
280304Sjkim     */
280304Sjkim    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
280304Sjkim        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
280304Sjkim        ret = 1;
280304Sjkim        goto err;
280304Sjkim    }
280304Sjkim    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
280304Sjkim        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
280304Sjkim        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
280304Sjkim        goto err;
280304Sjkim    /*
280304Sjkim     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
280304Sjkim     * 2^140*G, 2^196*G for the second one
280304Sjkim     */
280304Sjkim    for (i = 1; i <= 8; i <<= 1) {
280304Sjkim        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
280304Sjkim                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
280304Sjkim                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
280304Sjkim        for (j = 0; j < 27; ++j) {
280304Sjkim            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
280304Sjkim                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
280304Sjkim                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
280304Sjkim        }
280304Sjkim        if (i == 8)
280304Sjkim            break;
280304Sjkim        point_double(pre->g_pre_comp[0][2 * i][0],
280304Sjkim                     pre->g_pre_comp[0][2 * i][1],
280304Sjkim                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
280304Sjkim                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
280304Sjkim        for (j = 0; j < 27; ++j) {
280304Sjkim            point_double(pre->g_pre_comp[0][2 * i][0],
280304Sjkim                         pre->g_pre_comp[0][2 * i][1],
280304Sjkim                         pre->g_pre_comp[0][2 * i][2],
280304Sjkim                         pre->g_pre_comp[0][2 * i][0],
280304Sjkim                         pre->g_pre_comp[0][2 * i][1],
280304Sjkim                         pre->g_pre_comp[0][2 * i][2]);
280304Sjkim        }
280304Sjkim    }
280304Sjkim    for (i = 0; i < 2; i++) {
280304Sjkim        /* g_pre_comp[i][0] is the point at infinity */
280304Sjkim        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
280304Sjkim        /* the remaining multiples */
280304Sjkim        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
280304Sjkim        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
280304Sjkim                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
280304Sjkim                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
280304Sjkim                  pre->g_pre_comp[i][2][2]);
280304Sjkim        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
280304Sjkim        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
280304Sjkim                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
280304Sjkim                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
280304Sjkim                  pre->g_pre_comp[i][2][2]);
280304Sjkim        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
280304Sjkim        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
280304Sjkim                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
280304Sjkim                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
280304Sjkim                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
280304Sjkim                  pre->g_pre_comp[i][4][2]);
280304Sjkim        /*
280304Sjkim         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
280304Sjkim         */
280304Sjkim        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
280304Sjkim                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
280304Sjkim                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
280304Sjkim                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
280304Sjkim                  pre->g_pre_comp[i][2][2]);
280304Sjkim        for (j = 1; j < 8; ++j) {
280304Sjkim            /* odd multiples: add G resp. 2^28*G */
280304Sjkim            point_add(pre->g_pre_comp[i][2 * j + 1][0],
280304Sjkim                      pre->g_pre_comp[i][2 * j + 1][1],
280304Sjkim                      pre->g_pre_comp[i][2 * j + 1][2],
280304Sjkim                      pre->g_pre_comp[i][2 * j][0],
280304Sjkim                      pre->g_pre_comp[i][2 * j][1],
280304Sjkim                      pre->g_pre_comp[i][2 * j][2], 0,
280304Sjkim                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
280304Sjkim                      pre->g_pre_comp[i][1][2]);
280304Sjkim        }
280304Sjkim    }
280304Sjkim    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
238384Sjkim
280304Sjkim    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
280304Sjkim                             nistp224_pre_comp_free,
280304Sjkim                             nistp224_pre_comp_clear_free))
280304Sjkim        goto err;
280304Sjkim    ret = 1;
280304Sjkim    pre = NULL;
238384Sjkim err:
280304Sjkim    BN_CTX_end(ctx);
280304Sjkim    if (generator != NULL)
280304Sjkim        EC_POINT_free(generator);
280304Sjkim    if (new_ctx != NULL)
280304Sjkim        BN_CTX_free(new_ctx);
280304Sjkim    if (pre)
280304Sjkim        nistp224_pre_comp_free(pre);
280304Sjkim    return ret;
280304Sjkim}
238384Sjkim
238384Sjkimint ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
280304Sjkim{
280304Sjkim    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
280304Sjkim                            nistp224_pre_comp_free,
280304Sjkim                            nistp224_pre_comp_clear_free)
280304Sjkim        != NULL)
280304Sjkim        return 1;
280304Sjkim    else
280304Sjkim        return 0;
280304Sjkim}
238384Sjkim
238384Sjkim#else
/*
 * Only definition emitted when OPENSSL_NO_EC_NISTP_64_GCC_128 is defined;
 * presumably here so the translation unit is not empty (the
 * self-reference also counts as a use of the variable).
 */
static void *dummy = &dummy;
238384Sjkim#endif