bn_nist.c revision 296341
1/* crypto/bn/bn_nist.c */
2/*
3 * Written by Nils Larsch for the OpenSSL project
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in
17 *    the documentation and/or other materials provided with the
18 *    distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 *    software must display the following acknowledgment:
22 *    "This product includes software developed by the OpenSSL Project
23 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 *    endorse or promote products derived from this software without
27 *    prior written permission. For written permission, please contact
28 *    openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 *    nor may "OpenSSL" appear in their names without prior written
32 *    permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 *    acknowledgment:
36 *    "This product includes software developed by the OpenSSL Project
37 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com).  This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include "bn_lcl.h"
60#include "cryptlib.h"
61
/*
 * Number of BN_ULONG limbs needed to hold an N-bit NIST prime, i.e. N bits
 * rounded up to a whole number of BN_BITS2-bit limbs.
 *
 * Every replacement list is wrapped in parentheses so that the macros keep
 * their value when they appear next to operators that bind as tightly as
 * '/' (for example "2 * BN_NIST_192_TOP" or "x % BN_NIST_224_TOP").
 */
#define BN_NIST_192_TOP ((192+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_224_TOP ((224+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_256_TOP ((256+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_384_TOP ((384+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_521_TOP ((521+BN_BITS2-1)/BN_BITS2)
67
68/* pre-computed tables are "carry-less" values of modulus*(i+1) */
69#if BN_BITS2 == 64
/*
 * 64-bit limb variant.  Limbs are stored least-significant first.  In the
 * two-dimensional tables row i holds (i+1) times the modulus; the reduction
 * functions index them with [carry - 1] (or [-carry - 1]) to add or subtract
 * that multiple in a single pass.  The *_sqr arrays are wrapped into BIGNUMs
 * by the reduction functions and, in the visible ones, serve as the upper
 * bound above which the generic BN_nnmod() is used instead of the fast path.
 */
/* P-192: p, 2p, 3p */
70static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
71    {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
72    {0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
73    {0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
74};
75
/* P-192 squared (six limbs) */
76static const BN_ULONG _nist_p_192_sqr[] = {
77    0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
78    0xFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
79};
80
/* P-224: p, 2p; 224 bits occupy only the low half of the fourth limb */
81static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
82    {0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
83     0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL},
84    {0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
85     0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL} /* this one is
86                                                    * "carry-full" */
87};
88
/* named as the square of the P-224 modulus (seven limbs) */
89static const BN_ULONG _nist_p_224_sqr[] = {
90    0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
91    0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
92    0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
93    0xFFFFFFFFFFFFFFFFULL
94};
95
/* P-256: p through 5p */
96static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
97    {0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
98     0x0000000000000000ULL, 0xFFFFFFFF00000001ULL},
99    {0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
100     0x0000000000000000ULL, 0xFFFFFFFE00000002ULL},
101    {0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
102     0x0000000000000000ULL, 0xFFFFFFFD00000003ULL},
103    {0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
104     0x0000000000000000ULL, 0xFFFFFFFC00000004ULL},
105    {0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
106     0x0000000000000000ULL, 0xFFFFFFFB00000005ULL},
107};
108
/* named as the square of the P-256 modulus (eight limbs) */
109static const BN_ULONG _nist_p_256_sqr[] = {
110    0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
111    0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
112    0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
113    0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
114};
115
/* P-384: p through 5p */
116static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
117    {0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFEULL,
118     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
119    {0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
120     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
121    {0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL, 0xFFFFFFFFFFFFFFFCULL,
122     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
123    {0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFFFFFFFFFFBULL,
124     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
125    {0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL, 0xFFFFFFFFFFFFFFFAULL,
126     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
127};
128
/* named as the square of the P-384 modulus (twelve limbs) */
129static const BN_ULONG _nist_p_384_sqr[] = {
130    0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
131    0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
132    0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
133    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
134};
135
/* P-521: the modulus only (2^521 - 1); no table of multiples is kept */
136static const BN_ULONG _nist_p_521[] =
137    { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
138    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
139    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
140    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
141    0x00000000000001FFULL
142};
143
/* named as the square of the P-521 modulus (seventeen limbs) */
144static const BN_ULONG _nist_p_521_sqr[] = {
145    0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
146    0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
147    0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
148    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
149    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
150    0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
151};
152#elif BN_BITS2 == 32
/*
 * 32-bit limb variant of the same tables: identical values, each 64-bit
 * limb above split into two 32-bit limbs, least-significant first.
 */
/* P-192: p, 2p, 3p */
153static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
154    {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
155    {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
156    {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
157};
158
/* P-192 squared */
159static const BN_ULONG _nist_p_192_sqr[] = {
160    0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
161    0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
162};
163
/* P-224: p, 2p (224 bits fill exactly seven 32-bit limbs) */
164static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
165    {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
166     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
167    {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
168     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
169};
170
/* named as the square of the P-224 modulus */
171static const BN_ULONG _nist_p_224_sqr[] = {
172    0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
173    0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
174    0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
175    0xFFFFFFFF, 0xFFFFFFFF
176};
177
/* P-256: p through 5p */
178static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
179    {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
180     0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
181    {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
182     0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
183    {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
184     0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
185    {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
186     0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
187    {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
188     0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
189};
190
/* named as the square of the P-256 modulus */
191static const BN_ULONG _nist_p_256_sqr[] = {
192    0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
193    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
194    0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
195    0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
196};
197
/* P-384: p through 5p */
198static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
199    {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
200     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
201    {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
202     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
203    {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
204     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
205    {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
206     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
207    {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
208     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
209};
210
/* named as the square of the P-384 modulus */
211static const BN_ULONG _nist_p_384_sqr[] = {
212    0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
213    0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
214    0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
215    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
216};
217
/* P-521: the modulus only (2^521 - 1) */
218static const BN_ULONG _nist_p_521[] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
219    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
220    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
221    0xFFFFFFFF, 0x000001FF
222};
223
/* named as the square of the P-521 modulus */
224static const BN_ULONG _nist_p_521_sqr[] = {
225    0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
226    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
227    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
228    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
229    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
230    0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
231};
232#else
/* only 64-bit and 32-bit limbs have tables; anything else stops the build */
233# error "unsupported BN_BITS2"
234#endif
235
236static const BIGNUM _bignum_nist_p_192 = {
237    (BN_ULONG *)_nist_p_192[0],
238    BN_NIST_192_TOP,
239    BN_NIST_192_TOP,
240    0,
241    BN_FLG_STATIC_DATA
242};
243
244static const BIGNUM _bignum_nist_p_224 = {
245    (BN_ULONG *)_nist_p_224[0],
246    BN_NIST_224_TOP,
247    BN_NIST_224_TOP,
248    0,
249    BN_FLG_STATIC_DATA
250};
251
252static const BIGNUM _bignum_nist_p_256 = {
253    (BN_ULONG *)_nist_p_256[0],
254    BN_NIST_256_TOP,
255    BN_NIST_256_TOP,
256    0,
257    BN_FLG_STATIC_DATA
258};
259
260static const BIGNUM _bignum_nist_p_384 = {
261    (BN_ULONG *)_nist_p_384[0],
262    BN_NIST_384_TOP,
263    BN_NIST_384_TOP,
264    0,
265    BN_FLG_STATIC_DATA
266};
267
268static const BIGNUM _bignum_nist_p_521 = {
269    (BN_ULONG *)_nist_p_521,
270    BN_NIST_521_TOP,
271    BN_NIST_521_TOP,
272    0,
273    BN_FLG_STATIC_DATA
274};
275
276const BIGNUM *BN_get0_nist_prime_192(void)
277{
278    return &_bignum_nist_p_192;
279}
280
281const BIGNUM *BN_get0_nist_prime_224(void)
282{
283    return &_bignum_nist_p_224;
284}
285
286const BIGNUM *BN_get0_nist_prime_256(void)
287{
288    return &_bignum_nist_p_256;
289}
290
291const BIGNUM *BN_get0_nist_prime_384(void)
292{
293    return &_bignum_nist_p_384;
294}
295
296const BIGNUM *BN_get0_nist_prime_521(void)
297{
298    return &_bignum_nist_p_521;
299}
300
301static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
302{
303    int i;
304
305#ifdef BN_DEBUG
306    OPENSSL_assert(top <= max);
307#endif
308    for (i = 0; i < top; i++)
309        dst[i] = src[i];
310    for (; i < max; i++)
311        dst[i] = 0;
312}
313
314static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
315{
316    int i;
317
318    for (i = 0; i < top; i++)
319        dst[i] = src[i];
320}
321
/*
 * Word-shuffling helpers used by the nist_set_NNN() macros.
 *
 *   bn_cp_64(to, n, from, m)   64-bit word n of `to` = 64-bit word m of
 *                              `from`, or 0 when m is negative
 *   bn_64_set_0(to, n)         clear 64-bit word n of `to`
 *   bn_cp_32(to, n, from, m)   same as bn_cp_64, for 32-bit words
 *   bn_32_set_0(to, n)         same as bn_64_set_0, for 32-bit words
 *
 * Every expansion is already a complete statement (it ends in ';' or is a
 * brace block), which is why the nist_set_NNN() macros invoke them without
 * a trailing semicolon.  All macro parameters are parenthesized wherever
 * they are used as operands, so arbitrary expressions may be passed for
 * `to`, `from`, `n` and `m`.
 *
 * NIST_INT64, when it ends up defined, names a signed 64-bit integer type
 * and selects the wide-accumulator code paths in the reduction functions.
 */
#if BN_BITS2 == 64
# define bn_cp_64(to, n, from, m)        (to)[n] = ((m)>=0)?((from)[m]):0;
# define bn_64_set_0(to, n)              (to)[n] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n overwrites the whole limb with its low half, the following
 * odd n then ORs the high half into it)
 */
# define bn_cp_32_naked(to, n, from, m)  (((n)&1)?((to)[(n)/2]|=((m)&1)?((from)[(m)/2]&BN_MASK2h):((from)[(m)/2]<<32))\
                                                :((to)[(n)/2] =((m)&1)?((from)[(m)/2]>>32):((from)[(m)/2]&BN_MASK2l)))
# define bn_32_set_0(to, n)              (((n)&1)?((to)[(n)/2]&=BN_MASK2l):((to)[(n)/2]=0));
# define bn_cp_32(to,n,from,m)           ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
/* the 32-bit view of 64-bit limbs is only used on little-endian targets */
# if defined(L_ENDIAN)
#  if defined(__arch64__)
#   define NIST_INT64 long
#  else
#   define NIST_INT64 long long
#  endif
# endif
#else
/* with 32-bit limbs a 64-bit word is simply two consecutive limbs */
# define bn_cp_64(to, n, from, m) \
        { \
        bn_cp_32(to, (n)*2, from, (m)*2); \
        bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
        }
# define bn_64_set_0(to, n) \
        { \
        bn_32_set_0(to, (n)*2); \
        bn_32_set_0(to, (n)*2+1); \
        }
# define bn_cp_32(to, n, from, m)        (to)[n] = ((m)>=0)?((from)[m]):0;
# define bn_32_set_0(to, n)              (to)[n] = (BN_ULONG)0;
# if defined(_WIN32) && !defined(__GNUC__)
#  define NIST_INT64 __int64
# elif defined(BN_LLONG)
#  define NIST_INT64 long long
# endif
#endif                          /* BN_BITS2 != 64 */
359
/*
 * Fill the three 64-bit words of `to` from `from`.
 *
 * a1..a3 are listed most-significant first: a3 selects the source for
 * to[0] and a1 the source for to[2].  Each argument is a 64-bit word index
 * offset by 3, so 3 means word 0 of `from`; a value below 3 (callers pass
 * 0) makes bn_cp_64 store zero instead.
 *
 * The expansion is a brace block with no trailing semicolon, and the
 * bn_cp_64 invocations are complete statements on their own.
 */
360#define nist_set_192(to, from, a1, a2, a3) \
361        { \
362        bn_cp_64(to, 0, from, (a3) - 3) \
363        bn_cp_64(to, 1, from, (a2) - 3) \
364        bn_cp_64(to, 2, from, (a1) - 3) \
365        }
366
/*
 * Reduce a modulo the NIST P-192 prime and store the result in r.
 *
 * The caller-supplied `field` is ignored: it is overwritten with the
 * built-in modulus before use.  Negative inputs and inputs >= p^2 are handed
 * to BN_nnmod().  a == p gives zero, and a < p is returned unchanged (copied
 * with BN_copy() unless r and a are the same object).  Everything else takes
 * the fast path: the limbs above the low 192 bits are added back into the
 * low part in a fixed pattern, and a final masked selection brings the
 * result below the modulus.
 *
 * r may be the same object as a, in which case a's limbs are updated in
 * place.
 *
 * Returns 1 on success and 0 if r could not be expanded; on the BN_nnmod()
 * path its return value is passed through, and on the BN_copy() path the
 * result is 1 exactly when BN_copy() returned non-NULL.
 */
367int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
368                    BN_CTX *ctx)
369{
370    int top = a->top, i;
371    int carry;
372    register BN_ULONG *r_d, *a_d = a->d;
    /*
     * buf receives the limbs of a above the low 192 bits; ui is the same
     * storage viewed as unsigned ints for the NIST_INT64 path
     */
373    union {
374        BN_ULONG bn[BN_NIST_192_TOP];
375        unsigned int ui[BN_NIST_192_TOP * sizeof(BN_ULONG) /
376                        sizeof(unsigned int)];
377    } buf;
378    BN_ULONG c_d[BN_NIST_192_TOP], *res;
379    PTR_SIZE_INT mask;
    /* _nist_p_192_sqr wrapped as a BIGNUM: upper bound for the fast path */
380    static const BIGNUM _bignum_nist_p_192_sqr = {
381        (BN_ULONG *)_nist_p_192_sqr,
382        sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
383        sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
384        0, BN_FLG_STATIC_DATA
385    };
386
387    field = &_bignum_nist_p_192; /* just to make sure */
388
    /* out of range for the fast path: fall back to the generic reduction */
389    if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_192_sqr) >= 0)
390        return BN_nnmod(r, a, field, ctx);
391
    /* i == 0: a equals p; i > 0: a is already smaller than p */
392    i = BN_ucmp(field, a);
393    if (i == 0) {
394        BN_zero(r);
395        return 1;
396    } else if (i > 0)
397        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
398
    /* seed r with the low limbs of a, or work in place when r aliases a */
399    if (r != a) {
400        if (!bn_wexpand(r, BN_NIST_192_TOP))
401            return 0;
402        r_d = r->d;
403        nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
404    } else
405        r_d = a_d;
406
    /* high limbs of a, zero-padded to BN_NIST_192_TOP limbs */
407    nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
408                 BN_NIST_192_TOP);
409
410#if defined(NIST_INT64)
    /*
     * Wide-accumulator variant: r and buf are walked as 32-bit words and the
     * overflow of each column is carried into the next one by acc >>= 32.
     * Indices are written as (64-bit word number * 2 - 6) for the low half
     * and (... - 5) for the high half of that word within buf.
     */
411    {
412        NIST_INT64 acc;         /* accumulator */
413        unsigned int *rp = (unsigned int *)r_d;
414        const unsigned int *bp = (const unsigned int *)buf.ui;
415
416        acc = rp[0];
417        acc += bp[3 * 2 - 6];
418        acc += bp[5 * 2 - 6];
419        rp[0] = (unsigned int)acc;
420        acc >>= 32;
421
422        acc += rp[1];
423        acc += bp[3 * 2 - 5];
424        acc += bp[5 * 2 - 5];
425        rp[1] = (unsigned int)acc;
426        acc >>= 32;
427
428        acc += rp[2];
429        acc += bp[3 * 2 - 6];
430        acc += bp[4 * 2 - 6];
431        acc += bp[5 * 2 - 6];
432        rp[2] = (unsigned int)acc;
433        acc >>= 32;
434
435        acc += rp[3];
436        acc += bp[3 * 2 - 5];
437        acc += bp[4 * 2 - 5];
438        acc += bp[5 * 2 - 5];
439        rp[3] = (unsigned int)acc;
440        acc >>= 32;
441
442        acc += rp[4];
443        acc += bp[4 * 2 - 6];
444        acc += bp[5 * 2 - 6];
445        rp[4] = (unsigned int)acc;
446        acc >>= 32;
447
448        acc += rp[5];
449        acc += bp[4 * 2 - 5];
450        acc += bp[5 * 2 - 5];
451        rp[5] = (unsigned int)acc;
452
        /* whatever is left above the last column is the overall carry */
453        carry = (int)(acc >> 32);
454    }
455#else
    /*
     * Portable variant: each addend is assembled in t_d by nist_set_192()
     * and added limb-wise; the carries out of the top limb are counted.
     */
456    {
457        BN_ULONG t_d[BN_NIST_192_TOP];
458
459        nist_set_192(t_d, buf.bn, 0, 3, 3);
460        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
461        nist_set_192(t_d, buf.bn, 4, 4, 0);
462        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
        /*
         * no ';' after the macro below: it expands to a brace block, so the
         * next line is an ordinary separate statement
         */
463        nist_set_192(t_d, buf.bn, 5, 5, 5)
464            carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
465    }
466#endif
    /*
     * A positive carry is removed by subtracting that multiple of the
     * modulus (row carry - 1 of the table); carry then holds the borrow of
     * that subtraction.  With no carry, carry is set to 1.
     */
467    if (carry > 0)
468        carry =
469            (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
470                              BN_NIST_192_TOP);
471    else
472        carry = 1;
473
474    /*
475     * we need 'if (carry==0 || result>=modulus) result-=modulus;'
476     * as comparison implies subtraction, we can write
477     * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
478     * this is what happens below, but without explicit if:-) a.
479     */
    /* mask is all ones only when the subtraction borrowed and carry is 1 */
480    mask =
481        0 - (PTR_SIZE_INT) bn_sub_words(c_d, r_d, _nist_p_192[0],
482                                        BN_NIST_192_TOP);
483    mask &= 0 - (PTR_SIZE_INT) carry;
    /* pick r_d when mask is all ones, c_d (= r_d - p) when it is zero */
484    res = c_d;
485    res = (BN_ULONG *)
486        (((PTR_SIZE_INT) res & ~mask) | ((PTR_SIZE_INT) r_d & mask));
487    nist_cp_bn(r_d, res, BN_NIST_192_TOP);
488    r->top = BN_NIST_192_TOP;
489    bn_correct_top(r);
490
491    return 1;
492}
493
494typedef BN_ULONG (*bn_addsub_f) (BN_ULONG *, const BN_ULONG *,
495                                 const BN_ULONG *, int);
496
/*
 * Fill the seven 32-bit words of `to` from `from`.
 *
 * a1..a7 are listed most-significant first: a7 selects the source for word
 * 0 of `to` and a1 the source for word 6.  Each argument is a 32-bit word
 * index offset by 7, so 7 means word 0 of `from`; a value below 7 (callers
 * pass 0) makes bn_cp_32 store zero instead.
 *
 * Destination words are written in ascending order, which the 64-bit-limb
 * implementation of bn_cp_32 depends on.  The expansion is a brace block
 * with no trailing semicolon.
 */
497#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
498        { \
499        bn_cp_32(to, 0, from, (a7) - 7) \
500        bn_cp_32(to, 1, from, (a6) - 7) \
501        bn_cp_32(to, 2, from, (a5) - 7) \
502        bn_cp_32(to, 3, from, (a4) - 7) \
503        bn_cp_32(to, 4, from, (a3) - 7) \
504        bn_cp_32(to, 5, from, (a2) - 7) \
505        bn_cp_32(to, 6, from, (a1) - 7) \
506        }
507
/*
 * Reduce a modulo the NIST P-224 prime and store the result in r.
 *
 * Overall shape matches BN_nist_mod_192(): `field` is overwritten with the
 * built-in modulus, negative inputs and inputs >= p^2 are handed to
 * BN_nnmod(), a == p gives zero and a < p is returned unchanged.  The fast
 * path both adds and subtracts rearranged copies of the high words, so the
 * running carry can be negative and the final correction may have to add
 * the modulus rather than subtract it.
 *
 * With 64-bit limbs the 224-bit boundary falls in the middle of the top
 * limb, so the high part is realigned by 32 bits and the top limb of r is
 * masked before the arithmetic starts.
 *
 * r may be the same object as a, in which case a's limbs are updated in
 * place.
 *
 * Returns 1 on success and 0 if r could not be expanded; on the BN_nnmod()
 * path its return value is passed through, and on the BN_copy() path the
 * result is 1 exactly when BN_copy() returned non-NULL.
 */
508int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
509                    BN_CTX *ctx)
510{
511    int top = a->top, i;
512    int carry;
513    BN_ULONG *r_d, *a_d = a->d;
    /* high part of a; ui is the same storage viewed as unsigned ints */
514    union {
515        BN_ULONG bn[BN_NIST_224_TOP];
516        unsigned int ui[BN_NIST_224_TOP * sizeof(BN_ULONG) /
517                        sizeof(unsigned int)];
518    } buf;
519    BN_ULONG c_d[BN_NIST_224_TOP], *res;
520    PTR_SIZE_INT mask;
    /* lets the add/sub routine be chosen by masking its address */
521    union {
522        bn_addsub_f f;
523        PTR_SIZE_INT p;
524    } u;
    /* _nist_p_224_sqr wrapped as a BIGNUM: upper bound for the fast path */
525    static const BIGNUM _bignum_nist_p_224_sqr = {
526        (BN_ULONG *)_nist_p_224_sqr,
527        sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
528        sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
529        0, BN_FLG_STATIC_DATA
530    };
531
532    field = &_bignum_nist_p_224; /* just to make sure */
533
    /* out of range for the fast path: fall back to the generic reduction */
534    if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_224_sqr) >= 0)
535        return BN_nnmod(r, a, field, ctx);
536
    /* i == 0: a equals p; i > 0: a is already smaller than p */
537    i = BN_ucmp(field, a);
538    if (i == 0) {
539        BN_zero(r);
540        return 1;
541    } else if (i > 0)
542        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
543
    /* seed r with the low limbs of a, or work in place when r aliases a */
544    if (r != a) {
545        if (!bn_wexpand(r, BN_NIST_224_TOP))
546            return 0;
547        r_d = r->d;
548        nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
549    } else
550        r_d = a_d;
551
552#if BN_BITS2==64
553    /* copy upper 256 bits of 448 bit number ... */
554    nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
555                 top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
556    /* ... and right shift by 32 to obtain upper 224 bits */
557    nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
558    /* truncate lower part to 224 bits too */
559    r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
560#else
    /* 32-bit limbs: the high part starts exactly at limb BN_NIST_224_TOP */
561    nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP,
562                 BN_NIST_224_TOP);
563#endif
564
565#if defined(NIST_INT64) && BN_BITS2!=64
    /*
     * Wide-accumulator variant (32-bit limbs only): signed acc collects the
     * additions and subtractions of one 32-bit column, and acc >>= 32 moves
     * the carry or borrow into the next column.  Indices are written as
     * (word number of the full input - 7).
     */
566    {
567        NIST_INT64 acc;         /* accumulator */
568        unsigned int *rp = (unsigned int *)r_d;
569        const unsigned int *bp = (const unsigned int *)buf.ui;
570
571        acc = rp[0];
572        acc -= bp[7 - 7];
573        acc -= bp[11 - 7];
574        rp[0] = (unsigned int)acc;
575        acc >>= 32;
576
577        acc += rp[1];
578        acc -= bp[8 - 7];
579        acc -= bp[12 - 7];
580        rp[1] = (unsigned int)acc;
581        acc >>= 32;
582
583        acc += rp[2];
584        acc -= bp[9 - 7];
585        acc -= bp[13 - 7];
586        rp[2] = (unsigned int)acc;
587        acc >>= 32;
588
589        acc += rp[3];
590        acc += bp[7 - 7];
591        acc += bp[11 - 7];
592        acc -= bp[10 - 7];
593        rp[3] = (unsigned int)acc;
594        acc >>= 32;
595
596        acc += rp[4];
597        acc += bp[8 - 7];
598        acc += bp[12 - 7];
599        acc -= bp[11 - 7];
600        rp[4] = (unsigned int)acc;
601        acc >>= 32;
602
603        acc += rp[5];
604        acc += bp[9 - 7];
605        acc += bp[13 - 7];
606        acc -= bp[12 - 7];
607        rp[5] = (unsigned int)acc;
608        acc >>= 32;
609
610        acc += rp[6];
611        acc += bp[10 - 7];
612        acc -= bp[13 - 7];
613        rp[6] = (unsigned int)acc;
614
        /* signed remainder above the last column: may be negative */
615        carry = (int)(acc >> 32);
        /*
         * NOTE(review): the test below can never be true here, because the
         * enclosing #if already requires BN_BITS2 != 64
         */
616# if BN_BITS2==64
617        rp[7] = carry;
618# endif
619    }
620#else
    /*
     * Portable variant: each term is assembled in t_d by nist_set_224();
     * two terms are added and two subtracted, with carries counted up and
     * borrows counted down.
     */
621    {
622        BN_ULONG t_d[BN_NIST_224_TOP];
623
624        nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
625        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
626        nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
627        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
628        nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
629        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
630        nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
631        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
632
        /*
         * 64-bit limbs: the count gathered above is replaced by the bits
         * of the top limb that lie above its low 32 bits
         */
633# if BN_BITS2==64
634        carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
635# endif
636    }
637#endif
    /* default for the final step: subtract the modulus */
638    u.f = bn_sub_words;
639    if (carry > 0) {
        /* remove the excess by subtracting that multiple of the modulus */
640        carry =
641            (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
642                              BN_NIST_224_TOP);
        /* 64-bit limbs: carry becomes the inverse of bit 32 of the top limb */
643#if BN_BITS2==64
644        carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
645#endif
646    } else if (carry < 0) {
647        /*
648         * it's a bit more complicated logic in this case. if bn_add_words
649         * yields no carry, then result has to be adjusted by unconditionally
650         * *adding* the modulus. but if it does, then result has to be
651         * compared to the modulus and conditionally adjusted by
652         * *subtracting* the latter.
653         */
654        carry =
655            (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
656                              BN_NIST_224_TOP);
        /* carry set: keep bn_sub_words; carry clear: switch to bn_add_words */
657        mask = 0 - (PTR_SIZE_INT) carry;
658        u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
659            ((PTR_SIZE_INT) bn_add_words & ~mask);
660    } else
661        carry = 1;
662
663    /* otherwise it's effectively same as in BN_nist_mod_192... */
    /* c_d = r_d -/+ p through the selected routine; r_d itself is kept */
664    mask =
665        0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP);
666    mask &= 0 - (PTR_SIZE_INT) carry;
    /* pick r_d when mask is all ones, c_d when it is zero */
667    res = c_d;
668    res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
669                       ((PTR_SIZE_INT) r_d & mask));
670    nist_cp_bn(r_d, res, BN_NIST_224_TOP);
671    r->top = BN_NIST_224_TOP;
672    bn_correct_top(r);
673
674    return 1;
675}
676
/*
 * Fill the eight 32-bit words of `to` from `from`.
 *
 * a1..a8 are listed most-significant first: a8 selects the source for word
 * 0 of `to` and a1 the source for word 7.  Each argument is a 32-bit word
 * index offset by 8, so 8 means word 0 of `from`; a value below 8 (callers
 * pass 0) makes bn_cp_32 store zero instead.
 *
 * Destination words are written in ascending order, which the 64-bit-limb
 * implementation of bn_cp_32 depends on.  The expansion is a brace block
 * with no trailing semicolon.
 */
677#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
678        { \
679        bn_cp_32(to, 0, from, (a8) - 8) \
680        bn_cp_32(to, 1, from, (a7) - 8) \
681        bn_cp_32(to, 2, from, (a6) - 8) \
682        bn_cp_32(to, 3, from, (a5) - 8) \
683        bn_cp_32(to, 4, from, (a4) - 8) \
684        bn_cp_32(to, 5, from, (a3) - 8) \
685        bn_cp_32(to, 6, from, (a2) - 8) \
686        bn_cp_32(to, 7, from, (a1) - 8) \
687        }
688
/*
 * Reduce a modulo the NIST P-256 prime and store the result in r.
 *
 * Overall shape matches BN_nist_mod_224(): `field` is overwritten with the
 * built-in modulus, negative inputs and inputs >= p^2 are handed to
 * BN_nnmod(), a == p gives zero and a < p is returned unchanged.  The fast
 * path combines the low 256 bits with four added terms (S1..S4, the first
 * two doubled) and four subtracted terms (D1..D4) built from the high
 * words, then corrects the signed carry with a multiple of the modulus and
 * a final masked selection.
 *
 * r may be the same object as a, in which case a's limbs are updated in
 * place.
 *
 * Returns 1 on success and 0 if r could not be expanded; on the BN_nnmod()
 * path its return value is passed through, and on the BN_copy() path the
 * result is 1 exactly when BN_copy() returned non-NULL.
 */
689int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
690                    BN_CTX *ctx)
691{
692    int i, top = a->top;
693    int carry = 0;
694    register BN_ULONG *a_d = a->d, *r_d;
    /* high part of a; ui is the same storage viewed as unsigned ints */
695    union {
696        BN_ULONG bn[BN_NIST_256_TOP];
697        unsigned int ui[BN_NIST_256_TOP * sizeof(BN_ULONG) /
698                        sizeof(unsigned int)];
699    } buf;
700    BN_ULONG c_d[BN_NIST_256_TOP], *res;
701    PTR_SIZE_INT mask;
    /* lets the add/sub routine be chosen by masking its address */
702    union {
703        bn_addsub_f f;
704        PTR_SIZE_INT p;
705    } u;
    /* _nist_p_256_sqr wrapped as a BIGNUM: upper bound for the fast path */
706    static const BIGNUM _bignum_nist_p_256_sqr = {
707        (BN_ULONG *)_nist_p_256_sqr,
708        sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
709        sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
710        0, BN_FLG_STATIC_DATA
711    };
712
713    field = &_bignum_nist_p_256; /* just to make sure */
714
    /* out of range for the fast path: fall back to the generic reduction */
715    if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_256_sqr) >= 0)
716        return BN_nnmod(r, a, field, ctx);
717
    /* i == 0: a equals p; i > 0: a is already smaller than p */
718    i = BN_ucmp(field, a);
719    if (i == 0) {
720        BN_zero(r);
721        return 1;
722    } else if (i > 0)
723        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
724
    /* seed r with the low limbs of a, or work in place when r aliases a */
725    if (r != a) {
726        if (!bn_wexpand(r, BN_NIST_256_TOP))
727            return 0;
728        r_d = r->d;
729        nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
730    } else
731        r_d = a_d;
732
    /* high limbs of a, zero-padded to BN_NIST_256_TOP limbs */
733    nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP,
734                 BN_NIST_256_TOP);
735
736#if defined(NIST_INT64)
    /*
     * Wide-accumulator variant: signed acc collects every addition and
     * subtraction of one 32-bit column, and acc >>= 32 moves the carry or
     * borrow into the next column.  Indices are written as (word number of
     * the full input - 8); a term that counts twice is simply added twice.
     */
737    {
738        NIST_INT64 acc;         /* accumulator */
739        unsigned int *rp = (unsigned int *)r_d;
740        const unsigned int *bp = (const unsigned int *)buf.ui;
741
742        acc = rp[0];
743        acc += bp[8 - 8];
744        acc += bp[9 - 8];
745        acc -= bp[11 - 8];
746        acc -= bp[12 - 8];
747        acc -= bp[13 - 8];
748        acc -= bp[14 - 8];
749        rp[0] = (unsigned int)acc;
750        acc >>= 32;
751
752        acc += rp[1];
753        acc += bp[9 - 8];
754        acc += bp[10 - 8];
755        acc -= bp[12 - 8];
756        acc -= bp[13 - 8];
757        acc -= bp[14 - 8];
758        acc -= bp[15 - 8];
759        rp[1] = (unsigned int)acc;
760        acc >>= 32;
761
762        acc += rp[2];
763        acc += bp[10 - 8];
764        acc += bp[11 - 8];
765        acc -= bp[13 - 8];
766        acc -= bp[14 - 8];
767        acc -= bp[15 - 8];
768        rp[2] = (unsigned int)acc;
769        acc >>= 32;
770
771        acc += rp[3];
772        acc += bp[11 - 8];
773        acc += bp[11 - 8];
774        acc += bp[12 - 8];
775        acc += bp[12 - 8];
776        acc += bp[13 - 8];
777        acc -= bp[15 - 8];
778        acc -= bp[8 - 8];
779        acc -= bp[9 - 8];
780        rp[3] = (unsigned int)acc;
781        acc >>= 32;
782
783        acc += rp[4];
784        acc += bp[12 - 8];
785        acc += bp[12 - 8];
786        acc += bp[13 - 8];
787        acc += bp[13 - 8];
788        acc += bp[14 - 8];
789        acc -= bp[9 - 8];
790        acc -= bp[10 - 8];
791        rp[4] = (unsigned int)acc;
792        acc >>= 32;
793
794        acc += rp[5];
795        acc += bp[13 - 8];
796        acc += bp[13 - 8];
797        acc += bp[14 - 8];
798        acc += bp[14 - 8];
799        acc += bp[15 - 8];
800        acc -= bp[10 - 8];
801        acc -= bp[11 - 8];
802        rp[5] = (unsigned int)acc;
803        acc >>= 32;
804
805        acc += rp[6];
806        acc += bp[14 - 8];
807        acc += bp[14 - 8];
808        acc += bp[15 - 8];
809        acc += bp[15 - 8];
810        acc += bp[14 - 8];
811        acc += bp[13 - 8];
812        acc -= bp[8 - 8];
813        acc -= bp[9 - 8];
814        rp[6] = (unsigned int)acc;
815        acc >>= 32;
816
817        acc += rp[7];
818        acc += bp[15 - 8];
819        acc += bp[15 - 8];
820        acc += bp[15 - 8];
821        acc += bp[8 - 8];
822        acc -= bp[10 - 8];
823        acc -= bp[11 - 8];
824        acc -= bp[12 - 8];
825        acc -= bp[13 - 8];
826        rp[7] = (unsigned int)acc;
827
        /* signed remainder above the last column: may be negative */
828        carry = (int)(acc >> 32);
829    }
830#else
    /*
     * Portable variant: each term is assembled by nist_set_256() and then
     * added or subtracted limb-wise, with carries counted up and borrows
     * counted down.
     */
831    {
832        BN_ULONG t_d[BN_NIST_256_TOP];
833
834        /*
835         * S1
836         */
837        nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
838        /*
839         * S2
840         */
        /* c_d is used as scratch space here; it is rewritten further down */
841        nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
842        carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
843        /* left shift */
        /* doubles S1 + S2 in place; the bit shifted out joins the carry */
844        {
845            register BN_ULONG *ap, t, c;
846            ap = t_d;
847            c = 0;
848            for (i = BN_NIST_256_TOP; i != 0; --i) {
849                t = *ap;
850                *(ap++) = ((t << 1) | c) & BN_MASK2;
851                c = (t & BN_TBIT) ? 1 : 0;
852            }
853            carry <<= 1;
854            carry |= c;
855        }
856        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
857        /*
858         * S3
859         */
860        nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
861        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
862        /*
863         * S4
864         */
865        nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
866        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
867        /*
868         * D1
869         */
870        nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
871        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
872        /*
873         * D2
874         */
875        nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
876        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
877        /*
878         * D3
879         */
880        nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
881        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
882        /*
883         * D4
884         */
885        nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
886        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
887
888    }
889#endif
890    /* see BN_nist_mod_224 for explanation */
891    u.f = bn_sub_words;
    /* positive carry: subtract that multiple of the modulus */
892    if (carry > 0)
893        carry =
894            (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
895                              BN_NIST_256_TOP);
    /* negative carry: add the multiple back, then choose add or subtract */
896    else if (carry < 0) {
897        carry =
898            (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
899                              BN_NIST_256_TOP);
        /* carry set: keep bn_sub_words; carry clear: switch to bn_add_words */
900        mask = 0 - (PTR_SIZE_INT) carry;
901        u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
902            ((PTR_SIZE_INT) bn_add_words & ~mask);
903    } else
904        carry = 1;
905
    /* c_d = r_d -/+ p through the selected routine; r_d itself is kept */
906    mask =
907        0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP);
908    mask &= 0 - (PTR_SIZE_INT) carry;
    /* pick r_d when mask is all ones, c_d when it is zero */
909    res = c_d;
910    res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
911                       ((PTR_SIZE_INT) r_d & mask));
912    nist_cp_bn(r_d, res, BN_NIST_256_TOP);
913    r->top = BN_NIST_256_TOP;
914    bn_correct_top(r);
915
916    return 1;
917}
918
/*
 * nist_set_384 - fill the twelve 32-bit word slots of |to| from |from|.
 *
 * The selectors a1..a12 are listed most-significant slot first: a1 feeds
 * destination slot 11 and a12 feeds destination slot 0.  Each selector is
 * rebased by subtracting 12 before it is handed to bn_cp_32(), so callers
 * name source words by the numbers 12..23.  The copies are emitted in
 * ascending destination order (0, 1, ..., 11); that order is kept exactly.
 *
 * NOTE(review): bn_cp_32() is defined elsewhere in this file.  No statement
 * terminator is added between the invocations here, so it is expected to
 * supply its own; a selector of 0 rebases to a negative index, which
 * bn_cp_32() presumably turns into "store zero" - confirm against its
 * definition.
 *
 * The body is wrapped in do { } while (0) so that "nist_set_384(...);"
 * is a single statement and can be used in an unbraced if/else.
 */
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
        do { \
        bn_cp_32(to, 0, from,  (a12) - 12) \
        bn_cp_32(to, 1, from,  (a11) - 12) \
        bn_cp_32(to, 2, from,  (a10) - 12) \
        bn_cp_32(to, 3, from,  (a9) - 12)  \
        bn_cp_32(to, 4, from,  (a8) - 12)  \
        bn_cp_32(to, 5, from,  (a7) - 12)  \
        bn_cp_32(to, 6, from,  (a6) - 12)  \
        bn_cp_32(to, 7, from,  (a5) - 12)  \
        bn_cp_32(to, 8, from,  (a4) - 12)  \
        bn_cp_32(to, 9, from,  (a3) - 12)  \
        bn_cp_32(to, 10, from, (a2) - 12)  \
        bn_cp_32(to, 11, from, (a1) - 12)  \
        } while (0)
934
935int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
936                    BN_CTX *ctx)
937{
938    int i, top = a->top;
939    int carry = 0;
940    register BN_ULONG *r_d, *a_d = a->d;
941    union {
942        BN_ULONG bn[BN_NIST_384_TOP];
943        unsigned int ui[BN_NIST_384_TOP * sizeof(BN_ULONG) /
944                        sizeof(unsigned int)];
945    } buf;
946    BN_ULONG c_d[BN_NIST_384_TOP], *res;
947    PTR_SIZE_INT mask;
948    union {
949        bn_addsub_f f;
950        PTR_SIZE_INT p;
951    } u;
952    static const BIGNUM _bignum_nist_p_384_sqr = {
953        (BN_ULONG *)_nist_p_384_sqr,
954        sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
955        sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
956        0, BN_FLG_STATIC_DATA
957    };
958
959    field = &_bignum_nist_p_384; /* just to make sure */
960
961    if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_384_sqr) >= 0)
962        return BN_nnmod(r, a, field, ctx);
963
964    i = BN_ucmp(field, a);
965    if (i == 0) {
966        BN_zero(r);
967        return 1;
968    } else if (i > 0)
969        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
970
971    if (r != a) {
972        if (!bn_wexpand(r, BN_NIST_384_TOP))
973            return 0;
974        r_d = r->d;
975        nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
976    } else
977        r_d = a_d;
978
979    nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP,
980                 BN_NIST_384_TOP);
981
982#if defined(NIST_INT64)
983    {
984        NIST_INT64 acc;         /* accumulator */
985        unsigned int *rp = (unsigned int *)r_d;
986        const unsigned int *bp = (const unsigned int *)buf.ui;
987
988        acc = rp[0];
989        acc += bp[12 - 12];
990        acc += bp[21 - 12];
991        acc += bp[20 - 12];
992        acc -= bp[23 - 12];
993        rp[0] = (unsigned int)acc;
994        acc >>= 32;
995
996        acc += rp[1];
997        acc += bp[13 - 12];
998        acc += bp[22 - 12];
999        acc += bp[23 - 12];
1000        acc -= bp[12 - 12];
1001        acc -= bp[20 - 12];
1002        rp[1] = (unsigned int)acc;
1003        acc >>= 32;
1004
1005        acc += rp[2];
1006        acc += bp[14 - 12];
1007        acc += bp[23 - 12];
1008        acc -= bp[13 - 12];
1009        acc -= bp[21 - 12];
1010        rp[2] = (unsigned int)acc;
1011        acc >>= 32;
1012
1013        acc += rp[3];
1014        acc += bp[15 - 12];
1015        acc += bp[12 - 12];
1016        acc += bp[20 - 12];
1017        acc += bp[21 - 12];
1018        acc -= bp[14 - 12];
1019        acc -= bp[22 - 12];
1020        acc -= bp[23 - 12];
1021        rp[3] = (unsigned int)acc;
1022        acc >>= 32;
1023
1024        acc += rp[4];
1025        acc += bp[21 - 12];
1026        acc += bp[21 - 12];
1027        acc += bp[16 - 12];
1028        acc += bp[13 - 12];
1029        acc += bp[12 - 12];
1030        acc += bp[20 - 12];
1031        acc += bp[22 - 12];
1032        acc -= bp[15 - 12];
1033        acc -= bp[23 - 12];
1034        acc -= bp[23 - 12];
1035        rp[4] = (unsigned int)acc;
1036        acc >>= 32;
1037
1038        acc += rp[5];
1039        acc += bp[22 - 12];
1040        acc += bp[22 - 12];
1041        acc += bp[17 - 12];
1042        acc += bp[14 - 12];
1043        acc += bp[13 - 12];
1044        acc += bp[21 - 12];
1045        acc += bp[23 - 12];
1046        acc -= bp[16 - 12];
1047        rp[5] = (unsigned int)acc;
1048        acc >>= 32;
1049
1050        acc += rp[6];
1051        acc += bp[23 - 12];
1052        acc += bp[23 - 12];
1053        acc += bp[18 - 12];
1054        acc += bp[15 - 12];
1055        acc += bp[14 - 12];
1056        acc += bp[22 - 12];
1057        acc -= bp[17 - 12];
1058        rp[6] = (unsigned int)acc;
1059        acc >>= 32;
1060
1061        acc += rp[7];
1062        acc += bp[19 - 12];
1063        acc += bp[16 - 12];
1064        acc += bp[15 - 12];
1065        acc += bp[23 - 12];
1066        acc -= bp[18 - 12];
1067        rp[7] = (unsigned int)acc;
1068        acc >>= 32;
1069
1070        acc += rp[8];
1071        acc += bp[20 - 12];
1072        acc += bp[17 - 12];
1073        acc += bp[16 - 12];
1074        acc -= bp[19 - 12];
1075        rp[8] = (unsigned int)acc;
1076        acc >>= 32;
1077
1078        acc += rp[9];
1079        acc += bp[21 - 12];
1080        acc += bp[18 - 12];
1081        acc += bp[17 - 12];
1082        acc -= bp[20 - 12];
1083        rp[9] = (unsigned int)acc;
1084        acc >>= 32;
1085
1086        acc += rp[10];
1087        acc += bp[22 - 12];
1088        acc += bp[19 - 12];
1089        acc += bp[18 - 12];
1090        acc -= bp[21 - 12];
1091        rp[10] = (unsigned int)acc;
1092        acc >>= 32;
1093
1094        acc += rp[11];
1095        acc += bp[23 - 12];
1096        acc += bp[20 - 12];
1097        acc += bp[19 - 12];
1098        acc -= bp[22 - 12];
1099        rp[11] = (unsigned int)acc;
1100
1101        carry = (int)(acc >> 32);
1102    }
1103#else
1104    {
1105        BN_ULONG t_d[BN_NIST_384_TOP];
1106
1107        /*
1108         * S1
1109         */
1110        nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4, 21 - 4);
1111        /* left shift */
1112        {
1113            register BN_ULONG *ap, t, c;
1114            ap = t_d;
1115            c = 0;
1116            for (i = 3; i != 0; --i) {
1117                t = *ap;
1118                *(ap++) = ((t << 1) | c) & BN_MASK2;
1119                c = (t & BN_TBIT) ? 1 : 0;
1120            }
1121            *ap = c;
1122        }
1123        carry =
1124            (int)bn_add_words(r_d + (128 / BN_BITS2), r_d + (128 / BN_BITS2),
1125                              t_d, BN_NIST_256_TOP);
1126        /*
1127         * S2
1128         */
1129        carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
1130        /*
1131         * S3
1132         */
1133        nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12, 23, 22,
1134                     21);
1135        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1136        /*
1137         * S4
1138         */
1139        nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20, 0, 23,
1140                     0);
1141        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1142        /*
1143         * S5
1144         */
1145        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 23, 22, 21, 20, 0, 0, 0, 0);
1146        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1147        /*
1148         * S6
1149         */
1150        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 23, 22, 21, 0, 0, 20);
1151        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1152        /*
1153         * D1
1154         */
1155        nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1156                     23);
1157        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1158        /*
1159         * D2
1160         */
1161        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 22, 21, 20, 0);
1162        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1163        /*
1164         * D3
1165         */
1166        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0);
1167        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1168
1169    }
1170#endif
1171    /* see BN_nist_mod_224 for explanation */
1172    u.f = bn_sub_words;
1173    if (carry > 0)
1174        carry =
1175            (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1176                              BN_NIST_384_TOP);
1177    else if (carry < 0) {
1178        carry =
1179            (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1180                              BN_NIST_384_TOP);
1181        mask = 0 - (PTR_SIZE_INT) carry;
1182        u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
1183            ((PTR_SIZE_INT) bn_add_words & ~mask);
1184    } else
1185        carry = 1;
1186
1187    mask =
1188        0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP);
1189    mask &= 0 - (PTR_SIZE_INT) carry;
1190    res = c_d;
1191    res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
1192                       ((PTR_SIZE_INT) r_d & mask));
1193    nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1194    r->top = BN_NIST_384_TOP;
1195    bn_correct_top(r);
1196
1197    return 1;
1198}
1199
1200#define BN_NIST_521_RSHIFT      (521%BN_BITS2)
1201#define BN_NIST_521_LSHIFT      (BN_BITS2-BN_NIST_521_RSHIFT)
1202#define BN_NIST_521_TOP_MASK    ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
1203
1204int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1205                    BN_CTX *ctx)
1206{
1207    int top = a->top, i;
1208    BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
1209    PTR_SIZE_INT mask;
1210    static const BIGNUM _bignum_nist_p_521_sqr = {
1211        (BN_ULONG *)_nist_p_521_sqr,
1212        sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
1213        sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
1214        0, BN_FLG_STATIC_DATA
1215    };
1216
1217    field = &_bignum_nist_p_521; /* just to make sure */
1218
1219    if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_521_sqr) >= 0)
1220        return BN_nnmod(r, a, field, ctx);
1221
1222    i = BN_ucmp(field, a);
1223    if (i == 0) {
1224        BN_zero(r);
1225        return 1;
1226    } else if (i > 0)
1227        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1228
1229    if (r != a) {
1230        if (!bn_wexpand(r, BN_NIST_521_TOP))
1231            return 0;
1232        r_d = r->d;
1233        nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1234    } else
1235        r_d = a_d;
1236
1237    /* upper 521 bits, copy ... */
1238    nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1239                 top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1240    /* ... and right shift */
1241    for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1242        t_d[i] = (val >> BN_NIST_521_RSHIFT |
1243                  (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2;
1244        val = tmp;
1245    }
1246    t_d[i] = val >> BN_NIST_521_RSHIFT;
1247    /* lower 521 bits */
1248    r_d[i] &= BN_NIST_521_TOP_MASK;
1249
1250    bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1251    mask =
1252        0 - (PTR_SIZE_INT) bn_sub_words(t_d, r_d, _nist_p_521,
1253                                        BN_NIST_521_TOP);
1254    res = t_d;
1255    res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
1256                       ((PTR_SIZE_INT) r_d & mask));
1257    nist_cp_bn(r_d, res, BN_NIST_521_TOP);
1258    r->top = BN_NIST_521_TOP;
1259    bn_correct_top(r);
1260
1261    return 1;
1262}
1263