bn_mont.c revision 296465
1/* crypto/bn/bn_mont.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 *    notice, this list of conditions and the following disclaimer in
70 *    the documentation and/or other materials provided with the
71 *    distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 *    software must display the following acknowledgment:
75 *    "This product includes software developed by the OpenSSL Project
76 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 *    endorse or promote products derived from this software without
80 *    prior written permission. For written permission, please contact
81 *    openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 *    nor may "OpenSSL" appear in their names without prior written
85 *    permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 *    acknowledgment:
89 *    "This product includes software developed by the OpenSSL Project
90 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com).  This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112/*
113 * Details about Montgomery multiplication algorithms can be found at
114 * http://security.ece.orst.edu/publications.html, e.g.
115 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
117 */
118
119#include <stdio.h>
120#include "cryptlib.h"
121#include "bn_lcl.h"
122
123#define MONT_WORD               /* use the faster word-based algorithm */
124
125#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
126/*
127 * This condition means we have a specific non-default build: In the 0.9.8
128 * branch, OPENSSL_BN_ASM_MONT is normally not set for any BN_BITS2<=32
129 * platform; an explicit "enable-montasm" is required. I.e., if we are here,
130 * the user intentionally deviates from the normal stable build to get better
131 * Montgomery performance from the 0.9.9-dev backport. In this case only, we
132 * also enable BN_from_montgomery_word() (another non-stable feature from
133 * 0.9.9-dev).
134 */
135# define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
136#endif
137
138#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
139static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
140#endif
141
142int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
143                          BN_MONT_CTX *mont, BN_CTX *ctx)
144{
145    BIGNUM *tmp;
146    int ret = 0;
147#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
148    int num = mont->N.top;
149
150    if (num > 1 && a->top == num && b->top == num) {
151        if (bn_wexpand(r, num) == NULL)
152            return (0);
153# if 0                          /* for OpenSSL 0.9.9 mont->n0 */
154        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num))
155# else
156        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, &mont->n0, num))
157# endif
158        {
159            r->neg = a->neg ^ b->neg;
160            r->top = num;
161            bn_correct_top(r);
162            return (1);
163        }
164    }
165#endif
166
167    BN_CTX_start(ctx);
168    tmp = BN_CTX_get(ctx);
169    if (tmp == NULL)
170        goto err;
171
172    bn_check_top(tmp);
173    if (a == b) {
174        if (!BN_sqr(tmp, a, ctx))
175            goto err;
176    } else {
177        if (!BN_mul(tmp, a, b, ctx))
178            goto err;
179    }
180    /* reduce from aRR to aR */
181#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
182    if (!BN_from_montgomery_word(r, tmp, mont))
183        goto err;
184#else
185    if (!BN_from_montgomery(r, tmp, mont, ctx))
186        goto err;
187#endif
188    bn_check_top(r);
189    ret = 1;
190 err:
191    BN_CTX_end(ctx);
192    return (ret);
193}
194
195#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
196static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
197{
198    BIGNUM *n;
199    BN_ULONG *ap, *np, *rp, n0, v, *nrp;
200    int al, nl, max, i, x, ri;
201
202    n = &(mont->N);
203    /*
204     * mont->ri is the size of mont->N in bits (rounded up to the word size)
205     */
206    al = ri = mont->ri / BN_BITS2;
207
208    nl = n->top;
209    if ((al == 0) || (nl == 0)) {
210        ret->top = 0;
211        return (1);
212    }
213
214    max = (nl + al + 1);        /* allow for overflow (no?) XXX */
215    if (bn_wexpand(r, max) == NULL)
216        return (0);
217
218    r->neg ^= n->neg;
219    np = n->d;
220    rp = r->d;
221    nrp = &(r->d[nl]);
222
223    /* clear the top words of T */
224    for (i = r->top; i < max; i++) /* memset? XXX */
225        r->d[i] = 0;
226
227    r->top = max;
228# if 0                          /* for OpenSSL 0.9.9 mont->n0 */
229    n0 = mont->n0[0];
230# else
231    n0 = mont->n0;
232# endif
233
234# ifdef BN_COUNT
235    fprintf(stderr, "word BN_from_montgomery_word %d * %d\n", nl, nl);
236# endif
237    for (i = 0; i < nl; i++) {
238# ifdef __TANDEM
239        {
240            long long t1;
241            long long t2;
242            long long t3;
243            t1 = rp[0] * (n0 & 0177777);
244            t2 = 037777600000l;
245            t2 = n0 & t2;
246            t3 = rp[0] & 0177777;
247            t2 = (t3 * t2) & BN_MASK2;
248            t1 = t1 + t2;
249            v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1);
250        }
251# else
252        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
253# endif
254        nrp++;
255        rp++;
256        if (((nrp[-1] += v) & BN_MASK2) >= v)
257            continue;
258        else {
259            if (((++nrp[0]) & BN_MASK2) != 0)
260                continue;
261            if (((++nrp[1]) & BN_MASK2) != 0)
262                continue;
263            for (x = 2; (((++nrp[x]) & BN_MASK2) == 0); x++) ;
264        }
265    }
266    bn_correct_top(r);
267
268    /*
269     * mont->ri will be a multiple of the word size and below code is kind of
270     * BN_rshift(ret,r,mont->ri) equivalent
271     */
272    if (r->top <= ri) {
273        ret->top = 0;
274        return (1);
275    }
276    al = r->top - ri;
277
278    if (bn_wexpand(ret, ri) == NULL)
279        return (0);
280    x = 0 - (((al - ri) >> (sizeof(al) * 8 - 1)) & 1);
281    ret->top = x = (ri & ~x) | (al & x); /* min(ri,al) */
282    ret->neg = r->neg;
283
284    rp = ret->d;
285    ap = &(r->d[ri]);
286
287    {
288        size_t m1, m2;
289
290        v = bn_sub_words(rp, ap, np, ri);
291        /*
292         * this ----------------^^ works even in al<ri case thanks to zealous
293         * zeroing of top of the vector in the beginning.
294         */
295
296        /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
297        /*
298         * in other words if subtraction result is real, then trick
299         * unconditional memcpy below to perform in-place "refresh" instead
300         * of actual copy.
301         */
302        m1 = 0 - (size_t)(((al - ri) >> (sizeof(al) * 8 - 1)) & 1); /* al<ri */
303        m2 = 0 - (size_t)(((ri - al) >> (sizeof(al) * 8 - 1)) & 1); /* al>ri */
304        m1 |= m2;               /* (al!=ri) */
305        m1 |= (0 - (size_t)v);  /* (al!=ri || v) */
306        m1 &= ~m2;              /* (al!=ri || v) && !al>ri */
307        nrp = (BN_ULONG *)(((size_t)rp & ~m1) | ((size_t)ap & m1));
308    }
309
310    /*
311     * 'i<ri' is chosen to eliminate dependency on input data, even though it
312     * results in redundant copy in al<ri case.
313     */
314    for (i = 0, ri -= 4; i < ri; i += 4) {
315        BN_ULONG t1, t2, t3, t4;
316
317        t1 = nrp[i + 0];
318        t2 = nrp[i + 1];
319        t3 = nrp[i + 2];
320        ap[i + 0] = 0;
321        t4 = nrp[i + 3];
322        ap[i + 1] = 0;
323        rp[i + 0] = t1;
324        ap[i + 2] = 0;
325        rp[i + 1] = t2;
326        ap[i + 3] = 0;
327        rp[i + 2] = t3;
328        rp[i + 3] = t4;
329    }
330    for (ri += 4; i < ri; i++)
331        rp[i] = nrp[i], ap[i] = 0;
332    bn_correct_top(r);
333    bn_correct_top(ret);
334    bn_check_top(ret);
335
336    return (1);
337}
338
339int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
340                       BN_CTX *ctx)
341{
342    int retn = 0;
343    BIGNUM *t;
344
345    BN_CTX_start(ctx);
346    if ((t = BN_CTX_get(ctx)) && BN_copy(t, a))
347        retn = BN_from_montgomery_word(ret, t, mont);
348    BN_CTX_end(ctx);
349    return retn;
350}
351
352#else                           /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
353
354int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
355                       BN_CTX *ctx)
356{
357    int retn = 0;
358
359# ifdef MONT_WORD
360    BIGNUM *n, *r;
361    BN_ULONG *ap, *np, *rp, n0, v, *nrp;
362    int al, nl, max, i, x, ri;
363
364    BN_CTX_start(ctx);
365    if ((r = BN_CTX_get(ctx)) == NULL)
366        goto err;
367
368    if (!BN_copy(r, a))
369        goto err;
370    n = &(mont->N);
371
372    ap = a->d;
373    /*
374     * mont->ri is the size of mont->N in bits (rounded up to the word size)
375     */
376    al = ri = mont->ri / BN_BITS2;
377
378    nl = n->top;
379    if ((al == 0) || (nl == 0)) {
380        r->top = 0;
381        return (1);
382    }
383
384    max = (nl + al + 1);        /* allow for overflow (no?) XXX */
385    if (bn_wexpand(r, max) == NULL)
386        goto err;
387
388    r->neg = a->neg ^ n->neg;
389    np = n->d;
390    rp = r->d;
391    nrp = &(r->d[nl]);
392
393    /* clear the top words of T */
394#  if 1
395    for (i = r->top; i < max; i++) /* memset? XXX */
396        r->d[i] = 0;
397#  else
398    memset(&(r->d[r->top]), 0, (max - r->top) * sizeof(BN_ULONG));
399#  endif
400
401    r->top = max;
402    n0 = mont->n0;
403
404#  ifdef BN_COUNT
405    fprintf(stderr, "word BN_from_montgomery %d * %d\n", nl, nl);
406#  endif
407    for (i = 0; i < nl; i++) {
408#  ifdef __TANDEM
409        {
410            long long t1;
411            long long t2;
412            long long t3;
413            t1 = rp[0] * (n0 & 0177777);
414            t2 = 037777600000l;
415            t2 = n0 & t2;
416            t3 = rp[0] & 0177777;
417            t2 = (t3 * t2) & BN_MASK2;
418            t1 = t1 + t2;
419            v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1);
420        }
421#  else
422        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
423#  endif
424        nrp++;
425        rp++;
426        if (((nrp[-1] += v) & BN_MASK2) >= v)
427            continue;
428        else {
429            if (((++nrp[0]) & BN_MASK2) != 0)
430                continue;
431            if (((++nrp[1]) & BN_MASK2) != 0)
432                continue;
433            for (x = 2; (((++nrp[x]) & BN_MASK2) == 0); x++) ;
434        }
435    }
436    bn_correct_top(r);
437
438    /*
439     * mont->ri will be a multiple of the word size and below code is kind of
440     * BN_rshift(ret,r,mont->ri) equivalent
441     */
442    if (r->top <= ri) {
443        ret->top = 0;
444        retn = 1;
445        goto err;
446    }
447    al = r->top - ri;
448
449#  define BRANCH_FREE 1
450#  if BRANCH_FREE
451    if (bn_wexpand(ret, ri) == NULL)
452        goto err;
453    x = 0 - (((al - ri) >> (sizeof(al) * 8 - 1)) & 1);
454    ret->top = x = (ri & ~x) | (al & x); /* min(ri,al) */
455    ret->neg = r->neg;
456
457    rp = ret->d;
458    ap = &(r->d[ri]);
459
460    {
461        size_t m1, m2;
462
463        v = bn_sub_words(rp, ap, np, ri);
464        /*
465         * this ----------------^^ works even in al<ri case thanks to zealous
466         * zeroing of top of the vector in the beginning.
467         */
468
469        /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
470        /*
471         * in other words if subtraction result is real, then trick
472         * unconditional memcpy below to perform in-place "refresh" instead
473         * of actual copy.
474         */
475        m1 = 0 - (size_t)(((al - ri) >> (sizeof(al) * 8 - 1)) & 1); /* al<ri */
476        m2 = 0 - (size_t)(((ri - al) >> (sizeof(al) * 8 - 1)) & 1); /* al>ri */
477        m1 |= m2;               /* (al!=ri) */
478        m1 |= (0 - (size_t)v);  /* (al!=ri || v) */
479        m1 &= ~m2;              /* (al!=ri || v) && !al>ri */
480        nrp = (BN_ULONG *)(((size_t)rp & ~m1) | ((size_t)ap & m1));
481    }
482
483    /*
484     * 'i<ri' is chosen to eliminate dependency on input data, even though it
485     * results in redundant copy in al<ri case.
486     */
487    for (i = 0, ri -= 4; i < ri; i += 4) {
488        BN_ULONG t1, t2, t3, t4;
489
490        t1 = nrp[i + 0];
491        t2 = nrp[i + 1];
492        t3 = nrp[i + 2];
493        ap[i + 0] = 0;
494        t4 = nrp[i + 3];
495        ap[i + 1] = 0;
496        rp[i + 0] = t1;
497        ap[i + 2] = 0;
498        rp[i + 1] = t2;
499        ap[i + 3] = 0;
500        rp[i + 2] = t3;
501        rp[i + 3] = t4;
502    }
503    for (ri += 4; i < ri; i++)
504        rp[i] = nrp[i], ap[i] = 0;
505    bn_correct_top(r);
506    bn_correct_top(ret);
507#  else
508    if (bn_wexpand(ret, al) == NULL)
509        goto err;
510    ret->top = al;
511    ret->neg = r->neg;
512
513    rp = ret->d;
514    ap = &(r->d[ri]);
515    al -= 4;
516    for (i = 0; i < al; i += 4) {
517        BN_ULONG t1, t2, t3, t4;
518
519        t1 = ap[i + 0];
520        t2 = ap[i + 1];
521        t3 = ap[i + 2];
522        t4 = ap[i + 3];
523        rp[i + 0] = t1;
524        rp[i + 1] = t2;
525        rp[i + 2] = t3;
526        rp[i + 3] = t4;
527    }
528    al += 4;
529    for (; i < al; i++)
530        rp[i] = ap[i];
531#  endif
532# else                          /* !MONT_WORD */
533    BIGNUM *t1, *t2;
534
535    BN_CTX_start(ctx);
536    t1 = BN_CTX_get(ctx);
537    t2 = BN_CTX_get(ctx);
538    if (t1 == NULL || t2 == NULL)
539        goto err;
540
541    if (!BN_copy(t1, a))
542        goto err;
543    BN_mask_bits(t1, mont->ri);
544
545    if (!BN_mul(t2, t1, &mont->Ni, ctx))
546        goto err;
547    BN_mask_bits(t2, mont->ri);
548
549    if (!BN_mul(t1, t2, &mont->N, ctx))
550        goto err;
551    if (!BN_add(t2, a, t1))
552        goto err;
553    if (!BN_rshift(ret, t2, mont->ri))
554        goto err;
555# endif                         /* MONT_WORD */
556
557# if !defined(BRANCH_FREE) || BRANCH_FREE==0
558    if (BN_ucmp(ret, &(mont->N)) >= 0) {
559        if (!BN_usub(ret, ret, &(mont->N)))
560            goto err;
561    }
562# endif
563    retn = 1;
564    bn_check_top(ret);
565 err:
566    BN_CTX_end(ctx);
567    return (retn);
568}
569#endif                          /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
570
571BN_MONT_CTX *BN_MONT_CTX_new(void)
572{
573    BN_MONT_CTX *ret;
574
575    if ((ret = (BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
576        return (NULL);
577
578    BN_MONT_CTX_init(ret);
579    ret->flags = BN_FLG_MALLOCED;
580    return (ret);
581}
582
583void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
584{
585    ctx->ri = 0;
586    BN_init(&(ctx->RR));
587    BN_init(&(ctx->N));
588    BN_init(&(ctx->Ni));
589#if 0                           /* for OpenSSL 0.9.9 mont->n0 */
590    ctx->n0[0] = ctx->n0[1] = 0;
591#else
592    ctx->n0 = 0;
593#endif
594    ctx->flags = 0;
595}
596
597void BN_MONT_CTX_free(BN_MONT_CTX *mont)
598{
599    if (mont == NULL)
600        return;
601
602    BN_free(&(mont->RR));
603    BN_free(&(mont->N));
604    BN_free(&(mont->Ni));
605    if (mont->flags & BN_FLG_MALLOCED)
606        OPENSSL_free(mont);
607}
608
609int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
610{
611    int ret = 0;
612    BIGNUM *Ri, *R;
613
614    BN_CTX_start(ctx);
615    if ((Ri = BN_CTX_get(ctx)) == NULL)
616        goto err;
617    R = &(mont->RR);            /* grab RR as a temp */
618    if (!BN_copy(&(mont->N), mod))
619        goto err;               /* Set N */
620    mont->N.neg = 0;
621
622#ifdef MONT_WORD
623    {
624        BIGNUM tmod;
625        BN_ULONG buf[2];
626
627        mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
628        BN_zero(R);
629# if 0                          /* for OpenSSL 0.9.9 mont->n0, would be "#if
630                                 * defined(OPENSSL_BN_ASM_MONT) &&
631                                 * (BN_BITS2<=32)", only certain BN_BITS2<=32
632                                 * platforms actually need this */
633        if (!(BN_set_bit(R, 2 * BN_BITS2)))
634            goto err;           /* R */
635# else
636        if (!(BN_set_bit(R, BN_BITS2)))
637            goto err;           /* R */
638# endif
639
640        buf[0] = mod->d[0];     /* tmod = N mod word size */
641        buf[1] = 0;
642
643        BN_init(&tmod);
644        tmod.d = buf;
645        tmod.top = buf[0] != 0 ? 1 : 0;
646        tmod.dmax = 2;
647        tmod.neg = 0;
648
649# if 0                          /* for OpenSSL 0.9.9 mont->n0, would be "#if
650                                 * defined(OPENSSL_BN_ASM_MONT) &&
651                                 * (BN_BITS2<=32)"; only certain BN_BITS2<=32
652                                 * platforms actually need this */
653        tmod.top = 0;
654        if ((buf[0] = mod->d[0]))
655            tmod.top = 1;
656        if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
657            tmod.top = 2;
658
659        if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
660            goto err;
661        if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
662            goto err;           /* R*Ri */
663        if (!BN_is_zero(Ri)) {
664            if (!BN_sub_word(Ri, 1))
665                goto err;
666        } else {                /* if N mod word size == 1 */
667
668            if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
669                goto err;
670            /* Ri-- (mod double word size) */
671            Ri->neg = 0;
672            Ri->d[0] = BN_MASK2;
673            Ri->d[1] = BN_MASK2;
674            Ri->top = 2;
675        }
676        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
677            goto err;
678        /*
679         * Ni = (R*Ri-1)/N, keep only couple of least significant words:
680         */
681        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
682        mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
683# else
684        /* Ri = R^-1 mod N */
685        if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
686            goto err;
687        if (!BN_lshift(Ri, Ri, BN_BITS2))
688            goto err;           /* R*Ri */
689        if (!BN_is_zero(Ri)) {
690            if (!BN_sub_word(Ri, 1))
691                goto err;
692        } else {                /* if N mod word size == 1 */
693
694            if (!BN_set_word(Ri, BN_MASK2))
695                goto err;       /* Ri-- (mod word size) */
696        }
697        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
698            goto err;
699        /*
700         * Ni = (R*Ri-1)/N, keep only least significant word:
701         */
702#  if 0                         /* for OpenSSL 0.9.9 mont->n0 */
703        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
704        mont->n0[1] = 0;
705#  else
706        mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
707#  endif
708# endif
709    }
710#else                           /* !MONT_WORD */
711    {                           /* bignum version */
712        mont->ri = BN_num_bits(&mont->N);
713        BN_zero(R);
714        if (!BN_set_bit(R, mont->ri))
715            goto err;           /* R = 2^ri */
716        /* Ri = R^-1 mod N */
717        if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
718            goto err;
719        if (!BN_lshift(Ri, Ri, mont->ri))
720            goto err;           /* R*Ri */
721        if (!BN_sub_word(Ri, 1))
722            goto err;
723        /*
724         * Ni = (R*Ri-1) / N
725         */
726        if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
727            goto err;
728    }
729#endif
730
731    /* setup RR for conversions */
732    BN_zero(&(mont->RR));
733    if (!BN_set_bit(&(mont->RR), mont->ri * 2))
734        goto err;
735    if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
736        goto err;
737
738    ret = 1;
739 err:
740    BN_CTX_end(ctx);
741    return ret;
742}
743
744BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
745{
746    if (to == from)
747        return (to);
748
749    if (!BN_copy(&(to->RR), &(from->RR)))
750        return NULL;
751    if (!BN_copy(&(to->N), &(from->N)))
752        return NULL;
753    if (!BN_copy(&(to->Ni), &(from->Ni)))
754        return NULL;
755    to->ri = from->ri;
756#if 0                           /* for OpenSSL 0.9.9 mont->n0 */
757    to->n0[0] = from->n0[0];
758    to->n0[1] = from->n0[1];
759#else
760    to->n0 = from->n0;
761#endif
762    return (to);
763}
764
765BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
766                                    const BIGNUM *mod, BN_CTX *ctx)
767{
768    BN_MONT_CTX *ret;
769
770    CRYPTO_r_lock(lock);
771    ret = *pmont;
772    CRYPTO_r_unlock(lock);
773    if (ret)
774        return ret;
775
776    /*
777     * We don't want to serialise globally while doing our lazy-init math in
778     * BN_MONT_CTX_set. That punishes threads that are doing independent
779     * things. Instead, punish the case where more than one thread tries to
780     * lazy-init the same 'pmont', by having each do the lazy-init math work
781     * independently and only use the one from the thread that wins the race
782     * (the losers throw away the work they've done).
783     */
784    ret = BN_MONT_CTX_new();
785    if (!ret)
786        return NULL;
787    if (!BN_MONT_CTX_set(ret, mod, ctx)) {
788        BN_MONT_CTX_free(ret);
789        return NULL;
790    }
791
792    /* The locked compare-and-set, after the local work is done. */
793    CRYPTO_w_lock(lock);
794    if (*pmont) {
795        BN_MONT_CTX_free(ret);
796        ret = *pmont;
797    } else
798        *pmont = ret;
799    CRYPTO_w_unlock(lock);
800    return ret;
801}
802