bn_exp.c revision 296611
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 *    notice, this list of conditions and the following disclaimer in
70 *    the documentation and/or other materials provided with the
71 *    distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 *    software must display the following acknowledgment:
75 *    "This product includes software developed by the OpenSSL Project
76 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 *    endorse or promote products derived from this software without
80 *    prior written permission. For written permission, please contact
81 *    openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 *    nor may "OpenSSL" appear in their names without prior written
85 *    permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 *    acknowledgment:
89 *    "This product includes software developed by the OpenSSL Project
90 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com).  This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "constant_time_locl.h"
114#include "bn_lcl.h"
115
116/* maximum precomputation table size for *variable* sliding windows */
117#define TABLE_SIZE      32
118
119/* this one works - simple but works */
120int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121{
122    int i, bits, ret = 0;
123    BIGNUM *v, *rr;
124
125    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
126        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
127        BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
128        return -1;
129    }
130
131    BN_CTX_start(ctx);
132    if ((r == a) || (r == p))
133        rr = BN_CTX_get(ctx);
134    else
135        rr = r;
136    v = BN_CTX_get(ctx);
137    if (rr == NULL || v == NULL)
138        goto err;
139
140    if (BN_copy(v, a) == NULL)
141        goto err;
142    bits = BN_num_bits(p);
143
144    if (BN_is_odd(p)) {
145        if (BN_copy(rr, a) == NULL)
146            goto err;
147    } else {
148        if (!BN_one(rr))
149            goto err;
150    }
151
152    for (i = 1; i < bits; i++) {
153        if (!BN_sqr(v, v, ctx))
154            goto err;
155        if (BN_is_bit_set(p, i)) {
156            if (!BN_mul(rr, rr, v, ctx))
157                goto err;
158        }
159    }
160    ret = 1;
161 err:
162    if (r != rr)
163        BN_copy(r, rr);
164    BN_CTX_end(ctx);
165    bn_check_top(r);
166    return (ret);
167}
168
169int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
170               BN_CTX *ctx)
171{
172    int ret;
173
174    bn_check_top(a);
175    bn_check_top(p);
176    bn_check_top(m);
177
178    /*-
179     * For even modulus  m = 2^k*m_odd,  it might make sense to compute
180     * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
181     * exponentiation for the odd part), using appropriate exponent
182     * reductions, and combine the results using the CRT.
183     *
184     * For now, we use Montgomery only if the modulus is odd; otherwise,
185     * exponentiation using the reciprocal-based quick remaindering
186     * algorithm is used.
187     *
188     * (Timing obtained with expspeed.c [computations  a^p mod m
189     * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
190     * 4096, 8192 bits], compared to the running time of the
191     * standard algorithm:
192     *
193     *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
194     *                     55 .. 77 %  [UltraSparc processor, but
195     *                                  debug-solaris-sparcv8-gcc conf.]
196     *
197     *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
198     *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
199     *
200     * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
201     * at 2048 and more bits, but at 512 and 1024 bits, it was
202     * slower even than the standard algorithm!
203     *
204     * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
205     * should be obtained when the new Montgomery reduction code
206     * has been integrated into OpenSSL.)
207     */
208
209#define MONT_MUL_MOD
210#define MONT_EXP_WORD
211#define RECP_MUL_MOD
212
213#ifdef MONT_MUL_MOD
214    /*
215     * I have finally been able to take out this pre-condition of the top bit
216     * being set.  It was caused by an error in BN_div with negatives.  There
217     * was also another problem when for a^b%m a >= m.  eay 07-May-97
218     */
219    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
220
221    if (BN_is_odd(m)) {
222# ifdef MONT_EXP_WORD
223        if (a->top == 1 && !a->neg
224            && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
225            BN_ULONG A = a->d[0];
226            ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
227        } else
228# endif
229            ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
230    } else
231#endif
232#ifdef RECP_MUL_MOD
233    {
234        ret = BN_mod_exp_recp(r, a, p, m, ctx);
235    }
236#else
237    {
238        ret = BN_mod_exp_simple(r, a, p, m, ctx);
239    }
240#endif
241
242    bn_check_top(r);
243    return (ret);
244}
245
246int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
247                    const BIGNUM *m, BN_CTX *ctx)
248{
249    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
250    int start = 1;
251    BIGNUM *aa;
252    /* Table of variables obtained from 'ctx' */
253    BIGNUM *val[TABLE_SIZE];
254    BN_RECP_CTX recp;
255
256    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
257        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
258        BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
259        return -1;
260    }
261
262    bits = BN_num_bits(p);
263
264    if (bits == 0) {
265        ret = BN_one(r);
266        return ret;
267    }
268
269    BN_CTX_start(ctx);
270    aa = BN_CTX_get(ctx);
271    val[0] = BN_CTX_get(ctx);
272    if (!aa || !val[0])
273        goto err;
274
275    BN_RECP_CTX_init(&recp);
276    if (m->neg) {
277        /* ignore sign of 'm' */
278        if (!BN_copy(aa, m))
279            goto err;
280        aa->neg = 0;
281        if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
282            goto err;
283    } else {
284        if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
285            goto err;
286    }
287
288    if (!BN_nnmod(val[0], a, m, ctx))
289        goto err;               /* 1 */
290    if (BN_is_zero(val[0])) {
291        BN_zero(r);
292        ret = 1;
293        goto err;
294    }
295
296    window = BN_window_bits_for_exponent_size(bits);
297    if (window > 1) {
298        if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
299            goto err;           /* 2 */
300        j = 1 << (window - 1);
301        for (i = 1; i < j; i++) {
302            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
303                !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
304                goto err;
305        }
306    }
307
308    start = 1;                  /* This is used to avoid multiplication etc
309                                 * when there is only the value '1' in the
310                                 * buffer. */
311    wvalue = 0;                 /* The 'value' of the window */
312    wstart = bits - 1;          /* The top bit of the window */
313    wend = 0;                   /* The bottom bit of the window */
314
315    if (!BN_one(r))
316        goto err;
317
318    for (;;) {
319        if (BN_is_bit_set(p, wstart) == 0) {
320            if (!start)
321                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
322                    goto err;
323            if (wstart == 0)
324                break;
325            wstart--;
326            continue;
327        }
328        /*
329         * We now have wstart on a 'set' bit, we now need to work out how bit
330         * a window to do.  To do this we need to scan forward until the last
331         * set bit before the end of the window
332         */
333        j = wstart;
334        wvalue = 1;
335        wend = 0;
336        for (i = 1; i < window; i++) {
337            if (wstart - i < 0)
338                break;
339            if (BN_is_bit_set(p, wstart - i)) {
340                wvalue <<= (i - wend);
341                wvalue |= 1;
342                wend = i;
343            }
344        }
345
346        /* wend is the size of the current window */
347        j = wend + 1;
348        /* add the 'bytes above' */
349        if (!start)
350            for (i = 0; i < j; i++) {
351                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
352                    goto err;
353            }
354
355        /* wvalue will be an odd number < 2^window */
356        if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
357            goto err;
358
359        /* move the 'window' down further */
360        wstart -= wend + 1;
361        wvalue = 0;
362        start = 0;
363        if (wstart < 0)
364            break;
365    }
366    ret = 1;
367 err:
368    BN_CTX_end(ctx);
369    BN_RECP_CTX_free(&recp);
370    bn_check_top(r);
371    return (ret);
372}
373
374int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
375                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
376{
377    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
378    int start = 1;
379    BIGNUM *d, *r;
380    const BIGNUM *aa;
381    /* Table of variables obtained from 'ctx' */
382    BIGNUM *val[TABLE_SIZE];
383    BN_MONT_CTX *mont = NULL;
384
385    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
386        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
387    }
388
389    bn_check_top(a);
390    bn_check_top(p);
391    bn_check_top(m);
392
393    if (!BN_is_odd(m)) {
394        BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
395        return (0);
396    }
397    bits = BN_num_bits(p);
398    if (bits == 0) {
399        ret = BN_one(rr);
400        return ret;
401    }
402
403    BN_CTX_start(ctx);
404    d = BN_CTX_get(ctx);
405    r = BN_CTX_get(ctx);
406    val[0] = BN_CTX_get(ctx);
407    if (!d || !r || !val[0])
408        goto err;
409
410    /*
411     * If this is not done, things will break in the montgomery part
412     */
413
414    if (in_mont != NULL)
415        mont = in_mont;
416    else {
417        if ((mont = BN_MONT_CTX_new()) == NULL)
418            goto err;
419        if (!BN_MONT_CTX_set(mont, m, ctx))
420            goto err;
421    }
422
423    if (a->neg || BN_ucmp(a, m) >= 0) {
424        if (!BN_nnmod(val[0], a, m, ctx))
425            goto err;
426        aa = val[0];
427    } else
428        aa = a;
429    if (BN_is_zero(aa)) {
430        BN_zero(rr);
431        ret = 1;
432        goto err;
433    }
434    if (!BN_to_montgomery(val[0], aa, mont, ctx))
435        goto err;               /* 1 */
436
437    window = BN_window_bits_for_exponent_size(bits);
438    if (window > 1) {
439        if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
440            goto err;           /* 2 */
441        j = 1 << (window - 1);
442        for (i = 1; i < j; i++) {
443            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
444                !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
445                goto err;
446        }
447    }
448
449    start = 1;                  /* This is used to avoid multiplication etc
450                                 * when there is only the value '1' in the
451                                 * buffer. */
452    wvalue = 0;                 /* The 'value' of the window */
453    wstart = bits - 1;          /* The top bit of the window */
454    wend = 0;                   /* The bottom bit of the window */
455
456    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
457        goto err;
458    for (;;) {
459        if (BN_is_bit_set(p, wstart) == 0) {
460            if (!start) {
461                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
462                    goto err;
463            }
464            if (wstart == 0)
465                break;
466            wstart--;
467            continue;
468        }
469        /*
470         * We now have wstart on a 'set' bit, we now need to work out how bit
471         * a window to do.  To do this we need to scan forward until the last
472         * set bit before the end of the window
473         */
474        j = wstart;
475        wvalue = 1;
476        wend = 0;
477        for (i = 1; i < window; i++) {
478            if (wstart - i < 0)
479                break;
480            if (BN_is_bit_set(p, wstart - i)) {
481                wvalue <<= (i - wend);
482                wvalue |= 1;
483                wend = i;
484            }
485        }
486
487        /* wend is the size of the current window */
488        j = wend + 1;
489        /* add the 'bytes above' */
490        if (!start)
491            for (i = 0; i < j; i++) {
492                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
493                    goto err;
494            }
495
496        /* wvalue will be an odd number < 2^window */
497        if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
498            goto err;
499
500        /* move the 'window' down further */
501        wstart -= wend + 1;
502        wvalue = 0;
503        start = 0;
504        if (wstart < 0)
505            break;
506    }
507    if (!BN_from_montgomery(rr, r, mont, ctx))
508        goto err;
509    ret = 1;
510 err:
511    if ((in_mont == NULL) && (mont != NULL))
512        BN_MONT_CTX_free(mont);
513    BN_CTX_end(ctx);
514    bn_check_top(rr);
515    return (ret);
516}
517
518/*
519 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
520 * layout so that accessing any of these table values shows the same access
521 * pattern as far as cache lines are concerned.  The following functions are
522 * used to transfer a BIGNUM from/to that table.
523 */
524
525static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top,
526                                        unsigned char *buf, int idx,
527                                        int window)
528{
529    int i, j;
530    int width = 1 << window;
531    BN_ULONG *table = (BN_ULONG *)buf;
532
533    if (bn_wexpand(b, top) == NULL)
534        return 0;
535    while (b->top < top) {
536        b->d[b->top++] = 0;
537    }
538
539    for (i = 0, j = idx; i < top; i++, j += width) {
540        table[j] = b->d[i];
541    }
542
543    bn_correct_top(b);
544    return 1;
545}
546
547static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
548                                          unsigned char *buf, int idx,
549                                          int window)
550{
551    int i, j;
552    int width = 1 << window;
553    volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
554
555    if (bn_wexpand(b, top) == NULL)
556        return 0;
557
558    if (window <= 3) {
559        for (i = 0; i < top; i++, table += width) {
560            BN_ULONG acc = 0;
561
562            for (j = 0; j < width; j++) {
563                acc |= table[j] &
564                       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
565            }
566
567            b->d[i] = acc;
568        }
569    } else {
570        int xstride = 1 << (window - 2);
571        BN_ULONG y0, y1, y2, y3;
572
573        i = idx >> (window - 2);        /* equivalent of idx / xstride */
574        idx &= xstride - 1;             /* equivalent of idx % xstride */
575
576        y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
577        y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
578        y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
579        y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
580
581        for (i = 0; i < top; i++, table += width) {
582            BN_ULONG acc = 0;
583
584            for (j = 0; j < xstride; j++) {
585                acc |= ( (table[j + 0 * xstride] & y0) |
586                         (table[j + 1 * xstride] & y1) |
587                         (table[j + 2 * xstride] & y2) |
588                         (table[j + 3 * xstride] & y3) )
589                       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
590            }
591
592            b->d[i] = acc;
593        }
594    }
595
596    b->top = top;
597    bn_correct_top(b);
598    return 1;
599}
600
601/*
602 * Given a pointer value, compute the next address that is a cache line
603 * multiple.
604 */
605#define MOD_EXP_CTIME_ALIGN(x_) \
606        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
607
608/*
609 * This variant of BN_mod_exp_mont() uses fixed windows and the special
610 * precomputation memory layout to limit data-dependency to a minimum to
611 * protect secret exponents (cf. the hyper-threading timing attacks pointed
612 * out by Colin Percival,
613 * http://www.daemong-consideredperthreading-considered-harmful/)
614 */
615int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
616                              const BIGNUM *m, BN_CTX *ctx,
617                              BN_MONT_CTX *in_mont)
618{
619    int i, bits, ret = 0, idx, window, wvalue;
620    int top;
621    BIGNUM *r;
622    const BIGNUM *aa;
623    BN_MONT_CTX *mont = NULL;
624
625    int numPowers;
626    unsigned char *powerbufFree = NULL;
627    int powerbufLen = 0;
628    unsigned char *powerbuf = NULL;
629    BIGNUM *computeTemp = NULL, *am = NULL;
630
631    bn_check_top(a);
632    bn_check_top(p);
633    bn_check_top(m);
634
635    top = m->top;
636
637    if (!(m->d[0] & 1)) {
638        BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
639        return (0);
640    }
641    bits = BN_num_bits(p);
642    if (bits == 0) {
643        ret = BN_one(rr);
644        return ret;
645    }
646
647    /* Initialize BIGNUM context and allocate intermediate result */
648    BN_CTX_start(ctx);
649    r = BN_CTX_get(ctx);
650    if (r == NULL)
651        goto err;
652
653    /*
654     * Allocate a montgomery context if it was not supplied by the caller. If
655     * this is not done, things will break in the montgomery part.
656     */
657    if (in_mont != NULL)
658        mont = in_mont;
659    else {
660        if ((mont = BN_MONT_CTX_new()) == NULL)
661            goto err;
662        if (!BN_MONT_CTX_set(mont, m, ctx))
663            goto err;
664    }
665
666    /* Get the window size to use with size of p. */
667    window = BN_window_bits_for_ctime_exponent_size(bits);
668
669    /*
670     * Allocate a buffer large enough to hold all of the pre-computed powers
671     * of a.
672     */
673    numPowers = 1 << window;
674    powerbufLen = sizeof(m->d[0]) * top * numPowers;
675    if ((powerbufFree =
676         (unsigned char *)OPENSSL_malloc(powerbufLen +
677                                         MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
678        == NULL)
679        goto err;
680
681    powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
682    memset(powerbuf, 0, powerbufLen);
683
684    /*
685     * Initialize the intermediate result. Do this early to save double
686     * conversion, once each for a^0 and intermediate result.
687     */
688    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
689        goto err;
690    if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, window))
691        goto err;
692
693    /* Initialize computeTemp as a^1 with montgomery precalcs */
694    computeTemp = BN_CTX_get(ctx);
695    am = BN_CTX_get(ctx);
696    if (computeTemp == NULL || am == NULL)
697        goto err;
698
699    if (a->neg || BN_ucmp(a, m) >= 0) {
700        if (!BN_mod(am, a, m, ctx))
701            goto err;
702        aa = am;
703    } else
704        aa = a;
705    if (!BN_to_montgomery(am, aa, mont, ctx))
706        goto err;
707    if (!BN_copy(computeTemp, am))
708        goto err;
709    if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, window))
710        goto err;
711
712    /*
713     * If the window size is greater than 1, then calculate
714     * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even powers
715     * could instead be computed as (a^(i/2))^2 to use the slight performance
716     * advantage of sqr over mul).
717     */
718    if (window > 1) {
719        for (i = 2; i < numPowers; i++) {
720            /* Calculate a^i = a^(i-1) * a */
721            if (!BN_mod_mul_montgomery
722                (computeTemp, am, computeTemp, mont, ctx))
723                goto err;
724            if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i,
725                                              window))
726                goto err;
727        }
728    }
729
730    /*
731     * Adjust the number of bits up to a multiple of the window size. If the
732     * exponent length is not a multiple of the window size, then this pads
733     * the most significant bits with zeros to normalize the scanning loop to
734     * there's no special cases. * NOTE: Making the window size a power of
735     * two less than the native * word size ensures that the padded bits
736     * won't go past the last * word in the internal BIGNUM structure. Going
737     * past the end will * still produce the correct result, but causes a
738     * different branch * to be taken in the BN_is_bit_set function.
739     */
740    bits = ((bits + window - 1) / window) * window;
741    idx = bits - 1;             /* The top bit of the window */
742
743    /*
744     * Scan the exponent one window at a time starting from the most
745     * significant bits.
746     */
747    while (idx >= 0) {
748        wvalue = 0;             /* The 'value' of the window */
749
750        /* Scan the window, squaring the result as we go */
751        for (i = 0; i < window; i++, idx--) {
752            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
753                goto err;
754            wvalue = (wvalue << 1) + BN_is_bit_set(p, idx);
755        }
756
757        /*
758         * Fetch the appropriate pre-computed value from the pre-buf
759         */
760        if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
761            (computeTemp, top, powerbuf, wvalue, window))
762            goto err;
763
764        /* Multiply the result into the intermediate result */
765        if (!BN_mod_mul_montgomery(r, r, computeTemp, mont, ctx))
766            goto err;
767    }
768
769    /* Convert the final result from montgomery to standard format */
770    if (!BN_from_montgomery(rr, r, mont, ctx))
771        goto err;
772    ret = 1;
773 err:
774    if ((in_mont == NULL) && (mont != NULL))
775        BN_MONT_CTX_free(mont);
776    if (powerbuf != NULL) {
777        OPENSSL_cleanse(powerbuf, powerbufLen);
778        OPENSSL_free(powerbufFree);
779    }
780    if (am != NULL)
781        BN_clear(am);
782    if (computeTemp != NULL)
783        BN_clear(computeTemp);
784    BN_CTX_end(ctx);
785    return (ret);
786}
787
788int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
789                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
790{
791    BN_MONT_CTX *mont = NULL;
792    int b, bits, ret = 0;
793    int r_is_one;
794    BN_ULONG w, next_w;
795    BIGNUM *d, *r, *t;
796    BIGNUM *swap_tmp;
797#define BN_MOD_MUL_WORD(r, w, m) \
798                (BN_mul_word(r, (w)) && \
799                (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
800                        (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
801    /*
802     * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
803     * probably more overhead than always using BN_mod (which uses BN_copy if
804     * a similar test returns true).
805     */
806    /*
807     * We can use BN_mod and do not need BN_nnmod because our accumulator is
808     * never negative (the result of BN_mod does not depend on the sign of
809     * the modulus).
810     */
811#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
812                (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
813
814    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
815        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
816        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
817        return -1;
818    }
819
820    bn_check_top(p);
821    bn_check_top(m);
822
823    if (!BN_is_odd(m)) {
824        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
825        return (0);
826    }
827    if (m->top == 1)
828        a %= m->d[0];           /* make sure that 'a' is reduced */
829
830    bits = BN_num_bits(p);
831    if (bits == 0) {
832        /* x**0 mod 1 is still zero. */
833        if (BN_is_one(m)) {
834            ret = 1;
835            BN_zero(rr);
836        } else
837            ret = BN_one(rr);
838        return ret;
839    }
840    if (a == 0) {
841        BN_zero(rr);
842        ret = 1;
843        return ret;
844    }
845
846    BN_CTX_start(ctx);
847    d = BN_CTX_get(ctx);
848    r = BN_CTX_get(ctx);
849    t = BN_CTX_get(ctx);
850    if (d == NULL || r == NULL || t == NULL)
851        goto err;
852
853    if (in_mont != NULL)
854        mont = in_mont;
855    else {
856        if ((mont = BN_MONT_CTX_new()) == NULL)
857            goto err;
858        if (!BN_MONT_CTX_set(mont, m, ctx))
859            goto err;
860    }
861
862    r_is_one = 1;               /* except for Montgomery factor */
863
864    /* bits-1 >= 0 */
865
866    /* The result is accumulated in the product r*w. */
867    w = a;                      /* bit 'bits-1' of 'p' is always set */
868    for (b = bits - 2; b >= 0; b--) {
869        /* First, square r*w. */
870        next_w = w * w;
871        if ((next_w / w) != w) { /* overflow */
872            if (r_is_one) {
873                if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
874                    goto err;
875                r_is_one = 0;
876            } else {
877                if (!BN_MOD_MUL_WORD(r, w, m))
878                    goto err;
879            }
880            next_w = 1;
881        }
882        w = next_w;
883        if (!r_is_one) {
884            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
885                goto err;
886        }
887
888        /* Second, multiply r*w by 'a' if exponent bit is set. */
889        if (BN_is_bit_set(p, b)) {
890            next_w = w * a;
891            if ((next_w / a) != w) { /* overflow */
892                if (r_is_one) {
893                    if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
894                        goto err;
895                    r_is_one = 0;
896                } else {
897                    if (!BN_MOD_MUL_WORD(r, w, m))
898                        goto err;
899                }
900                next_w = a;
901            }
902            w = next_w;
903        }
904    }
905
906    /* Finally, set r:=r*w. */
907    if (w != 1) {
908        if (r_is_one) {
909            if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
910                goto err;
911            r_is_one = 0;
912        } else {
913            if (!BN_MOD_MUL_WORD(r, w, m))
914                goto err;
915        }
916    }
917
918    if (r_is_one) {             /* can happen only if a == 1 */
919        if (!BN_one(rr))
920            goto err;
921    } else {
922        if (!BN_from_montgomery(rr, r, mont, ctx))
923            goto err;
924    }
925    ret = 1;
926 err:
927    if ((in_mont == NULL) && (mont != NULL))
928        BN_MONT_CTX_free(mont);
929    BN_CTX_end(ctx);
930    bn_check_top(rr);
931    return (ret);
932}
933
934/* The old fallback, simple version :-) */
935int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
936                      const BIGNUM *m, BN_CTX *ctx)
937{
938    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
939    int start = 1;
940    BIGNUM *d;
941    /* Table of variables obtained from 'ctx' */
942    BIGNUM *val[TABLE_SIZE];
943
944    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
945        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
946        BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
947        return -1;
948    }
949
950    bits = BN_num_bits(p);
951
952    if (bits == 0) {
953        ret = BN_one(r);
954        return ret;
955    }
956
957    BN_CTX_start(ctx);
958    d = BN_CTX_get(ctx);
959    val[0] = BN_CTX_get(ctx);
960    if (!d || !val[0])
961        goto err;
962
963    if (!BN_nnmod(val[0], a, m, ctx))
964        goto err;               /* 1 */
965    if (BN_is_zero(val[0])) {
966        BN_zero(r);
967        ret = 1;
968        goto err;
969    }
970
971    window = BN_window_bits_for_exponent_size(bits);
972    if (window > 1) {
973        if (!BN_mod_mul(d, val[0], val[0], m, ctx))
974            goto err;           /* 2 */
975        j = 1 << (window - 1);
976        for (i = 1; i < j; i++) {
977            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
978                !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
979                goto err;
980        }
981    }
982
983    start = 1;                  /* This is used to avoid multiplication etc
984                                 * when there is only the value '1' in the
985                                 * buffer. */
986    wvalue = 0;                 /* The 'value' of the window */
987    wstart = bits - 1;          /* The top bit of the window */
988    wend = 0;                   /* The bottom bit of the window */
989
990    if (!BN_one(r))
991        goto err;
992
993    for (;;) {
994        if (BN_is_bit_set(p, wstart) == 0) {
995            if (!start)
996                if (!BN_mod_mul(r, r, r, m, ctx))
997                    goto err;
998            if (wstart == 0)
999                break;
1000            wstart--;
1001            continue;
1002        }
1003        /*
1004         * We now have wstart on a 'set' bit, we now need to work out how bit
1005         * a window to do.  To do this we need to scan forward until the last
1006         * set bit before the end of the window
1007         */
1008        j = wstart;
1009        wvalue = 1;
1010        wend = 0;
1011        for (i = 1; i < window; i++) {
1012            if (wstart - i < 0)
1013                break;
1014            if (BN_is_bit_set(p, wstart - i)) {
1015                wvalue <<= (i - wend);
1016                wvalue |= 1;
1017                wend = i;
1018            }
1019        }
1020
1021        /* wend is the size of the current window */
1022        j = wend + 1;
1023        /* add the 'bytes above' */
1024        if (!start)
1025            for (i = 0; i < j; i++) {
1026                if (!BN_mod_mul(r, r, r, m, ctx))
1027                    goto err;
1028            }
1029
1030        /* wvalue will be an odd number < 2^window */
1031        if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1032            goto err;
1033
1034        /* move the 'window' down further */
1035        wstart -= wend + 1;
1036        wvalue = 0;
1037        start = 0;
1038        if (wstart < 0)
1039            break;
1040    }
1041    ret = 1;
1042 err:
1043    BN_CTX_end(ctx);
1044    bn_check_top(r);
1045    return (ret);
1046}
1047