bn_mul.c revision 296465
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG                  /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70/*
71 * Here follows specialised variants of bn_add_words() and bn_sub_words().
72 * They have the property performing operations on arrays of different sizes.
73 * The sizes of those arrays is expressed through cl, which is the common
74 * length ( basicall, min(len(a),len(b)) ), and dl, which is the delta
75 * between the two lengths, calculated as len(a)-len(b). All lengths are the
76 * number of BN_ULONGs...  For the operations that require a result array as
77 * parameter, it must have the length cl+abs(dl). These functions should
78 * probably end up in bn_asm.c as soon as there are assembler counterparts
79 * for the systems that use assembler files.
80 */
81
82BN_ULONG bn_sub_part_words(BN_ULONG *r,
83                           const BN_ULONG *a, const BN_ULONG *b,
84                           int cl, int dl)
85{
86    BN_ULONG c, t;
87
88    assert(cl >= 0);
89    c = bn_sub_words(r, a, b, cl);
90
91    if (dl == 0)
92        return c;
93
94    r += cl;
95    a += cl;
96    b += cl;
97
98    if (dl < 0) {
99# ifdef BN_COUNT
100        fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl,
101                dl, c);
102# endif
103        for (;;) {
104            t = b[0];
105            r[0] = (0 - t - c) & BN_MASK2;
106            if (t != 0)
107                c = 1;
108            if (++dl >= 0)
109                break;
110
111            t = b[1];
112            r[1] = (0 - t - c) & BN_MASK2;
113            if (t != 0)
114                c = 1;
115            if (++dl >= 0)
116                break;
117
118            t = b[2];
119            r[2] = (0 - t - c) & BN_MASK2;
120            if (t != 0)
121                c = 1;
122            if (++dl >= 0)
123                break;
124
125            t = b[3];
126            r[3] = (0 - t - c) & BN_MASK2;
127            if (t != 0)
128                c = 1;
129            if (++dl >= 0)
130                break;
131
132            b += 4;
133            r += 4;
134        }
135    } else {
136        int save_dl = dl;
137# ifdef BN_COUNT
138        fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl,
139                dl, c);
140# endif
141        while (c) {
142            t = a[0];
143            r[0] = (t - c) & BN_MASK2;
144            if (t != 0)
145                c = 0;
146            if (--dl <= 0)
147                break;
148
149            t = a[1];
150            r[1] = (t - c) & BN_MASK2;
151            if (t != 0)
152                c = 0;
153            if (--dl <= 0)
154                break;
155
156            t = a[2];
157            r[2] = (t - c) & BN_MASK2;
158            if (t != 0)
159                c = 0;
160            if (--dl <= 0)
161                break;
162
163            t = a[3];
164            r[3] = (t - c) & BN_MASK2;
165            if (t != 0)
166                c = 0;
167            if (--dl <= 0)
168                break;
169
170            save_dl = dl;
171            a += 4;
172            r += 4;
173        }
174        if (dl > 0) {
175# ifdef BN_COUNT
176            fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
177                    cl, dl);
178# endif
179            if (save_dl > dl) {
180                switch (save_dl - dl) {
181                case 1:
182                    r[1] = a[1];
183                    if (--dl <= 0)
184                        break;
185                case 2:
186                    r[2] = a[2];
187                    if (--dl <= 0)
188                        break;
189                case 3:
190                    r[3] = a[3];
191                    if (--dl <= 0)
192                        break;
193                }
194                a += 4;
195                r += 4;
196            }
197        }
198        if (dl > 0) {
199# ifdef BN_COUNT
200            fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
201                    cl, dl);
202# endif
203            for (;;) {
204                r[0] = a[0];
205                if (--dl <= 0)
206                    break;
207                r[1] = a[1];
208                if (--dl <= 0)
209                    break;
210                r[2] = a[2];
211                if (--dl <= 0)
212                    break;
213                r[3] = a[3];
214                if (--dl <= 0)
215                    break;
216
217                a += 4;
218                r += 4;
219            }
220        }
221    }
222    return c;
223}
224#endif
225
226BN_ULONG bn_add_part_words(BN_ULONG *r,
227                           const BN_ULONG *a, const BN_ULONG *b,
228                           int cl, int dl)
229{
230    BN_ULONG c, l, t;
231
232    assert(cl >= 0);
233    c = bn_add_words(r, a, b, cl);
234
235    if (dl == 0)
236        return c;
237
238    r += cl;
239    a += cl;
240    b += cl;
241
242    if (dl < 0) {
243        int save_dl = dl;
244#ifdef BN_COUNT
245        fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl,
246                dl, c);
247#endif
248        while (c) {
249            l = (c + b[0]) & BN_MASK2;
250            c = (l < c);
251            r[0] = l;
252            if (++dl >= 0)
253                break;
254
255            l = (c + b[1]) & BN_MASK2;
256            c = (l < c);
257            r[1] = l;
258            if (++dl >= 0)
259                break;
260
261            l = (c + b[2]) & BN_MASK2;
262            c = (l < c);
263            r[2] = l;
264            if (++dl >= 0)
265                break;
266
267            l = (c + b[3]) & BN_MASK2;
268            c = (l < c);
269            r[3] = l;
270            if (++dl >= 0)
271                break;
272
273            save_dl = dl;
274            b += 4;
275            r += 4;
276        }
277        if (dl < 0) {
278#ifdef BN_COUNT
279            fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
280                    cl, dl);
281#endif
282            if (save_dl < dl) {
283                switch (dl - save_dl) {
284                case 1:
285                    r[1] = b[1];
286                    if (++dl >= 0)
287                        break;
288                case 2:
289                    r[2] = b[2];
290                    if (++dl >= 0)
291                        break;
292                case 3:
293                    r[3] = b[3];
294                    if (++dl >= 0)
295                        break;
296                }
297                b += 4;
298                r += 4;
299            }
300        }
301        if (dl < 0) {
302#ifdef BN_COUNT
303            fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n",
304                    cl, dl);
305#endif
306            for (;;) {
307                r[0] = b[0];
308                if (++dl >= 0)
309                    break;
310                r[1] = b[1];
311                if (++dl >= 0)
312                    break;
313                r[2] = b[2];
314                if (++dl >= 0)
315                    break;
316                r[3] = b[3];
317                if (++dl >= 0)
318                    break;
319
320                b += 4;
321                r += 4;
322            }
323        }
324    } else {
325        int save_dl = dl;
326#ifdef BN_COUNT
327        fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
328#endif
329        while (c) {
330            t = (a[0] + c) & BN_MASK2;
331            c = (t < c);
332            r[0] = t;
333            if (--dl <= 0)
334                break;
335
336            t = (a[1] + c) & BN_MASK2;
337            c = (t < c);
338            r[1] = t;
339            if (--dl <= 0)
340                break;
341
342            t = (a[2] + c) & BN_MASK2;
343            c = (t < c);
344            r[2] = t;
345            if (--dl <= 0)
346                break;
347
348            t = (a[3] + c) & BN_MASK2;
349            c = (t < c);
350            r[3] = t;
351            if (--dl <= 0)
352                break;
353
354            save_dl = dl;
355            a += 4;
356            r += 4;
357        }
358#ifdef BN_COUNT
359        fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl,
360                dl);
361#endif
362        if (dl > 0) {
363            if (save_dl > dl) {
364                switch (save_dl - dl) {
365                case 1:
366                    r[1] = a[1];
367                    if (--dl <= 0)
368                        break;
369                case 2:
370                    r[2] = a[2];
371                    if (--dl <= 0)
372                        break;
373                case 3:
374                    r[3] = a[3];
375                    if (--dl <= 0)
376                        break;
377                }
378                a += 4;
379                r += 4;
380            }
381        }
382        if (dl > 0) {
383#ifdef BN_COUNT
384            fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n",
385                    cl, dl);
386#endif
387            for (;;) {
388                r[0] = a[0];
389                if (--dl <= 0)
390                    break;
391                r[1] = a[1];
392                if (--dl <= 0)
393                    break;
394                r[2] = a[2];
395                if (--dl <= 0)
396                    break;
397                r[3] = a[3];
398                if (--dl <= 0)
399                    break;
400
401                a += 4;
402                r += 4;
403            }
404        }
405    }
406    return c;
407}
408
409#ifdef BN_RECURSION
410/*
411 * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
412 * Computer Programming, Vol. 2)
413 */
414
415/*-
416 * r is 2*n2 words in size,
417 * a and b are both n2 words in size.
418 * n2 must be a power of 2.
419 * We multiply and return the result.
420 * t must be 2*n2 words in size
421 * We calculate
422 * a[0]*b[0]
423 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
424 * a[1]*b[1]
425 */
426/* dnX may not be positive, but n2/2+dnX has to be */
427void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
428                      int dna, int dnb, BN_ULONG *t)
429{
430    int n = n2 / 2, c1, c2;
431    int tna = n + dna, tnb = n + dnb;
432    unsigned int neg, zero;
433    BN_ULONG ln, lo, *p;
434
435# ifdef BN_COUNT
436    fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n", n2, dna, n2, dnb);
437# endif
438# ifdef BN_MUL_COMBA
439#  if 0
440    if (n2 == 4) {
441        bn_mul_comba4(r, a, b);
442        return;
443    }
444#  endif
445    /*
446     * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
447     * [steve]
448     */
449    if (n2 == 8 && dna == 0 && dnb == 0) {
450        bn_mul_comba8(r, a, b);
451        return;
452    }
453# endif                         /* BN_MUL_COMBA */
454    /* Else do normal multiply */
455    if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
456        bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
457        if ((dna + dnb) < 0)
458            memset(&r[2 * n2 + dna + dnb], 0,
459                   sizeof(BN_ULONG) * -(dna + dnb));
460        return;
461    }
462    /* r=(a[0]-a[1])*(b[1]-b[0]) */
463    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
464    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
465    zero = neg = 0;
466    switch (c1 * 3 + c2) {
467    case -4:
468        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
469        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
470        break;
471    case -3:
472        zero = 1;
473        break;
474    case -2:
475        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
476        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
477        neg = 1;
478        break;
479    case -1:
480    case 0:
481    case 1:
482        zero = 1;
483        break;
484    case 2:
485        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
486        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
487        neg = 1;
488        break;
489    case 3:
490        zero = 1;
491        break;
492    case 4:
493        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
494        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
495        break;
496    }
497
498# ifdef BN_MUL_COMBA
499    if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take
500                                           * extra args to do this well */
501        if (!zero)
502            bn_mul_comba4(&(t[n2]), t, &(t[n]));
503        else
504            memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
505
506        bn_mul_comba4(r, a, b);
507        bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
508    } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could
509                                                  * take extra args to do
510                                                  * this well */
511        if (!zero)
512            bn_mul_comba8(&(t[n2]), t, &(t[n]));
513        else
514            memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
515
516        bn_mul_comba8(r, a, b);
517        bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
518    } else
519# endif                         /* BN_MUL_COMBA */
520    {
521        p = &(t[n2 * 2]);
522        if (!zero)
523            bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
524        else
525            memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
526        bn_mul_recursive(r, a, b, n, 0, 0, p);
527        bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
528    }
529
530    /*-
531     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
532     * r[10] holds (a[0]*b[0])
533     * r[32] holds (b[1]*b[1])
534     */
535
536    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
537
538    if (neg) {                  /* if t[32] is negative */
539        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
540    } else {
541        /* Might have a carry */
542        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
543    }
544
545    /*-
546     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
547     * r[10] holds (a[0]*b[0])
548     * r[32] holds (b[1]*b[1])
549     * c1 holds the carry bits
550     */
551    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
552    if (c1) {
553        p = &(r[n + n2]);
554        lo = *p;
555        ln = (lo + c1) & BN_MASK2;
556        *p = ln;
557
558        /*
559         * The overflow will stop before we over write words we should not
560         * overwrite
561         */
562        if (ln < (BN_ULONG)c1) {
563            do {
564                p++;
565                lo = *p;
566                ln = (lo + 1) & BN_MASK2;
567                *p = ln;
568            } while (ln == 0);
569        }
570    }
571}
572
573/*
574 * n+tn is the word length t needs to be n*4 is size, as does r
575 */
576/* tnX may not be negative but less than n */
577void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
578                           int tna, int tnb, BN_ULONG *t)
579{
580    int i, j, n2 = n * 2;
581    int c1, c2, neg;
582    BN_ULONG ln, lo, *p;
583
584# ifdef BN_COUNT
585    fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
586            n, tna, n, tnb);
587# endif
588    if (n < 8) {
589        bn_mul_normal(r, a, n + tna, b, n + tnb);
590        return;
591    }
592
593    /* r=(a[0]-a[1])*(b[1]-b[0]) */
594    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
595    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
596    neg = 0;
597    switch (c1 * 3 + c2) {
598    case -4:
599        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
600        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
601        break;
602    case -3:
603        /* break; */
604    case -2:
605        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
606        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
607        neg = 1;
608        break;
609    case -1:
610    case 0:
611    case 1:
612        /* break; */
613    case 2:
614        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
615        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
616        neg = 1;
617        break;
618    case 3:
619        /* break; */
620    case 4:
621        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
622        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
623        break;
624    }
625    /*
626     * The zero case isn't yet implemented here. The speedup would probably
627     * be negligible.
628     */
629# if 0
630    if (n == 4) {
631        bn_mul_comba4(&(t[n2]), t, &(t[n]));
632        bn_mul_comba4(r, a, b);
633        bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
634        memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
635    } else
636# endif
637    if (n == 8) {
638        bn_mul_comba8(&(t[n2]), t, &(t[n]));
639        bn_mul_comba8(r, a, b);
640        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
641        memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
642    } else {
643        p = &(t[n2 * 2]);
644        bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
645        bn_mul_recursive(r, a, b, n, 0, 0, p);
646        i = n / 2;
647        /*
648         * If there is only a bottom half to the number, just do it
649         */
650        if (tna > tnb)
651            j = tna - i;
652        else
653            j = tnb - i;
654        if (j == 0) {
655            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
656                             i, tna - i, tnb - i, p);
657            memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
658        } else if (j > 0) {     /* eg, n == 16, i == 8 and tn == 11 */
659            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
660                                  i, tna - i, tnb - i, p);
661            memset(&(r[n2 + tna + tnb]), 0,
662                   sizeof(BN_ULONG) * (n2 - tna - tnb));
663        } else {                /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
664
665            memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
666            if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
667                && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
668                bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
669            } else {
670                for (;;) {
671                    i /= 2;
672                    /*
673                     * these simplified conditions work exclusively because
674                     * difference between tna and tnb is 1 or 0
675                     */
676                    if (i < tna || i < tnb) {
677                        bn_mul_part_recursive(&(r[n2]),
678                                              &(a[n]), &(b[n]),
679                                              i, tna - i, tnb - i, p);
680                        break;
681                    } else if (i == tna || i == tnb) {
682                        bn_mul_recursive(&(r[n2]),
683                                         &(a[n]), &(b[n]),
684                                         i, tna - i, tnb - i, p);
685                        break;
686                    }
687                }
688            }
689        }
690    }
691
692    /*-
693     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
694     * r[10] holds (a[0]*b[0])
695     * r[32] holds (b[1]*b[1])
696     */
697
698    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
699
700    if (neg) {                  /* if t[32] is negative */
701        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
702    } else {
703        /* Might have a carry */
704        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
705    }
706
707    /*-
708     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
709     * r[10] holds (a[0]*b[0])
710     * r[32] holds (b[1]*b[1])
711     * c1 holds the carry bits
712     */
713    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
714    if (c1) {
715        p = &(r[n + n2]);
716        lo = *p;
717        ln = (lo + c1) & BN_MASK2;
718        *p = ln;
719
720        /*
721         * The overflow will stop before we over write words we should not
722         * overwrite
723         */
724        if (ln < (BN_ULONG)c1) {
725            do {
726                p++;
727                lo = *p;
728                ln = (lo + 1) & BN_MASK2;
729                *p = ln;
730            } while (ln == 0);
731        }
732    }
733}
734
735/*-
736 * a and b must be the same size, which is n2.
737 * r needs to be n2 words and t needs to be n2*2
738 */
739void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
740                          BN_ULONG *t)
741{
742    int n = n2 / 2;
743
744# ifdef BN_COUNT
745    fprintf(stderr, " bn_mul_low_recursive %d * %d\n", n2, n2);
746# endif
747
748    bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
749    if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
750        bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
751        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
752        bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
753        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
754    } else {
755        bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
756        bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
757        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
758        bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
759    }
760}
761
762/*-
763 * a and b must be the same size, which is n2.
764 * r needs to be n2 words and t needs to be n2*2
765 * l is the low words of the output.
766 * t needs to be n2*3
767 */
768void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
769                 BN_ULONG *t)
770{
771    int i, n;
772    int c1, c2;
773    int neg, oneg, zero;
774    BN_ULONG ll, lc, *lp, *mp;
775
776# ifdef BN_COUNT
777    fprintf(stderr, " bn_mul_high %d * %d\n", n2, n2);
778# endif
779    n = n2 / 2;
780
781    /* Calculate (al-ah)*(bh-bl) */
782    neg = zero = 0;
783    c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
784    c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
785    switch (c1 * 3 + c2) {
786    case -4:
787        bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
788        bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
789        break;
790    case -3:
791        zero = 1;
792        break;
793    case -2:
794        bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
795        bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
796        neg = 1;
797        break;
798    case -1:
799    case 0:
800    case 1:
801        zero = 1;
802        break;
803    case 2:
804        bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
805        bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
806        neg = 1;
807        break;
808    case 3:
809        zero = 1;
810        break;
811    case 4:
812        bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
813        bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
814        break;
815    }
816
817    oneg = neg;
818    /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
819    /* r[10] = (a[1]*b[1]) */
820# ifdef BN_MUL_COMBA
821    if (n == 8) {
822        bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
823        bn_mul_comba8(r, &(a[n]), &(b[n]));
824    } else
825# endif
826    {
827        bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
828        bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
829    }
830
831    /*-
832     * s0 == low(al*bl)
833     * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
834     * We know s0 and s1 so the only unknown is high(al*bl)
835     * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
836     * high(al*bl) == s1 - (r[0]+l[0]+t[0])
837     */
838    if (l != NULL) {
839        lp = &(t[n2 + n]);
840        c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
841    } else {
842        c1 = 0;
843        lp = &(r[0]);
844    }
845
846    if (neg)
847        neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
848    else {
849        bn_add_words(&(t[n2]), lp, &(t[0]), n);
850        neg = 0;
851    }
852
853    if (l != NULL) {
854        bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
855    } else {
856        lp = &(t[n2 + n]);
857        mp = &(t[n2]);
858        for (i = 0; i < n; i++)
859            lp[i] = ((~mp[i]) + 1) & BN_MASK2;
860    }
861
862    /*-
863     * s[0] = low(al*bl)
864     * t[3] = high(al*bl)
865     * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866     * r[10] = (a[1]*b[1])
867     */
868    /*-
869     * R[10] = al*bl
870     * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
871     * R[32] = ah*bh
872     */
873    /*-
874     * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
875     * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
876     * R[3]=r[1]+(carry/borrow)
877     */
878    if (l != NULL) {
879        lp = &(t[n2]);
880        c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
881    } else {
882        lp = &(t[n2 + n]);
883        c1 = 0;
884    }
885    c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
886    if (oneg)
887        c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
888    else
889        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
890
891    c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
892    c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
893    if (oneg)
894        c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
895    else
896        c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
897
898    if (c1 != 0) {              /* Add starting at r[0], could be +ve or -ve */
899        i = 0;
900        if (c1 > 0) {
901            lc = c1;
902            do {
903                ll = (r[i] + lc) & BN_MASK2;
904                r[i++] = ll;
905                lc = (lc > ll);
906            } while (lc);
907        } else {
908            lc = -c1;
909            do {
910                ll = r[i];
911                r[i++] = (ll - lc) & BN_MASK2;
912                lc = (lc > ll);
913            } while (lc);
914        }
915    }
916    if (c2 != 0) {              /* Add starting at r[1] */
917        i = n;
918        if (c2 > 0) {
919            lc = c2;
920            do {
921                ll = (r[i] + lc) & BN_MASK2;
922                r[i++] = ll;
923                lc = (lc > ll);
924            } while (lc);
925        } else {
926            lc = -c2;
927            do {
928                ll = r[i];
929                r[i++] = (ll - lc) & BN_MASK2;
930                lc = (lc > ll);
931            } while (lc);
932        }
933    }
934}
935#endif                          /* BN_RECURSION */
936
937int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
938{
939    int ret = 0;
940    int top, al, bl;
941    BIGNUM *rr;
942#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
943    int i;
944#endif
945#ifdef BN_RECURSION
946    BIGNUM *t = NULL;
947    int j = 0, k;
948#endif
949
950#ifdef BN_COUNT
951    fprintf(stderr, "BN_mul %d * %d\n", a->top, b->top);
952#endif
953
954    bn_check_top(a);
955    bn_check_top(b);
956    bn_check_top(r);
957
958    al = a->top;
959    bl = b->top;
960
961    if ((al == 0) || (bl == 0)) {
962        BN_zero(r);
963        return (1);
964    }
965    top = al + bl;
966
967    BN_CTX_start(ctx);
968    if ((r == a) || (r == b)) {
969        if ((rr = BN_CTX_get(ctx)) == NULL)
970            goto err;
971    } else
972        rr = r;
973    rr->neg = a->neg ^ b->neg;
974
975#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
976    i = al - bl;
977#endif
978#ifdef BN_MUL_COMBA
979    if (i == 0) {
980# if 0
981        if (al == 4) {
982            if (bn_wexpand(rr, 8) == NULL)
983                goto err;
984            rr->top = 8;
985            bn_mul_comba4(rr->d, a->d, b->d);
986            goto end;
987        }
988# endif
989        if (al == 8) {
990            if (bn_wexpand(rr, 16) == NULL)
991                goto err;
992            rr->top = 16;
993            bn_mul_comba8(rr->d, a->d, b->d);
994            goto end;
995        }
996    }
997#endif                          /* BN_MUL_COMBA */
998#ifdef BN_RECURSION
999    if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1000        if (i >= -1 && i <= 1) {
1001            /*
1002             * Find out the power of two lower or equal to the longest of the
1003             * two numbers
1004             */
1005            if (i >= 0) {
1006                j = BN_num_bits_word((BN_ULONG)al);
1007            }
1008            if (i == -1) {
1009                j = BN_num_bits_word((BN_ULONG)bl);
1010            }
1011            j = 1 << (j - 1);
1012            assert(j <= al || j <= bl);
1013            k = j + j;
1014            t = BN_CTX_get(ctx);
1015            if (t == NULL)
1016                goto err;
1017            if (al > j || bl > j) {
1018                if (bn_wexpand(t, k * 4) == NULL)
1019                    goto err;
1020                if (bn_wexpand(rr, k * 4) == NULL)
1021                    goto err;
1022                bn_mul_part_recursive(rr->d, a->d, b->d,
1023                                      j, al - j, bl - j, t->d);
1024            } else {            /* al <= j || bl <= j */
1025
1026                if (bn_wexpand(t, k * 2) == NULL)
1027                    goto err;
1028                if (bn_wexpand(rr, k * 2) == NULL)
1029                    goto err;
1030                bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
1031            }
1032            rr->top = top;
1033            goto end;
1034        }
1035# if 0
1036        if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1037            BIGNUM *tmp_bn = (BIGNUM *)b;
1038            if (bn_wexpand(tmp_bn, al) == NULL)
1039                goto err;
1040            tmp_bn->d[bl] = 0;
1041            bl++;
1042            i--;
1043        } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1044            BIGNUM *tmp_bn = (BIGNUM *)a;
1045            if (bn_wexpand(tmp_bn, bl) == NULL)
1046                goto err;
1047            tmp_bn->d[al] = 0;
1048            al++;
1049            i++;
1050        }
1051        if (i == 0) {
1052            /* symmetric and > 4 */
1053            /* 16 or larger */
1054            j = BN_num_bits_word((BN_ULONG)al);
1055            j = 1 << (j - 1);
1056            k = j + j;
1057            t = BN_CTX_get(ctx);
1058            if (al == j) {      /* exact multiple */
1059                if (bn_wexpand(t, k * 2) == NULL)
1060                    goto err;
1061                if (bn_wexpand(rr, k * 2) == NULL)
1062                    goto err;
1063                bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1064            } else {
1065                if (bn_wexpand(t, k * 4) == NULL)
1066                    goto err;
1067                if (bn_wexpand(rr, k * 4) == NULL)
1068                    goto err;
1069                bn_mul_part_recursive(rr->d, a->d, b->d, al - j, j, t->d);
1070            }
1071            rr->top = top;
1072            goto end;
1073        }
1074# endif
1075    }
1076#endif                          /* BN_RECURSION */
1077    if (bn_wexpand(rr, top) == NULL)
1078        goto err;
1079    rr->top = top;
1080    bn_mul_normal(rr->d, a->d, al, b->d, bl);
1081
1082#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1083 end:
1084#endif
1085    bn_correct_top(rr);
1086    if (r != rr)
1087        BN_copy(r, rr);
1088    ret = 1;
1089 err:
1090    bn_check_top(r);
1091    BN_CTX_end(ctx);
1092    return (ret);
1093}
1094
1095void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1096{
1097    BN_ULONG *rr;
1098
1099#ifdef BN_COUNT
1100    fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1101#endif
1102
1103    if (na < nb) {
1104        int itmp;
1105        BN_ULONG *ltmp;
1106
1107        itmp = na;
1108        na = nb;
1109        nb = itmp;
1110        ltmp = a;
1111        a = b;
1112        b = ltmp;
1113
1114    }
1115    rr = &(r[na]);
1116    if (nb <= 0) {
1117        (void)bn_mul_words(r, a, na, 0);
1118        return;
1119    } else
1120        rr[0] = bn_mul_words(r, a, na, b[0]);
1121
1122    for (;;) {
1123        if (--nb <= 0)
1124            return;
1125        rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1126        if (--nb <= 0)
1127            return;
1128        rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1129        if (--nb <= 0)
1130            return;
1131        rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1132        if (--nb <= 0)
1133            return;
1134        rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1135        rr += 4;
1136        r += 4;
1137        b += 4;
1138    }
1139}
1140
1141void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1142{
1143#ifdef BN_COUNT
1144    fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1145#endif
1146    bn_mul_words(r, a, n, b[0]);
1147
1148    for (;;) {
1149        if (--n <= 0)
1150            return;
1151        bn_mul_add_words(&(r[1]), a, n, b[1]);
1152        if (--n <= 0)
1153            return;
1154        bn_mul_add_words(&(r[2]), a, n, b[2]);
1155        if (--n <= 0)
1156            return;
1157        bn_mul_add_words(&(r[3]), a, n, b[3]);
1158        if (--n <= 0)
1159            return;
1160        bn_mul_add_words(&(r[4]), a, n, b[4]);
1161        r += 4;
1162        b += 4;
1163    }
1164}
1165