bn_mul.c revision 1.22
1/* $OpenBSD: bn_mul.c,v 1.22 2022/11/26 13:56:33 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <stdio.h>
66#include <string.h>
67
68#include <openssl/opensslconf.h>
69
70#include "bn_lcl.h"
71
72#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
73/* Here follows specialised variants of bn_add_words() and
74   bn_sub_words().  They have the property performing operations on
75   arrays of different sizes.  The sizes of those arrays is expressed through
76   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
77   which is the delta between the two lengths, calculated as len(a)-len(b).
78   All lengths are the number of BN_ULONGs...  For the operations that require
79   a result array as parameter, it must have the length cl+abs(dl).
80   These functions should probably end up in bn_asm.c as soon as there are
81   assembler counterparts for the systems that use assembler files.  */
82
83BN_ULONG
84bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
85    int dl)
86{
87	BN_ULONG c, t;
88
89	assert(cl >= 0);
90	c = bn_sub_words(r, a, b, cl);
91
92	if (dl == 0)
93		return c;
94
95	r += cl;
96	a += cl;
97	b += cl;
98
99	if (dl < 0) {
100#ifdef BN_COUNT
101		fprintf(stderr,
102		    "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
103		    cl, dl, c);
104#endif
105		for (;;) {
106			t = b[0];
107			r[0] = (0 - t - c) & BN_MASK2;
108			if (t != 0)
109				c = 1;
110			if (++dl >= 0)
111				break;
112
113			t = b[1];
114			r[1] = (0 - t - c) & BN_MASK2;
115			if (t != 0)
116				c = 1;
117			if (++dl >= 0)
118				break;
119
120			t = b[2];
121			r[2] = (0 - t - c) & BN_MASK2;
122			if (t != 0)
123				c = 1;
124			if (++dl >= 0)
125				break;
126
127			t = b[3];
128			r[3] = (0 - t - c) & BN_MASK2;
129			if (t != 0)
130				c = 1;
131			if (++dl >= 0)
132				break;
133
134			b += 4;
135			r += 4;
136		}
137	} else {
138		int save_dl = dl;
139#ifdef BN_COUNT
140		fprintf(stderr,
141		    "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
142		    cl, dl, c);
143#endif
144		while (c) {
145			t = a[0];
146			r[0] = (t - c) & BN_MASK2;
147			if (t != 0)
148				c = 0;
149			if (--dl <= 0)
150				break;
151
152			t = a[1];
153			r[1] = (t - c) & BN_MASK2;
154			if (t != 0)
155				c = 0;
156			if (--dl <= 0)
157				break;
158
159			t = a[2];
160			r[2] = (t - c) & BN_MASK2;
161			if (t != 0)
162				c = 0;
163			if (--dl <= 0)
164				break;
165
166			t = a[3];
167			r[3] = (t - c) & BN_MASK2;
168			if (t != 0)
169				c = 0;
170			if (--dl <= 0)
171				break;
172
173			save_dl = dl;
174			a += 4;
175			r += 4;
176		}
177		if (dl > 0) {
178#ifdef BN_COUNT
179			fprintf(stderr,
180			    "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
181			    cl, dl);
182#endif
183			if (save_dl > dl) {
184				switch (save_dl - dl) {
185				case 1:
186					r[1] = a[1];
187					if (--dl <= 0)
188						break;
189				case 2:
190					r[2] = a[2];
191					if (--dl <= 0)
192						break;
193				case 3:
194					r[3] = a[3];
195					if (--dl <= 0)
196						break;
197				}
198				a += 4;
199				r += 4;
200			}
201		}
202		if (dl > 0) {
203#ifdef BN_COUNT
204			fprintf(stderr,
205			    "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
206			    cl, dl);
207#endif
208			for (;;) {
209				r[0] = a[0];
210				if (--dl <= 0)
211					break;
212				r[1] = a[1];
213				if (--dl <= 0)
214					break;
215				r[2] = a[2];
216				if (--dl <= 0)
217					break;
218				r[3] = a[3];
219				if (--dl <= 0)
220					break;
221
222				a += 4;
223				r += 4;
224			}
225		}
226	}
227	return c;
228}
229#endif
230
231BN_ULONG
232bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
233    int dl)
234{
235	BN_ULONG c, l, t;
236
237	assert(cl >= 0);
238	c = bn_add_words(r, a, b, cl);
239
240	if (dl == 0)
241		return c;
242
243	r += cl;
244	a += cl;
245	b += cl;
246
247	if (dl < 0) {
248		int save_dl = dl;
249#ifdef BN_COUNT
250		fprintf(stderr,
251		    "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
252		    cl, dl, c);
253#endif
254		while (c) {
255			l = (c + b[0]) & BN_MASK2;
256			c = (l < c);
257			r[0] = l;
258			if (++dl >= 0)
259				break;
260
261			l = (c + b[1]) & BN_MASK2;
262			c = (l < c);
263			r[1] = l;
264			if (++dl >= 0)
265				break;
266
267			l = (c + b[2]) & BN_MASK2;
268			c = (l < c);
269			r[2] = l;
270			if (++dl >= 0)
271				break;
272
273			l = (c + b[3]) & BN_MASK2;
274			c = (l < c);
275			r[3] = l;
276			if (++dl >= 0)
277				break;
278
279			save_dl = dl;
280			b += 4;
281			r += 4;
282		}
283		if (dl < 0) {
284#ifdef BN_COUNT
285			fprintf(stderr,
286			    "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
287			    cl, dl);
288#endif
289			if (save_dl < dl) {
290				switch (dl - save_dl) {
291				case 1:
292					r[1] = b[1];
293					if (++dl >= 0)
294						break;
295				case 2:
296					r[2] = b[2];
297					if (++dl >= 0)
298						break;
299				case 3:
300					r[3] = b[3];
301					if (++dl >= 0)
302						break;
303				}
304				b += 4;
305				r += 4;
306			}
307		}
308		if (dl < 0) {
309#ifdef BN_COUNT
310			fprintf(stderr,
311			    "  bn_add_part_words %d + %d (dl < 0, copy)\n",
312			    cl, dl);
313#endif
314			for (;;) {
315				r[0] = b[0];
316				if (++dl >= 0)
317					break;
318				r[1] = b[1];
319				if (++dl >= 0)
320					break;
321				r[2] = b[2];
322				if (++dl >= 0)
323					break;
324				r[3] = b[3];
325				if (++dl >= 0)
326					break;
327
328				b += 4;
329				r += 4;
330			}
331		}
332	} else {
333		int save_dl = dl;
334#ifdef BN_COUNT
335		fprintf(stderr,
336		    "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
337#endif
338		while (c) {
339			t = (a[0] + c) & BN_MASK2;
340			c = (t < c);
341			r[0] = t;
342			if (--dl <= 0)
343				break;
344
345			t = (a[1] + c) & BN_MASK2;
346			c = (t < c);
347			r[1] = t;
348			if (--dl <= 0)
349				break;
350
351			t = (a[2] + c) & BN_MASK2;
352			c = (t < c);
353			r[2] = t;
354			if (--dl <= 0)
355				break;
356
357			t = (a[3] + c) & BN_MASK2;
358			c = (t < c);
359			r[3] = t;
360			if (--dl <= 0)
361				break;
362
363			save_dl = dl;
364			a += 4;
365			r += 4;
366		}
367#ifdef BN_COUNT
368		fprintf(stderr,
369		    "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
370#endif
371		if (dl > 0) {
372			if (save_dl > dl) {
373				switch (save_dl - dl) {
374				case 1:
375					r[1] = a[1];
376					if (--dl <= 0)
377						break;
378				case 2:
379					r[2] = a[2];
380					if (--dl <= 0)
381						break;
382				case 3:
383					r[3] = a[3];
384					if (--dl <= 0)
385						break;
386				}
387				a += 4;
388				r += 4;
389			}
390		}
391		if (dl > 0) {
392#ifdef BN_COUNT
393			fprintf(stderr,
394			    "  bn_add_part_words %d + %d (dl > 0, copy)\n",
395			    cl, dl);
396#endif
397			for (;;) {
398				r[0] = a[0];
399				if (--dl <= 0)
400					break;
401				r[1] = a[1];
402				if (--dl <= 0)
403					break;
404				r[2] = a[2];
405				if (--dl <= 0)
406					break;
407				r[3] = a[3];
408				if (--dl <= 0)
409					break;
410
411				a += 4;
412				r += 4;
413			}
414		}
415	}
416	return c;
417}
418
419#ifdef BN_RECURSION
420/* Karatsuba recursive multiplication algorithm
421 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
422
423/* r is 2*n2 words in size,
424 * a and b are both n2 words in size.
425 * n2 must be a power of 2.
426 * We multiply and return the result.
427 * t must be 2*n2 words in size
428 * We calculate
429 * a[0]*b[0]
430 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
431 * a[1]*b[1]
432 */
433/* dnX may not be positive, but n2/2+dnX has to be */
434void
435bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
436    int dnb, BN_ULONG *t)
437{
438	int n = n2 / 2, c1, c2;
439	int tna = n + dna, tnb = n + dnb;
440	unsigned int neg, zero;
441	BN_ULONG ln, lo, *p;
442
443# ifdef BN_COUNT
444	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
445# endif
446# ifdef BN_MUL_COMBA
447#  if 0
448	if (n2 == 4) {
449		bn_mul_comba4(r, a, b);
450		return;
451	}
452#  endif
453	/* Only call bn_mul_comba 8 if n2 == 8 and the
454	 * two arrays are complete [steve]
455	 */
456	if (n2 == 8 && dna == 0 && dnb == 0) {
457		bn_mul_comba8(r, a, b);
458		return;
459	}
460# endif /* BN_MUL_COMBA */
461	/* Else do normal multiply */
462	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
463		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
464		if ((dna + dnb) < 0)
465			memset(&r[2*n2 + dna + dnb], 0,
466			    sizeof(BN_ULONG) * -(dna + dnb));
467		return;
468	}
469	/* r=(a[0]-a[1])*(b[1]-b[0]) */
470	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
471	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
472	zero = neg = 0;
473	switch (c1 * 3 + c2) {
474	case -4:
475		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
476		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
477		break;
478	case -3:
479		zero = 1;
480		break;
481	case -2:
482		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
483		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
484		neg = 1;
485		break;
486	case -1:
487	case 0:
488	case 1:
489		zero = 1;
490		break;
491	case 2:
492		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
493		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
494		neg = 1;
495		break;
496	case 3:
497		zero = 1;
498		break;
499	case 4:
500		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
501		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
502		break;
503	}
504
505# ifdef BN_MUL_COMBA
506	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
507					       extra args to do this well */
508	{
509		if (!zero)
510			bn_mul_comba4(&(t[n2]), t, &(t[n]));
511		else
512			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
513
514		bn_mul_comba4(r, a, b);
515		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
516	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
517						    take extra args to do this
518						    well */
519	{
520		if (!zero)
521			bn_mul_comba8(&(t[n2]), t, &(t[n]));
522		else
523			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
524
525		bn_mul_comba8(r, a, b);
526		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
527	} else
528# endif /* BN_MUL_COMBA */
529	{
530		p = &(t[n2 * 2]);
531		if (!zero)
532			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
533		else
534			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
535		bn_mul_recursive(r, a, b, n, 0, 0, p);
536		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
537	}
538
539	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
540	 * r[10] holds (a[0]*b[0])
541	 * r[32] holds (b[1]*b[1])
542	 */
543
544	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
545
546	if (neg) /* if t[32] is negative */
547	{
548		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
549	} else {
550		/* Might have a carry */
551		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
552	}
553
554	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
555	 * r[10] holds (a[0]*b[0])
556	 * r[32] holds (b[1]*b[1])
557	 * c1 holds the carry bits
558	 */
559	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
560	if (c1) {
561		p = &(r[n + n2]);
562		lo= *p;
563		ln = (lo + c1) & BN_MASK2;
564		*p = ln;
565
566		/* The overflow will stop before we over write
567		 * words we should not overwrite */
568		if (ln < (BN_ULONG)c1) {
569			do {
570				p++;
571				lo= *p;
572				ln = (lo + 1) & BN_MASK2;
573				*p = ln;
574			} while (ln == 0);
575		}
576	}
577}
578
579/* n+tn is the word length
580 * t needs to be n*4 is size, as does r */
581/* tnX may not be negative but less than n */
582void
583bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
584    int tnb, BN_ULONG *t)
585{
586	int i, j, n2 = n * 2;
587	int c1, c2, neg;
588	BN_ULONG ln, lo, *p;
589
590# ifdef BN_COUNT
591	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
592	    n, tna, n, tnb);
593# endif
594	if (n < 8) {
595		bn_mul_normal(r, a, n + tna, b, n + tnb);
596		return;
597	}
598
599	/* r=(a[0]-a[1])*(b[1]-b[0]) */
600	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
601	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
602	neg = 0;
603	switch (c1 * 3 + c2) {
604	case -4:
605		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
606		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
607		break;
608	case -3:
609		/* break; */
610	case -2:
611		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
612		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
613		neg = 1;
614		break;
615	case -1:
616	case 0:
617	case 1:
618		/* break; */
619	case 2:
620		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
621		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
622		neg = 1;
623		break;
624	case 3:
625		/* break; */
626	case 4:
627		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
628		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
629		break;
630	}
631		/* The zero case isn't yet implemented here. The speedup
632		   would probably be negligible. */
633# if 0
634	if (n == 4) {
635		bn_mul_comba4(&(t[n2]), t, &(t[n]));
636		bn_mul_comba4(r, a, b);
637		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
638		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
639	} else
640# endif
641		if (n == 8) {
642		bn_mul_comba8(&(t[n2]), t, &(t[n]));
643		bn_mul_comba8(r, a, b);
644		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
645		memset(&(r[n2 + tna + tnb]), 0,
646		    sizeof(BN_ULONG) * (n2 - tna - tnb));
647	} else {
648		p = &(t[n2*2]);
649		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
650		bn_mul_recursive(r, a, b, n, 0, 0, p);
651		i = n / 2;
652		/* If there is only a bottom half to the number,
653		 * just do it */
654		if (tna > tnb)
655			j = tna - i;
656		else
657			j = tnb - i;
658		if (j == 0) {
659			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
660			    i, tna - i, tnb - i, p);
661			memset(&(r[n2 + i * 2]), 0,
662			    sizeof(BN_ULONG) * (n2 - i * 2));
663		}
664		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
665		{
666			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
667			    i, tna - i, tnb - i, p);
668			memset(&(r[n2 + tna + tnb]), 0,
669			    sizeof(BN_ULONG) * (n2 - tna - tnb));
670		}
671		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
672		{
673			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
674			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
675			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
676				bn_mul_normal(&(r[n2]), &(a[n]), tna,
677				    &(b[n]), tnb);
678			} else {
679				for (;;) {
680					i /= 2;
681					/* these simplified conditions work
682					 * exclusively because difference
683					 * between tna and tnb is 1 or 0 */
684					if (i < tna || i < tnb) {
685						bn_mul_part_recursive(&(r[n2]),
686						    &(a[n]), &(b[n]), i,
687						    tna - i, tnb - i, p);
688						break;
689					} else if (i == tna || i == tnb) {
690						bn_mul_recursive(&(r[n2]),
691						    &(a[n]), &(b[n]), i,
692						    tna - i, tnb - i, p);
693						break;
694					}
695				}
696			}
697		}
698	}
699
700	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
701	 * r[10] holds (a[0]*b[0])
702	 * r[32] holds (b[1]*b[1])
703	 */
704
705	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
706
707	if (neg) /* if t[32] is negative */
708	{
709		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
710	} else {
711		/* Might have a carry */
712		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
713	}
714
715	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
716	 * r[10] holds (a[0]*b[0])
717	 * r[32] holds (b[1]*b[1])
718	 * c1 holds the carry bits
719	 */
720	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
721	if (c1) {
722		p = &(r[n + n2]);
723		lo= *p;
724		ln = (lo + c1)&BN_MASK2;
725		*p = ln;
726
727		/* The overflow will stop before we over write
728		 * words we should not overwrite */
729		if (ln < (BN_ULONG)c1) {
730			do {
731				p++;
732				lo= *p;
733				ln = (lo + 1) & BN_MASK2;
734				*p = ln;
735			} while (ln == 0);
736		}
737	}
738}
739
740/* a and b must be the same size, which is n2.
741 * r needs to be n2 words and t needs to be n2*2
742 */
743void
744bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
745{
746	int n = n2 / 2;
747
748# ifdef BN_COUNT
749	fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
750# endif
751
752	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
753	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
754		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
755		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
756		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
757		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
758	} else {
759		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
760		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
761		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
762		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
763	}
764}
765
766/* a and b must be the same size, which is n2.
767 * r needs to be n2 words and t needs to be n2*2
768 * l is the low words of the output.
769 * t needs to be n2*3
770 */
771void
772bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
773    BN_ULONG *t)
774{
775	int i, n;
776	int c1, c2;
777	int neg, oneg, zero;
778	BN_ULONG ll, lc, *lp, *mp;
779
780# ifdef BN_COUNT
781	fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
782# endif
783	n = n2 / 2;
784
785	/* Calculate (al-ah)*(bh-bl) */
786	neg = zero = 0;
787	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
788	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
789	switch (c1 * 3 + c2) {
790	case -4:
791		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
792		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
793		break;
794	case -3:
795		zero = 1;
796		break;
797	case -2:
798		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
799		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
800		neg = 1;
801		break;
802	case -1:
803	case 0:
804	case 1:
805		zero = 1;
806		break;
807	case 2:
808		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
809		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
810		neg = 1;
811		break;
812	case 3:
813		zero = 1;
814		break;
815	case 4:
816		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
817		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
818		break;
819	}
820
821	oneg = neg;
822	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
823	/* r[10] = (a[1]*b[1]) */
824# ifdef BN_MUL_COMBA
825	if (n == 8) {
826		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
827		bn_mul_comba8(r, &(a[n]), &(b[n]));
828	} else
829# endif
830	{
831		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
832		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
833	}
834
835	/* s0 == low(al*bl)
836	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
837	 * We know s0 and s1 so the only unknown is high(al*bl)
838	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
839	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
840	 */
841	if (l != NULL) {
842		lp = &(t[n2 + n]);
843		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
844	} else {
845		c1 = 0;
846		lp = &(r[0]);
847	}
848
849	if (neg)
850		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
851	else {
852		bn_add_words(&(t[n2]), lp, &(t[0]), n);
853		neg = 0;
854	}
855
856	if (l != NULL) {
857		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
858	} else {
859		lp = &(t[n2 + n]);
860		mp = &(t[n2]);
861		for (i = 0; i < n; i++)
862			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
863	}
864
865	/* s[0] = low(al*bl)
866	 * t[3] = high(al*bl)
867	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
868	 * r[10] = (a[1]*b[1])
869	 */
870	/* R[10] = al*bl
871	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
872	 * R[32] = ah*bh
873	 */
874	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
875	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
876	 * R[3]=r[1]+(carry/borrow)
877	 */
878	if (l != NULL) {
879		lp = &(t[n2]);
880		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
881	} else {
882		lp = &(t[n2 + n]);
883		c1 = 0;
884	}
885	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
886	if (oneg)
887		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
888	else
889		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
890
891	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
892	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
893	if (oneg)
894		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
895	else
896		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
897
898	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
899	{
900		i = 0;
901		if (c1 > 0) {
902			lc = c1;
903			do {
904				ll = (r[i] + lc) & BN_MASK2;
905				r[i++] = ll;
906				lc = (lc > ll);
907			} while (lc);
908		} else {
909			lc = -c1;
910			do {
911				ll = r[i];
912				r[i++] = (ll - lc) & BN_MASK2;
913				lc = (lc > ll);
914			} while (lc);
915		}
916	}
917	if (c2 != 0) /* Add starting at r[1] */
918	{
919		i = n;
920		if (c2 > 0) {
921			lc = c2;
922			do {
923				ll = (r[i] + lc) & BN_MASK2;
924				r[i++] = ll;
925				lc = (lc > ll);
926			} while (lc);
927		} else {
928			lc = -c2;
929			do {
930				ll = r[i];
931				r[i++] = (ll - lc) & BN_MASK2;
932				lc = (lc > ll);
933			} while (lc);
934		}
935	}
936}
937#endif /* BN_RECURSION */
938
939int
940BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
941{
942	int ret = 0;
943	int top, al, bl;
944	BIGNUM *rr;
945#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
946	int i;
947#endif
948#ifdef BN_RECURSION
949	BIGNUM *t = NULL;
950	int j = 0, k;
951#endif
952
953#ifdef BN_COUNT
954	fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
955#endif
956
957
958	al = a->top;
959	bl = b->top;
960
961	if ((al == 0) || (bl == 0)) {
962		BN_zero(r);
963		return (1);
964	}
965	top = al + bl;
966
967	BN_CTX_start(ctx);
968	if ((r == a) || (r == b)) {
969		if ((rr = BN_CTX_get(ctx)) == NULL)
970			goto err;
971	} else
972		rr = r;
973	rr->neg = a->neg ^ b->neg;
974
975#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
976	i = al - bl;
977#endif
978#ifdef BN_MUL_COMBA
979	if (i == 0) {
980# if 0
981		if (al == 4) {
982			if (!bn_wexpand(rr, 8))
983				goto err;
984			rr->top = 8;
985			bn_mul_comba4(rr->d, a->d, b->d);
986			goto end;
987		}
988# endif
989		if (al == 8) {
990			if (!bn_wexpand(rr, 16))
991				goto err;
992			rr->top = 16;
993			bn_mul_comba8(rr->d, a->d, b->d);
994			goto end;
995		}
996	}
997#endif /* BN_MUL_COMBA */
998#ifdef BN_RECURSION
999	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1000		if (i >= -1 && i <= 1) {
1001			/* Find out the power of two lower or equal
1002			   to the longest of the two numbers */
1003			if (i >= 0) {
1004				j = BN_num_bits_word((BN_ULONG)al);
1005			}
1006			if (i == -1) {
1007				j = BN_num_bits_word((BN_ULONG)bl);
1008			}
1009			j = 1 << (j - 1);
1010			assert(j <= al || j <= bl);
1011			k = j + j;
1012			if ((t = BN_CTX_get(ctx)) == NULL)
1013				goto err;
1014			if (al > j || bl > j) {
1015				if (!bn_wexpand(t, k * 4))
1016					goto err;
1017				if (!bn_wexpand(rr, k * 4))
1018					goto err;
1019				bn_mul_part_recursive(rr->d, a->d, b->d,
1020				    j, al - j, bl - j, t->d);
1021			}
1022			else	/* al <= j || bl <= j */
1023			{
1024				if (!bn_wexpand(t, k * 2))
1025					goto err;
1026				if (!bn_wexpand(rr, k * 2))
1027					goto err;
1028				bn_mul_recursive(rr->d, a->d, b->d,
1029				    j, al - j, bl - j, t->d);
1030			}
1031			rr->top = top;
1032			goto end;
1033		}
1034#if 0
1035		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1036			BIGNUM *tmp_bn = (BIGNUM *)b;
1037			if (!bn_wexpand(tmp_bn, al))
1038				goto err;
1039			tmp_bn->d[bl] = 0;
1040			bl++;
1041			i--;
1042		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1043			BIGNUM *tmp_bn = (BIGNUM *)a;
1044			if (!bn_wexpand(tmp_bn, bl))
1045				goto err;
1046			tmp_bn->d[al] = 0;
1047			al++;
1048			i++;
1049		}
1050		if (i == 0) {
1051			/* symmetric and > 4 */
1052			/* 16 or larger */
1053			j = BN_num_bits_word((BN_ULONG)al);
1054			j = 1 << (j - 1);
1055			k = j + j;
1056			if ((t = BN_CTX_get(ctx)) == NULL)
1057				goto err;
1058			if (al == j) /* exact multiple */
1059			{
1060				if (!bn_wexpand(t, k * 2))
1061					goto err;
1062				if (!bn_wexpand(rr, k * 2))
1063					goto err;
1064				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1065			} else {
1066				if (!bn_wexpand(t, k * 4))
1067					goto err;
1068				if (!bn_wexpand(rr, k * 4))
1069					goto err;
1070				bn_mul_part_recursive(rr->d, a->d, b->d,
1071				    al - j, j, t->d);
1072			}
1073			rr->top = top;
1074			goto end;
1075		}
1076#endif
1077	}
1078#endif /* BN_RECURSION */
1079	if (!bn_wexpand(rr, top))
1080		goto err;
1081	rr->top = top;
1082	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1083
1084#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1085end:
1086#endif
1087	bn_correct_top(rr);
1088	if (r != rr)
1089		BN_copy(r, rr);
1090	ret = 1;
1091err:
1092	BN_CTX_end(ctx);
1093	return (ret);
1094}
1095
1096void
1097bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1098{
1099	BN_ULONG *rr;
1100
1101#ifdef BN_COUNT
1102	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1103#endif
1104
1105	if (na < nb) {
1106		int itmp;
1107		BN_ULONG *ltmp;
1108
1109		itmp = na;
1110		na = nb;
1111		nb = itmp;
1112		ltmp = a;
1113		a = b;
1114		b = ltmp;
1115
1116	}
1117	rr = &(r[na]);
1118	if (nb <= 0) {
1119		(void)bn_mul_words(r, a, na, 0);
1120		return;
1121	} else
1122		rr[0] = bn_mul_words(r, a, na, b[0]);
1123
1124	for (;;) {
1125		if (--nb <= 0)
1126			return;
1127		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1128		if (--nb <= 0)
1129			return;
1130		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1131		if (--nb <= 0)
1132			return;
1133		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1134		if (--nb <= 0)
1135			return;
1136		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1137		rr += 4;
1138		r += 4;
1139		b += 4;
1140	}
1141}
1142
1143void
1144bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1145{
1146#ifdef BN_COUNT
1147	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1148#endif
1149	bn_mul_words(r, a, n, b[0]);
1150
1151	for (;;) {
1152		if (--n <= 0)
1153			return;
1154		bn_mul_add_words(&(r[1]), a, n, b[1]);
1155		if (--n <= 0)
1156			return;
1157		bn_mul_add_words(&(r[2]), a, n, b[2]);
1158		if (--n <= 0)
1159			return;
1160		bn_mul_add_words(&(r[3]), a, n, b[3]);
1161		if (--n <= 0)
1162			return;
1163		bn_mul_add_words(&(r[4]), a, n, b[4]);
1164		r += 4;
1165		b += 4;
1166	}
1167}
1168