bn_mul.c revision 1.24
1/* $OpenBSD: bn_mul.c,v 1.24 2022/11/30 01:47:19 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61#include <string.h>
62
63#include <openssl/opensslconf.h>
64
65#include "bn_local.h"
66
67#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
68/* Here follows specialised variants of bn_add_words() and
69   bn_sub_words().  They have the property performing operations on
70   arrays of different sizes.  The sizes of those arrays is expressed through
71   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
72   which is the delta between the two lengths, calculated as len(a)-len(b).
73   All lengths are the number of BN_ULONGs...  For the operations that require
74   a result array as parameter, it must have the length cl+abs(dl).
75   These functions should probably end up in bn_asm.c as soon as there are
76   assembler counterparts for the systems that use assembler files.  */
77
78BN_ULONG
79bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
80    int dl)
81{
82	BN_ULONG c, t;
83
84	assert(cl >= 0);
85	c = bn_sub_words(r, a, b, cl);
86
87	if (dl == 0)
88		return c;
89
90	r += cl;
91	a += cl;
92	b += cl;
93
94	if (dl < 0) {
95#ifdef BN_COUNT
96		fprintf(stderr,
97		    "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
98		    cl, dl, c);
99#endif
100		for (;;) {
101			t = b[0];
102			r[0] = (0 - t - c) & BN_MASK2;
103			if (t != 0)
104				c = 1;
105			if (++dl >= 0)
106				break;
107
108			t = b[1];
109			r[1] = (0 - t - c) & BN_MASK2;
110			if (t != 0)
111				c = 1;
112			if (++dl >= 0)
113				break;
114
115			t = b[2];
116			r[2] = (0 - t - c) & BN_MASK2;
117			if (t != 0)
118				c = 1;
119			if (++dl >= 0)
120				break;
121
122			t = b[3];
123			r[3] = (0 - t - c) & BN_MASK2;
124			if (t != 0)
125				c = 1;
126			if (++dl >= 0)
127				break;
128
129			b += 4;
130			r += 4;
131		}
132	} else {
133		int save_dl = dl;
134#ifdef BN_COUNT
135		fprintf(stderr,
136		    "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
137		    cl, dl, c);
138#endif
139		while (c) {
140			t = a[0];
141			r[0] = (t - c) & BN_MASK2;
142			if (t != 0)
143				c = 0;
144			if (--dl <= 0)
145				break;
146
147			t = a[1];
148			r[1] = (t - c) & BN_MASK2;
149			if (t != 0)
150				c = 0;
151			if (--dl <= 0)
152				break;
153
154			t = a[2];
155			r[2] = (t - c) & BN_MASK2;
156			if (t != 0)
157				c = 0;
158			if (--dl <= 0)
159				break;
160
161			t = a[3];
162			r[3] = (t - c) & BN_MASK2;
163			if (t != 0)
164				c = 0;
165			if (--dl <= 0)
166				break;
167
168			save_dl = dl;
169			a += 4;
170			r += 4;
171		}
172		if (dl > 0) {
173#ifdef BN_COUNT
174			fprintf(stderr,
175			    "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
176			    cl, dl);
177#endif
178			if (save_dl > dl) {
179				switch (save_dl - dl) {
180				case 1:
181					r[1] = a[1];
182					if (--dl <= 0)
183						break;
184				case 2:
185					r[2] = a[2];
186					if (--dl <= 0)
187						break;
188				case 3:
189					r[3] = a[3];
190					if (--dl <= 0)
191						break;
192				}
193				a += 4;
194				r += 4;
195			}
196		}
197		if (dl > 0) {
198#ifdef BN_COUNT
199			fprintf(stderr,
200			    "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
201			    cl, dl);
202#endif
203			for (;;) {
204				r[0] = a[0];
205				if (--dl <= 0)
206					break;
207				r[1] = a[1];
208				if (--dl <= 0)
209					break;
210				r[2] = a[2];
211				if (--dl <= 0)
212					break;
213				r[3] = a[3];
214				if (--dl <= 0)
215					break;
216
217				a += 4;
218				r += 4;
219			}
220		}
221	}
222	return c;
223}
224#endif
225
226BN_ULONG
227bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
228    int dl)
229{
230	BN_ULONG c, l, t;
231
232	assert(cl >= 0);
233	c = bn_add_words(r, a, b, cl);
234
235	if (dl == 0)
236		return c;
237
238	r += cl;
239	a += cl;
240	b += cl;
241
242	if (dl < 0) {
243		int save_dl = dl;
244#ifdef BN_COUNT
245		fprintf(stderr,
246		    "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
247		    cl, dl, c);
248#endif
249		while (c) {
250			l = (c + b[0]) & BN_MASK2;
251			c = (l < c);
252			r[0] = l;
253			if (++dl >= 0)
254				break;
255
256			l = (c + b[1]) & BN_MASK2;
257			c = (l < c);
258			r[1] = l;
259			if (++dl >= 0)
260				break;
261
262			l = (c + b[2]) & BN_MASK2;
263			c = (l < c);
264			r[2] = l;
265			if (++dl >= 0)
266				break;
267
268			l = (c + b[3]) & BN_MASK2;
269			c = (l < c);
270			r[3] = l;
271			if (++dl >= 0)
272				break;
273
274			save_dl = dl;
275			b += 4;
276			r += 4;
277		}
278		if (dl < 0) {
279#ifdef BN_COUNT
280			fprintf(stderr,
281			    "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
282			    cl, dl);
283#endif
284			if (save_dl < dl) {
285				switch (dl - save_dl) {
286				case 1:
287					r[1] = b[1];
288					if (++dl >= 0)
289						break;
290				case 2:
291					r[2] = b[2];
292					if (++dl >= 0)
293						break;
294				case 3:
295					r[3] = b[3];
296					if (++dl >= 0)
297						break;
298				}
299				b += 4;
300				r += 4;
301			}
302		}
303		if (dl < 0) {
304#ifdef BN_COUNT
305			fprintf(stderr,
306			    "  bn_add_part_words %d + %d (dl < 0, copy)\n",
307			    cl, dl);
308#endif
309			for (;;) {
310				r[0] = b[0];
311				if (++dl >= 0)
312					break;
313				r[1] = b[1];
314				if (++dl >= 0)
315					break;
316				r[2] = b[2];
317				if (++dl >= 0)
318					break;
319				r[3] = b[3];
320				if (++dl >= 0)
321					break;
322
323				b += 4;
324				r += 4;
325			}
326		}
327	} else {
328		int save_dl = dl;
329#ifdef BN_COUNT
330		fprintf(stderr,
331		    "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
332#endif
333		while (c) {
334			t = (a[0] + c) & BN_MASK2;
335			c = (t < c);
336			r[0] = t;
337			if (--dl <= 0)
338				break;
339
340			t = (a[1] + c) & BN_MASK2;
341			c = (t < c);
342			r[1] = t;
343			if (--dl <= 0)
344				break;
345
346			t = (a[2] + c) & BN_MASK2;
347			c = (t < c);
348			r[2] = t;
349			if (--dl <= 0)
350				break;
351
352			t = (a[3] + c) & BN_MASK2;
353			c = (t < c);
354			r[3] = t;
355			if (--dl <= 0)
356				break;
357
358			save_dl = dl;
359			a += 4;
360			r += 4;
361		}
362#ifdef BN_COUNT
363		fprintf(stderr,
364		    "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
365#endif
366		if (dl > 0) {
367			if (save_dl > dl) {
368				switch (save_dl - dl) {
369				case 1:
370					r[1] = a[1];
371					if (--dl <= 0)
372						break;
373				case 2:
374					r[2] = a[2];
375					if (--dl <= 0)
376						break;
377				case 3:
378					r[3] = a[3];
379					if (--dl <= 0)
380						break;
381				}
382				a += 4;
383				r += 4;
384			}
385		}
386		if (dl > 0) {
387#ifdef BN_COUNT
388			fprintf(stderr,
389			    "  bn_add_part_words %d + %d (dl > 0, copy)\n",
390			    cl, dl);
391#endif
392			for (;;) {
393				r[0] = a[0];
394				if (--dl <= 0)
395					break;
396				r[1] = a[1];
397				if (--dl <= 0)
398					break;
399				r[2] = a[2];
400				if (--dl <= 0)
401					break;
402				r[3] = a[3];
403				if (--dl <= 0)
404					break;
405
406				a += 4;
407				r += 4;
408			}
409		}
410	}
411	return c;
412}
413
414#ifdef BN_RECURSION
415/* Karatsuba recursive multiplication algorithm
416 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
417
418/* r is 2*n2 words in size,
419 * a and b are both n2 words in size.
420 * n2 must be a power of 2.
421 * We multiply and return the result.
422 * t must be 2*n2 words in size
423 * We calculate
424 * a[0]*b[0]
425 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
426 * a[1]*b[1]
427 */
428/* dnX may not be positive, but n2/2+dnX has to be */
429void
430bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
431    int dnb, BN_ULONG *t)
432{
433	int n = n2 / 2, c1, c2;
434	int tna = n + dna, tnb = n + dnb;
435	unsigned int neg, zero;
436	BN_ULONG ln, lo, *p;
437
438# ifdef BN_COUNT
439	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
440# endif
441# ifdef BN_MUL_COMBA
442#  if 0
443	if (n2 == 4) {
444		bn_mul_comba4(r, a, b);
445		return;
446	}
447#  endif
448	/* Only call bn_mul_comba 8 if n2 == 8 and the
449	 * two arrays are complete [steve]
450	 */
451	if (n2 == 8 && dna == 0 && dnb == 0) {
452		bn_mul_comba8(r, a, b);
453		return;
454	}
455# endif /* BN_MUL_COMBA */
456	/* Else do normal multiply */
457	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
458		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
459		if ((dna + dnb) < 0)
460			memset(&r[2*n2 + dna + dnb], 0,
461			    sizeof(BN_ULONG) * -(dna + dnb));
462		return;
463	}
464	/* r=(a[0]-a[1])*(b[1]-b[0]) */
465	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
466	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
467	zero = neg = 0;
468	switch (c1 * 3 + c2) {
469	case -4:
470		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
471		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
472		break;
473	case -3:
474		zero = 1;
475		break;
476	case -2:
477		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
478		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
479		neg = 1;
480		break;
481	case -1:
482	case 0:
483	case 1:
484		zero = 1;
485		break;
486	case 2:
487		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
488		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
489		neg = 1;
490		break;
491	case 3:
492		zero = 1;
493		break;
494	case 4:
495		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
496		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
497		break;
498	}
499
500# ifdef BN_MUL_COMBA
501	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
502					       extra args to do this well */
503	{
504		if (!zero)
505			bn_mul_comba4(&(t[n2]), t, &(t[n]));
506		else
507			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
508
509		bn_mul_comba4(r, a, b);
510		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
511	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
512						    take extra args to do this
513						    well */
514	{
515		if (!zero)
516			bn_mul_comba8(&(t[n2]), t, &(t[n]));
517		else
518			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
519
520		bn_mul_comba8(r, a, b);
521		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
522	} else
523# endif /* BN_MUL_COMBA */
524	{
525		p = &(t[n2 * 2]);
526		if (!zero)
527			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
528		else
529			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
530		bn_mul_recursive(r, a, b, n, 0, 0, p);
531		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
532	}
533
534	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
535	 * r[10] holds (a[0]*b[0])
536	 * r[32] holds (b[1]*b[1])
537	 */
538
539	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
540
541	if (neg) /* if t[32] is negative */
542	{
543		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
544	} else {
545		/* Might have a carry */
546		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
547	}
548
549	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
550	 * r[10] holds (a[0]*b[0])
551	 * r[32] holds (b[1]*b[1])
552	 * c1 holds the carry bits
553	 */
554	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
555	if (c1) {
556		p = &(r[n + n2]);
557		lo= *p;
558		ln = (lo + c1) & BN_MASK2;
559		*p = ln;
560
561		/* The overflow will stop before we over write
562		 * words we should not overwrite */
563		if (ln < (BN_ULONG)c1) {
564			do {
565				p++;
566				lo= *p;
567				ln = (lo + 1) & BN_MASK2;
568				*p = ln;
569			} while (ln == 0);
570		}
571	}
572}
573
574/* n+tn is the word length
575 * t needs to be n*4 is size, as does r */
576/* tnX may not be negative but less than n */
577void
578bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
579    int tnb, BN_ULONG *t)
580{
581	int i, j, n2 = n * 2;
582	int c1, c2, neg;
583	BN_ULONG ln, lo, *p;
584
585# ifdef BN_COUNT
586	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
587	    n, tna, n, tnb);
588# endif
589	if (n < 8) {
590		bn_mul_normal(r, a, n + tna, b, n + tnb);
591		return;
592	}
593
594	/* r=(a[0]-a[1])*(b[1]-b[0]) */
595	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
596	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
597	neg = 0;
598	switch (c1 * 3 + c2) {
599	case -4:
600		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
601		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
602		break;
603	case -3:
604		/* break; */
605	case -2:
606		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
607		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
608		neg = 1;
609		break;
610	case -1:
611	case 0:
612	case 1:
613		/* break; */
614	case 2:
615		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
616		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
617		neg = 1;
618		break;
619	case 3:
620		/* break; */
621	case 4:
622		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
623		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
624		break;
625	}
626		/* The zero case isn't yet implemented here. The speedup
627		   would probably be negligible. */
628# if 0
629	if (n == 4) {
630		bn_mul_comba4(&(t[n2]), t, &(t[n]));
631		bn_mul_comba4(r, a, b);
632		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
633		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
634	} else
635# endif
636		if (n == 8) {
637		bn_mul_comba8(&(t[n2]), t, &(t[n]));
638		bn_mul_comba8(r, a, b);
639		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
640		memset(&(r[n2 + tna + tnb]), 0,
641		    sizeof(BN_ULONG) * (n2 - tna - tnb));
642	} else {
643		p = &(t[n2*2]);
644		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
645		bn_mul_recursive(r, a, b, n, 0, 0, p);
646		i = n / 2;
647		/* If there is only a bottom half to the number,
648		 * just do it */
649		if (tna > tnb)
650			j = tna - i;
651		else
652			j = tnb - i;
653		if (j == 0) {
654			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
655			    i, tna - i, tnb - i, p);
656			memset(&(r[n2 + i * 2]), 0,
657			    sizeof(BN_ULONG) * (n2 - i * 2));
658		}
659		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
660		{
661			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
662			    i, tna - i, tnb - i, p);
663			memset(&(r[n2 + tna + tnb]), 0,
664			    sizeof(BN_ULONG) * (n2 - tna - tnb));
665		}
666		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
667		{
668			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
669			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
670			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
671				bn_mul_normal(&(r[n2]), &(a[n]), tna,
672				    &(b[n]), tnb);
673			} else {
674				for (;;) {
675					i /= 2;
676					/* these simplified conditions work
677					 * exclusively because difference
678					 * between tna and tnb is 1 or 0 */
679					if (i < tna || i < tnb) {
680						bn_mul_part_recursive(&(r[n2]),
681						    &(a[n]), &(b[n]), i,
682						    tna - i, tnb - i, p);
683						break;
684					} else if (i == tna || i == tnb) {
685						bn_mul_recursive(&(r[n2]),
686						    &(a[n]), &(b[n]), i,
687						    tna - i, tnb - i, p);
688						break;
689					}
690				}
691			}
692		}
693	}
694
695	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
696	 * r[10] holds (a[0]*b[0])
697	 * r[32] holds (b[1]*b[1])
698	 */
699
700	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
701
702	if (neg) /* if t[32] is negative */
703	{
704		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
705	} else {
706		/* Might have a carry */
707		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
708	}
709
710	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
711	 * r[10] holds (a[0]*b[0])
712	 * r[32] holds (b[1]*b[1])
713	 * c1 holds the carry bits
714	 */
715	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
716	if (c1) {
717		p = &(r[n + n2]);
718		lo= *p;
719		ln = (lo + c1)&BN_MASK2;
720		*p = ln;
721
722		/* The overflow will stop before we over write
723		 * words we should not overwrite */
724		if (ln < (BN_ULONG)c1) {
725			do {
726				p++;
727				lo= *p;
728				ln = (lo + 1) & BN_MASK2;
729				*p = ln;
730			} while (ln == 0);
731		}
732	}
733}
734
735/* a and b must be the same size, which is n2.
736 * r needs to be n2 words and t needs to be n2*2
737 */
738void
739bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
740{
741	int n = n2 / 2;
742
743# ifdef BN_COUNT
744	fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
745# endif
746
747	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
748	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
749		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
750		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
751		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
752		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
753	} else {
754		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
755		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
756		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
757		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
758	}
759}
760
761/* a and b must be the same size, which is n2.
762 * r needs to be n2 words and t needs to be n2*2
763 * l is the low words of the output.
764 * t needs to be n2*3
765 */
766void
767bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
768    BN_ULONG *t)
769{
770	int i, n;
771	int c1, c2;
772	int neg, oneg, zero;
773	BN_ULONG ll, lc, *lp, *mp;
774
775# ifdef BN_COUNT
776	fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
777# endif
778	n = n2 / 2;
779
780	/* Calculate (al-ah)*(bh-bl) */
781	neg = zero = 0;
782	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
783	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
784	switch (c1 * 3 + c2) {
785	case -4:
786		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
787		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
788		break;
789	case -3:
790		zero = 1;
791		break;
792	case -2:
793		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
794		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
795		neg = 1;
796		break;
797	case -1:
798	case 0:
799	case 1:
800		zero = 1;
801		break;
802	case 2:
803		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
804		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
805		neg = 1;
806		break;
807	case 3:
808		zero = 1;
809		break;
810	case 4:
811		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
812		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
813		break;
814	}
815
816	oneg = neg;
817	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
818	/* r[10] = (a[1]*b[1]) */
819# ifdef BN_MUL_COMBA
820	if (n == 8) {
821		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
822		bn_mul_comba8(r, &(a[n]), &(b[n]));
823	} else
824# endif
825	{
826		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
827		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
828	}
829
830	/* s0 == low(al*bl)
831	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
832	 * We know s0 and s1 so the only unknown is high(al*bl)
833	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
834	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
835	 */
836	if (l != NULL) {
837		lp = &(t[n2 + n]);
838		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
839	} else {
840		c1 = 0;
841		lp = &(r[0]);
842	}
843
844	if (neg)
845		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
846	else {
847		bn_add_words(&(t[n2]), lp, &(t[0]), n);
848		neg = 0;
849	}
850
851	if (l != NULL) {
852		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
853	} else {
854		lp = &(t[n2 + n]);
855		mp = &(t[n2]);
856		for (i = 0; i < n; i++)
857			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
858	}
859
860	/* s[0] = low(al*bl)
861	 * t[3] = high(al*bl)
862	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
863	 * r[10] = (a[1]*b[1])
864	 */
865	/* R[10] = al*bl
866	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
867	 * R[32] = ah*bh
868	 */
869	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
870	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
871	 * R[3]=r[1]+(carry/borrow)
872	 */
873	if (l != NULL) {
874		lp = &(t[n2]);
875		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
876	} else {
877		lp = &(t[n2 + n]);
878		c1 = 0;
879	}
880	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
881	if (oneg)
882		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
883	else
884		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
885
886	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
887	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
888	if (oneg)
889		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
890	else
891		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
892
893	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
894	{
895		i = 0;
896		if (c1 > 0) {
897			lc = c1;
898			do {
899				ll = (r[i] + lc) & BN_MASK2;
900				r[i++] = ll;
901				lc = (lc > ll);
902			} while (lc);
903		} else {
904			lc = -c1;
905			do {
906				ll = r[i];
907				r[i++] = (ll - lc) & BN_MASK2;
908				lc = (lc > ll);
909			} while (lc);
910		}
911	}
912	if (c2 != 0) /* Add starting at r[1] */
913	{
914		i = n;
915		if (c2 > 0) {
916			lc = c2;
917			do {
918				ll = (r[i] + lc) & BN_MASK2;
919				r[i++] = ll;
920				lc = (lc > ll);
921			} while (lc);
922		} else {
923			lc = -c2;
924			do {
925				ll = r[i];
926				r[i++] = (ll - lc) & BN_MASK2;
927				lc = (lc > ll);
928			} while (lc);
929		}
930	}
931}
932#endif /* BN_RECURSION */
933
934int
935BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
936{
937	int ret = 0;
938	int top, al, bl;
939	BIGNUM *rr;
940#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
941	int i;
942#endif
943#ifdef BN_RECURSION
944	BIGNUM *t = NULL;
945	int j = 0, k;
946#endif
947
948#ifdef BN_COUNT
949	fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
950#endif
951
952
953	al = a->top;
954	bl = b->top;
955
956	if ((al == 0) || (bl == 0)) {
957		BN_zero(r);
958		return (1);
959	}
960	top = al + bl;
961
962	BN_CTX_start(ctx);
963	if ((r == a) || (r == b)) {
964		if ((rr = BN_CTX_get(ctx)) == NULL)
965			goto err;
966	} else
967		rr = r;
968	rr->neg = a->neg ^ b->neg;
969
970#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
971	i = al - bl;
972#endif
973#ifdef BN_MUL_COMBA
974	if (i == 0) {
975# if 0
976		if (al == 4) {
977			if (!bn_wexpand(rr, 8))
978				goto err;
979			rr->top = 8;
980			bn_mul_comba4(rr->d, a->d, b->d);
981			goto end;
982		}
983# endif
984		if (al == 8) {
985			if (!bn_wexpand(rr, 16))
986				goto err;
987			rr->top = 16;
988			bn_mul_comba8(rr->d, a->d, b->d);
989			goto end;
990		}
991	}
992#endif /* BN_MUL_COMBA */
993#ifdef BN_RECURSION
994	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
995		if (i >= -1 && i <= 1) {
996			/* Find out the power of two lower or equal
997			   to the longest of the two numbers */
998			if (i >= 0) {
999				j = BN_num_bits_word((BN_ULONG)al);
1000			}
1001			if (i == -1) {
1002				j = BN_num_bits_word((BN_ULONG)bl);
1003			}
1004			j = 1 << (j - 1);
1005			assert(j <= al || j <= bl);
1006			k = j + j;
1007			if ((t = BN_CTX_get(ctx)) == NULL)
1008				goto err;
1009			if (al > j || bl > j) {
1010				if (!bn_wexpand(t, k * 4))
1011					goto err;
1012				if (!bn_wexpand(rr, k * 4))
1013					goto err;
1014				bn_mul_part_recursive(rr->d, a->d, b->d,
1015				    j, al - j, bl - j, t->d);
1016			}
1017			else	/* al <= j || bl <= j */
1018			{
1019				if (!bn_wexpand(t, k * 2))
1020					goto err;
1021				if (!bn_wexpand(rr, k * 2))
1022					goto err;
1023				bn_mul_recursive(rr->d, a->d, b->d,
1024				    j, al - j, bl - j, t->d);
1025			}
1026			rr->top = top;
1027			goto end;
1028		}
1029#if 0
1030		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1031			BIGNUM *tmp_bn = (BIGNUM *)b;
1032			if (!bn_wexpand(tmp_bn, al))
1033				goto err;
1034			tmp_bn->d[bl] = 0;
1035			bl++;
1036			i--;
1037		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1038			BIGNUM *tmp_bn = (BIGNUM *)a;
1039			if (!bn_wexpand(tmp_bn, bl))
1040				goto err;
1041			tmp_bn->d[al] = 0;
1042			al++;
1043			i++;
1044		}
1045		if (i == 0) {
1046			/* symmetric and > 4 */
1047			/* 16 or larger */
1048			j = BN_num_bits_word((BN_ULONG)al);
1049			j = 1 << (j - 1);
1050			k = j + j;
1051			if ((t = BN_CTX_get(ctx)) == NULL)
1052				goto err;
1053			if (al == j) /* exact multiple */
1054			{
1055				if (!bn_wexpand(t, k * 2))
1056					goto err;
1057				if (!bn_wexpand(rr, k * 2))
1058					goto err;
1059				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1060			} else {
1061				if (!bn_wexpand(t, k * 4))
1062					goto err;
1063				if (!bn_wexpand(rr, k * 4))
1064					goto err;
1065				bn_mul_part_recursive(rr->d, a->d, b->d,
1066				    al - j, j, t->d);
1067			}
1068			rr->top = top;
1069			goto end;
1070		}
1071#endif
1072	}
1073#endif /* BN_RECURSION */
1074	if (!bn_wexpand(rr, top))
1075		goto err;
1076	rr->top = top;
1077	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1078
1079#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1080end:
1081#endif
1082	bn_correct_top(rr);
1083	if (r != rr)
1084		BN_copy(r, rr);
1085	ret = 1;
1086err:
1087	BN_CTX_end(ctx);
1088	return (ret);
1089}
1090
1091void
1092bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1093{
1094	BN_ULONG *rr;
1095
1096#ifdef BN_COUNT
1097	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1098#endif
1099
1100	if (na < nb) {
1101		int itmp;
1102		BN_ULONG *ltmp;
1103
1104		itmp = na;
1105		na = nb;
1106		nb = itmp;
1107		ltmp = a;
1108		a = b;
1109		b = ltmp;
1110
1111	}
1112	rr = &(r[na]);
1113	if (nb <= 0) {
1114		(void)bn_mul_words(r, a, na, 0);
1115		return;
1116	} else
1117		rr[0] = bn_mul_words(r, a, na, b[0]);
1118
1119	for (;;) {
1120		if (--nb <= 0)
1121			return;
1122		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1123		if (--nb <= 0)
1124			return;
1125		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1126		if (--nb <= 0)
1127			return;
1128		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1129		if (--nb <= 0)
1130			return;
1131		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1132		rr += 4;
1133		r += 4;
1134		b += 4;
1135	}
1136}
1137
1138void
1139bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1140{
1141#ifdef BN_COUNT
1142	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1143#endif
1144	bn_mul_words(r, a, n, b[0]);
1145
1146	for (;;) {
1147		if (--n <= 0)
1148			return;
1149		bn_mul_add_words(&(r[1]), a, n, b[1]);
1150		if (--n <= 0)
1151			return;
1152		bn_mul_add_words(&(r[2]), a, n, b[2]);
1153		if (--n <= 0)
1154			return;
1155		bn_mul_add_words(&(r[3]), a, n, b[3]);
1156		if (--n <= 0)
1157			return;
1158		bn_mul_add_words(&(r[4]), a, n, b[4]);
1159		r += 4;
1160		b += 4;
1161	}
1162}
1163