bn_mul.c revision 1.25
1/* $OpenBSD: bn_mul.c,v 1.25 2023/01/16 16:53:19 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61#include <string.h>
62
63#include <openssl/opensslconf.h>
64
65#include "bn_local.h"
66
67#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
68/* Here follows specialised variants of bn_add_words() and
69   bn_sub_words().  They have the property performing operations on
70   arrays of different sizes.  The sizes of those arrays is expressed through
71   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
72   which is the delta between the two lengths, calculated as len(a)-len(b).
73   All lengths are the number of BN_ULONGs...  For the operations that require
74   a result array as parameter, it must have the length cl+abs(dl).
75   These functions should probably end up in bn_asm.c as soon as there are
76   assembler counterparts for the systems that use assembler files.  */
77
78BN_ULONG
79bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
80    int dl)
81{
82	BN_ULONG c, t;
83
84	assert(cl >= 0);
85	c = bn_sub_words(r, a, b, cl);
86
87	if (dl == 0)
88		return c;
89
90	r += cl;
91	a += cl;
92	b += cl;
93
94	if (dl < 0) {
95		for (;;) {
96			t = b[0];
97			r[0] = (0 - t - c) & BN_MASK2;
98			if (t != 0)
99				c = 1;
100			if (++dl >= 0)
101				break;
102
103			t = b[1];
104			r[1] = (0 - t - c) & BN_MASK2;
105			if (t != 0)
106				c = 1;
107			if (++dl >= 0)
108				break;
109
110			t = b[2];
111			r[2] = (0 - t - c) & BN_MASK2;
112			if (t != 0)
113				c = 1;
114			if (++dl >= 0)
115				break;
116
117			t = b[3];
118			r[3] = (0 - t - c) & BN_MASK2;
119			if (t != 0)
120				c = 1;
121			if (++dl >= 0)
122				break;
123
124			b += 4;
125			r += 4;
126		}
127	} else {
128		int save_dl = dl;
129		while (c) {
130			t = a[0];
131			r[0] = (t - c) & BN_MASK2;
132			if (t != 0)
133				c = 0;
134			if (--dl <= 0)
135				break;
136
137			t = a[1];
138			r[1] = (t - c) & BN_MASK2;
139			if (t != 0)
140				c = 0;
141			if (--dl <= 0)
142				break;
143
144			t = a[2];
145			r[2] = (t - c) & BN_MASK2;
146			if (t != 0)
147				c = 0;
148			if (--dl <= 0)
149				break;
150
151			t = a[3];
152			r[3] = (t - c) & BN_MASK2;
153			if (t != 0)
154				c = 0;
155			if (--dl <= 0)
156				break;
157
158			save_dl = dl;
159			a += 4;
160			r += 4;
161		}
162		if (dl > 0) {
163			if (save_dl > dl) {
164				switch (save_dl - dl) {
165				case 1:
166					r[1] = a[1];
167					if (--dl <= 0)
168						break;
169				case 2:
170					r[2] = a[2];
171					if (--dl <= 0)
172						break;
173				case 3:
174					r[3] = a[3];
175					if (--dl <= 0)
176						break;
177				}
178				a += 4;
179				r += 4;
180			}
181		}
182		if (dl > 0) {
183			for (;;) {
184				r[0] = a[0];
185				if (--dl <= 0)
186					break;
187				r[1] = a[1];
188				if (--dl <= 0)
189					break;
190				r[2] = a[2];
191				if (--dl <= 0)
192					break;
193				r[3] = a[3];
194				if (--dl <= 0)
195					break;
196
197				a += 4;
198				r += 4;
199			}
200		}
201	}
202	return c;
203}
204#endif
205
206BN_ULONG
207bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
208    int dl)
209{
210	BN_ULONG c, l, t;
211
212	assert(cl >= 0);
213	c = bn_add_words(r, a, b, cl);
214
215	if (dl == 0)
216		return c;
217
218	r += cl;
219	a += cl;
220	b += cl;
221
222	if (dl < 0) {
223		int save_dl = dl;
224		while (c) {
225			l = (c + b[0]) & BN_MASK2;
226			c = (l < c);
227			r[0] = l;
228			if (++dl >= 0)
229				break;
230
231			l = (c + b[1]) & BN_MASK2;
232			c = (l < c);
233			r[1] = l;
234			if (++dl >= 0)
235				break;
236
237			l = (c + b[2]) & BN_MASK2;
238			c = (l < c);
239			r[2] = l;
240			if (++dl >= 0)
241				break;
242
243			l = (c + b[3]) & BN_MASK2;
244			c = (l < c);
245			r[3] = l;
246			if (++dl >= 0)
247				break;
248
249			save_dl = dl;
250			b += 4;
251			r += 4;
252		}
253		if (dl < 0) {
254			if (save_dl < dl) {
255				switch (dl - save_dl) {
256				case 1:
257					r[1] = b[1];
258					if (++dl >= 0)
259						break;
260				case 2:
261					r[2] = b[2];
262					if (++dl >= 0)
263						break;
264				case 3:
265					r[3] = b[3];
266					if (++dl >= 0)
267						break;
268				}
269				b += 4;
270				r += 4;
271			}
272		}
273		if (dl < 0) {
274			for (;;) {
275				r[0] = b[0];
276				if (++dl >= 0)
277					break;
278				r[1] = b[1];
279				if (++dl >= 0)
280					break;
281				r[2] = b[2];
282				if (++dl >= 0)
283					break;
284				r[3] = b[3];
285				if (++dl >= 0)
286					break;
287
288				b += 4;
289				r += 4;
290			}
291		}
292	} else {
293		int save_dl = dl;
294		while (c) {
295			t = (a[0] + c) & BN_MASK2;
296			c = (t < c);
297			r[0] = t;
298			if (--dl <= 0)
299				break;
300
301			t = (a[1] + c) & BN_MASK2;
302			c = (t < c);
303			r[1] = t;
304			if (--dl <= 0)
305				break;
306
307			t = (a[2] + c) & BN_MASK2;
308			c = (t < c);
309			r[2] = t;
310			if (--dl <= 0)
311				break;
312
313			t = (a[3] + c) & BN_MASK2;
314			c = (t < c);
315			r[3] = t;
316			if (--dl <= 0)
317				break;
318
319			save_dl = dl;
320			a += 4;
321			r += 4;
322		}
323		if (dl > 0) {
324			if (save_dl > dl) {
325				switch (save_dl - dl) {
326				case 1:
327					r[1] = a[1];
328					if (--dl <= 0)
329						break;
330				case 2:
331					r[2] = a[2];
332					if (--dl <= 0)
333						break;
334				case 3:
335					r[3] = a[3];
336					if (--dl <= 0)
337						break;
338				}
339				a += 4;
340				r += 4;
341			}
342		}
343		if (dl > 0) {
344			for (;;) {
345				r[0] = a[0];
346				if (--dl <= 0)
347					break;
348				r[1] = a[1];
349				if (--dl <= 0)
350					break;
351				r[2] = a[2];
352				if (--dl <= 0)
353					break;
354				r[3] = a[3];
355				if (--dl <= 0)
356					break;
357
358				a += 4;
359				r += 4;
360			}
361		}
362	}
363	return c;
364}
365
366#ifdef BN_RECURSION
367/* Karatsuba recursive multiplication algorithm
368 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
369
370/* r is 2*n2 words in size,
371 * a and b are both n2 words in size.
372 * n2 must be a power of 2.
373 * We multiply and return the result.
374 * t must be 2*n2 words in size
375 * We calculate
376 * a[0]*b[0]
377 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
378 * a[1]*b[1]
379 */
380/* dnX may not be positive, but n2/2+dnX has to be */
381void
382bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
383    int dnb, BN_ULONG *t)
384{
385	int n = n2 / 2, c1, c2;
386	int tna = n + dna, tnb = n + dnb;
387	unsigned int neg, zero;
388	BN_ULONG ln, lo, *p;
389
390# ifdef BN_MUL_COMBA
391#  if 0
392	if (n2 == 4) {
393		bn_mul_comba4(r, a, b);
394		return;
395	}
396#  endif
397	/* Only call bn_mul_comba 8 if n2 == 8 and the
398	 * two arrays are complete [steve]
399	 */
400	if (n2 == 8 && dna == 0 && dnb == 0) {
401		bn_mul_comba8(r, a, b);
402		return;
403	}
404# endif /* BN_MUL_COMBA */
405	/* Else do normal multiply */
406	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
407		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
408		if ((dna + dnb) < 0)
409			memset(&r[2*n2 + dna + dnb], 0,
410			    sizeof(BN_ULONG) * -(dna + dnb));
411		return;
412	}
413	/* r=(a[0]-a[1])*(b[1]-b[0]) */
414	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
415	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
416	zero = neg = 0;
417	switch (c1 * 3 + c2) {
418	case -4:
419		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
420		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
421		break;
422	case -3:
423		zero = 1;
424		break;
425	case -2:
426		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
427		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
428		neg = 1;
429		break;
430	case -1:
431	case 0:
432	case 1:
433		zero = 1;
434		break;
435	case 2:
436		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
437		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
438		neg = 1;
439		break;
440	case 3:
441		zero = 1;
442		break;
443	case 4:
444		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
445		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
446		break;
447	}
448
449# ifdef BN_MUL_COMBA
450	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
451					       extra args to do this well */
452	{
453		if (!zero)
454			bn_mul_comba4(&(t[n2]), t, &(t[n]));
455		else
456			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
457
458		bn_mul_comba4(r, a, b);
459		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
460	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
461						    take extra args to do this
462						    well */
463	{
464		if (!zero)
465			bn_mul_comba8(&(t[n2]), t, &(t[n]));
466		else
467			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
468
469		bn_mul_comba8(r, a, b);
470		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
471	} else
472# endif /* BN_MUL_COMBA */
473	{
474		p = &(t[n2 * 2]);
475		if (!zero)
476			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
477		else
478			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
479		bn_mul_recursive(r, a, b, n, 0, 0, p);
480		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
481	}
482
483	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
484	 * r[10] holds (a[0]*b[0])
485	 * r[32] holds (b[1]*b[1])
486	 */
487
488	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
489
490	if (neg) /* if t[32] is negative */
491	{
492		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
493	} else {
494		/* Might have a carry */
495		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
496	}
497
498	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
499	 * r[10] holds (a[0]*b[0])
500	 * r[32] holds (b[1]*b[1])
501	 * c1 holds the carry bits
502	 */
503	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
504	if (c1) {
505		p = &(r[n + n2]);
506		lo= *p;
507		ln = (lo + c1) & BN_MASK2;
508		*p = ln;
509
510		/* The overflow will stop before we over write
511		 * words we should not overwrite */
512		if (ln < (BN_ULONG)c1) {
513			do {
514				p++;
515				lo= *p;
516				ln = (lo + 1) & BN_MASK2;
517				*p = ln;
518			} while (ln == 0);
519		}
520	}
521}
522
523/* n+tn is the word length
524 * t needs to be n*4 is size, as does r */
525/* tnX may not be negative but less than n */
526void
527bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
528    int tnb, BN_ULONG *t)
529{
530	int i, j, n2 = n * 2;
531	int c1, c2, neg;
532	BN_ULONG ln, lo, *p;
533
534	if (n < 8) {
535		bn_mul_normal(r, a, n + tna, b, n + tnb);
536		return;
537	}
538
539	/* r=(a[0]-a[1])*(b[1]-b[0]) */
540	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
541	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
542	neg = 0;
543	switch (c1 * 3 + c2) {
544	case -4:
545		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
546		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
547		break;
548	case -3:
549		/* break; */
550	case -2:
551		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
552		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
553		neg = 1;
554		break;
555	case -1:
556	case 0:
557	case 1:
558		/* break; */
559	case 2:
560		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
561		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
562		neg = 1;
563		break;
564	case 3:
565		/* break; */
566	case 4:
567		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
568		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
569		break;
570	}
571		/* The zero case isn't yet implemented here. The speedup
572		   would probably be negligible. */
573# if 0
574	if (n == 4) {
575		bn_mul_comba4(&(t[n2]), t, &(t[n]));
576		bn_mul_comba4(r, a, b);
577		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
578		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
579	} else
580# endif
581		if (n == 8) {
582		bn_mul_comba8(&(t[n2]), t, &(t[n]));
583		bn_mul_comba8(r, a, b);
584		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
585		memset(&(r[n2 + tna + tnb]), 0,
586		    sizeof(BN_ULONG) * (n2 - tna - tnb));
587	} else {
588		p = &(t[n2*2]);
589		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
590		bn_mul_recursive(r, a, b, n, 0, 0, p);
591		i = n / 2;
592		/* If there is only a bottom half to the number,
593		 * just do it */
594		if (tna > tnb)
595			j = tna - i;
596		else
597			j = tnb - i;
598		if (j == 0) {
599			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
600			    i, tna - i, tnb - i, p);
601			memset(&(r[n2 + i * 2]), 0,
602			    sizeof(BN_ULONG) * (n2 - i * 2));
603		}
604		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
605		{
606			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
607			    i, tna - i, tnb - i, p);
608			memset(&(r[n2 + tna + tnb]), 0,
609			    sizeof(BN_ULONG) * (n2 - tna - tnb));
610		}
611		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
612		{
613			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
614			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
615			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
616				bn_mul_normal(&(r[n2]), &(a[n]), tna,
617				    &(b[n]), tnb);
618			} else {
619				for (;;) {
620					i /= 2;
621					/* these simplified conditions work
622					 * exclusively because difference
623					 * between tna and tnb is 1 or 0 */
624					if (i < tna || i < tnb) {
625						bn_mul_part_recursive(&(r[n2]),
626						    &(a[n]), &(b[n]), i,
627						    tna - i, tnb - i, p);
628						break;
629					} else if (i == tna || i == tnb) {
630						bn_mul_recursive(&(r[n2]),
631						    &(a[n]), &(b[n]), i,
632						    tna - i, tnb - i, p);
633						break;
634					}
635				}
636			}
637		}
638	}
639
640	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
641	 * r[10] holds (a[0]*b[0])
642	 * r[32] holds (b[1]*b[1])
643	 */
644
645	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
646
647	if (neg) /* if t[32] is negative */
648	{
649		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
650	} else {
651		/* Might have a carry */
652		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
653	}
654
655	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
656	 * r[10] holds (a[0]*b[0])
657	 * r[32] holds (b[1]*b[1])
658	 * c1 holds the carry bits
659	 */
660	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
661	if (c1) {
662		p = &(r[n + n2]);
663		lo= *p;
664		ln = (lo + c1)&BN_MASK2;
665		*p = ln;
666
667		/* The overflow will stop before we over write
668		 * words we should not overwrite */
669		if (ln < (BN_ULONG)c1) {
670			do {
671				p++;
672				lo= *p;
673				ln = (lo + 1) & BN_MASK2;
674				*p = ln;
675			} while (ln == 0);
676		}
677	}
678}
679
680/* a and b must be the same size, which is n2.
681 * r needs to be n2 words and t needs to be n2*2
682 */
683void
684bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
685{
686	int n = n2 / 2;
687
688
689	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
690	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
691		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
692		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
693		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
694		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
695	} else {
696		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
697		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
698		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
699		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
700	}
701}
702
703/* a and b must be the same size, which is n2.
704 * r needs to be n2 words and t needs to be n2*2
705 * l is the low words of the output.
706 * t needs to be n2*3
707 */
708void
709bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
710    BN_ULONG *t)
711{
712	int i, n;
713	int c1, c2;
714	int neg, oneg, zero;
715	BN_ULONG ll, lc, *lp, *mp;
716
717	n = n2 / 2;
718
719	/* Calculate (al-ah)*(bh-bl) */
720	neg = zero = 0;
721	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
722	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
723	switch (c1 * 3 + c2) {
724	case -4:
725		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
726		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
727		break;
728	case -3:
729		zero = 1;
730		break;
731	case -2:
732		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
733		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
734		neg = 1;
735		break;
736	case -1:
737	case 0:
738	case 1:
739		zero = 1;
740		break;
741	case 2:
742		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
743		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
744		neg = 1;
745		break;
746	case 3:
747		zero = 1;
748		break;
749	case 4:
750		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
751		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
752		break;
753	}
754
755	oneg = neg;
756	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
757	/* r[10] = (a[1]*b[1]) */
758# ifdef BN_MUL_COMBA
759	if (n == 8) {
760		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
761		bn_mul_comba8(r, &(a[n]), &(b[n]));
762	} else
763# endif
764	{
765		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
766		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
767	}
768
769	/* s0 == low(al*bl)
770	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
771	 * We know s0 and s1 so the only unknown is high(al*bl)
772	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
773	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
774	 */
775	if (l != NULL) {
776		lp = &(t[n2 + n]);
777		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
778	} else {
779		c1 = 0;
780		lp = &(r[0]);
781	}
782
783	if (neg)
784		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
785	else {
786		bn_add_words(&(t[n2]), lp, &(t[0]), n);
787		neg = 0;
788	}
789
790	if (l != NULL) {
791		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
792	} else {
793		lp = &(t[n2 + n]);
794		mp = &(t[n2]);
795		for (i = 0; i < n; i++)
796			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
797	}
798
799	/* s[0] = low(al*bl)
800	 * t[3] = high(al*bl)
801	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
802	 * r[10] = (a[1]*b[1])
803	 */
804	/* R[10] = al*bl
805	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
806	 * R[32] = ah*bh
807	 */
808	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
809	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
810	 * R[3]=r[1]+(carry/borrow)
811	 */
812	if (l != NULL) {
813		lp = &(t[n2]);
814		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
815	} else {
816		lp = &(t[n2 + n]);
817		c1 = 0;
818	}
819	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
820	if (oneg)
821		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
822	else
823		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
824
825	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
826	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
827	if (oneg)
828		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
829	else
830		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
831
832	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
833	{
834		i = 0;
835		if (c1 > 0) {
836			lc = c1;
837			do {
838				ll = (r[i] + lc) & BN_MASK2;
839				r[i++] = ll;
840				lc = (lc > ll);
841			} while (lc);
842		} else {
843			lc = -c1;
844			do {
845				ll = r[i];
846				r[i++] = (ll - lc) & BN_MASK2;
847				lc = (lc > ll);
848			} while (lc);
849		}
850	}
851	if (c2 != 0) /* Add starting at r[1] */
852	{
853		i = n;
854		if (c2 > 0) {
855			lc = c2;
856			do {
857				ll = (r[i] + lc) & BN_MASK2;
858				r[i++] = ll;
859				lc = (lc > ll);
860			} while (lc);
861		} else {
862			lc = -c2;
863			do {
864				ll = r[i];
865				r[i++] = (ll - lc) & BN_MASK2;
866				lc = (lc > ll);
867			} while (lc);
868		}
869	}
870}
871#endif /* BN_RECURSION */
872
873int
874BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
875{
876	int ret = 0;
877	int top, al, bl;
878	BIGNUM *rr;
879#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
880	int i;
881#endif
882#ifdef BN_RECURSION
883	BIGNUM *t = NULL;
884	int j = 0, k;
885#endif
886
887
888
889	al = a->top;
890	bl = b->top;
891
892	if ((al == 0) || (bl == 0)) {
893		BN_zero(r);
894		return (1);
895	}
896	top = al + bl;
897
898	BN_CTX_start(ctx);
899	if ((r == a) || (r == b)) {
900		if ((rr = BN_CTX_get(ctx)) == NULL)
901			goto err;
902	} else
903		rr = r;
904	rr->neg = a->neg ^ b->neg;
905
906#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
907	i = al - bl;
908#endif
909#ifdef BN_MUL_COMBA
910	if (i == 0) {
911# if 0
912		if (al == 4) {
913			if (!bn_wexpand(rr, 8))
914				goto err;
915			rr->top = 8;
916			bn_mul_comba4(rr->d, a->d, b->d);
917			goto end;
918		}
919# endif
920		if (al == 8) {
921			if (!bn_wexpand(rr, 16))
922				goto err;
923			rr->top = 16;
924			bn_mul_comba8(rr->d, a->d, b->d);
925			goto end;
926		}
927	}
928#endif /* BN_MUL_COMBA */
929#ifdef BN_RECURSION
930	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
931		if (i >= -1 && i <= 1) {
932			/* Find out the power of two lower or equal
933			   to the longest of the two numbers */
934			if (i >= 0) {
935				j = BN_num_bits_word((BN_ULONG)al);
936			}
937			if (i == -1) {
938				j = BN_num_bits_word((BN_ULONG)bl);
939			}
940			j = 1 << (j - 1);
941			assert(j <= al || j <= bl);
942			k = j + j;
943			if ((t = BN_CTX_get(ctx)) == NULL)
944				goto err;
945			if (al > j || bl > j) {
946				if (!bn_wexpand(t, k * 4))
947					goto err;
948				if (!bn_wexpand(rr, k * 4))
949					goto err;
950				bn_mul_part_recursive(rr->d, a->d, b->d,
951				    j, al - j, bl - j, t->d);
952			}
953			else	/* al <= j || bl <= j */
954			{
955				if (!bn_wexpand(t, k * 2))
956					goto err;
957				if (!bn_wexpand(rr, k * 2))
958					goto err;
959				bn_mul_recursive(rr->d, a->d, b->d,
960				    j, al - j, bl - j, t->d);
961			}
962			rr->top = top;
963			goto end;
964		}
965#if 0
966		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
967			BIGNUM *tmp_bn = (BIGNUM *)b;
968			if (!bn_wexpand(tmp_bn, al))
969				goto err;
970			tmp_bn->d[bl] = 0;
971			bl++;
972			i--;
973		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
974			BIGNUM *tmp_bn = (BIGNUM *)a;
975			if (!bn_wexpand(tmp_bn, bl))
976				goto err;
977			tmp_bn->d[al] = 0;
978			al++;
979			i++;
980		}
981		if (i == 0) {
982			/* symmetric and > 4 */
983			/* 16 or larger */
984			j = BN_num_bits_word((BN_ULONG)al);
985			j = 1 << (j - 1);
986			k = j + j;
987			if ((t = BN_CTX_get(ctx)) == NULL)
988				goto err;
989			if (al == j) /* exact multiple */
990			{
991				if (!bn_wexpand(t, k * 2))
992					goto err;
993				if (!bn_wexpand(rr, k * 2))
994					goto err;
995				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
996			} else {
997				if (!bn_wexpand(t, k * 4))
998					goto err;
999				if (!bn_wexpand(rr, k * 4))
1000					goto err;
1001				bn_mul_part_recursive(rr->d, a->d, b->d,
1002				    al - j, j, t->d);
1003			}
1004			rr->top = top;
1005			goto end;
1006		}
1007#endif
1008	}
1009#endif /* BN_RECURSION */
1010	if (!bn_wexpand(rr, top))
1011		goto err;
1012	rr->top = top;
1013	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1014
1015#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1016end:
1017#endif
1018	bn_correct_top(rr);
1019	if (r != rr)
1020		BN_copy(r, rr);
1021	ret = 1;
1022err:
1023	BN_CTX_end(ctx);
1024	return (ret);
1025}
1026
1027void
1028bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1029{
1030	BN_ULONG *rr;
1031
1032
1033	if (na < nb) {
1034		int itmp;
1035		BN_ULONG *ltmp;
1036
1037		itmp = na;
1038		na = nb;
1039		nb = itmp;
1040		ltmp = a;
1041		a = b;
1042		b = ltmp;
1043
1044	}
1045	rr = &(r[na]);
1046	if (nb <= 0) {
1047		(void)bn_mul_words(r, a, na, 0);
1048		return;
1049	} else
1050		rr[0] = bn_mul_words(r, a, na, b[0]);
1051
1052	for (;;) {
1053		if (--nb <= 0)
1054			return;
1055		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1056		if (--nb <= 0)
1057			return;
1058		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1059		if (--nb <= 0)
1060			return;
1061		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1062		if (--nb <= 0)
1063			return;
1064		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1065		rr += 4;
1066		r += 4;
1067		b += 4;
1068	}
1069}
1070
1071void
1072bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1073{
1074	bn_mul_words(r, a, n, b[0]);
1075
1076	for (;;) {
1077		if (--n <= 0)
1078			return;
1079		bn_mul_add_words(&(r[1]), a, n, b[1]);
1080		if (--n <= 0)
1081			return;
1082		bn_mul_add_words(&(r[2]), a, n, b[2]);
1083		if (--n <= 0)
1084			return;
1085		bn_mul_add_words(&(r[3]), a, n, b[3]);
1086		if (--n <= 0)
1087			return;
1088		bn_mul_add_words(&(r[4]), a, n, b[4]);
1089		r += 4;
1090		b += 4;
1091	}
1092}
1093