bn_mul.c revision 1.16
1/* $OpenBSD: bn_mul.c,v 1.16 2014/06/12 15:49:28 deraadt Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70/* Here follows specialised variants of bn_add_words() and
71   bn_sub_words().  They have the property performing operations on
72   arrays of different sizes.  The sizes of those arrays is expressed through
73   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74   which is the delta between the two lengths, calculated as len(a)-len(b).
75   All lengths are the number of BN_ULONGs...  For the operations that require
76   a result array as parameter, it must have the length cl+abs(dl).
77   These functions should probably end up in bn_asm.c as soon as there are
78   assembler counterparts for the systems that use assembler files.  */
79
80BN_ULONG
81bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
82    int dl)
83{
84	BN_ULONG c, t;
85
86	assert(cl >= 0);
87	c = bn_sub_words(r, a, b, cl);
88
89	if (dl == 0)
90		return c;
91
92	r += cl;
93	a += cl;
94	b += cl;
95
96	if (dl < 0) {
97#ifdef BN_COUNT
98		fprintf(stderr,
99		    "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
100		    cl, dl, c);
101#endif
102		for (;;) {
103			t = b[0];
104			r[0] = (0 - t - c) & BN_MASK2;
105			if (t != 0)
106				c = 1;
107			if (++dl >= 0)
108				break;
109
110			t = b[1];
111			r[1] = (0 - t - c) & BN_MASK2;
112			if (t != 0)
113				c = 1;
114			if (++dl >= 0)
115				break;
116
117			t = b[2];
118			r[2] = (0 - t - c) & BN_MASK2;
119			if (t != 0)
120				c = 1;
121			if (++dl >= 0)
122				break;
123
124			t = b[3];
125			r[3] = (0 - t - c) & BN_MASK2;
126			if (t != 0)
127				c = 1;
128			if (++dl >= 0)
129				break;
130
131			b += 4;
132			r += 4;
133		}
134	} else {
135		int save_dl = dl;
136#ifdef BN_COUNT
137		fprintf(stderr,
138		    "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
139		    cl, dl, c);
140#endif
141		while (c) {
142			t = a[0];
143			r[0] = (t - c) & BN_MASK2;
144			if (t != 0)
145				c = 0;
146			if (--dl <= 0)
147				break;
148
149			t = a[1];
150			r[1] = (t - c) & BN_MASK2;
151			if (t != 0)
152				c = 0;
153			if (--dl <= 0)
154				break;
155
156			t = a[2];
157			r[2] = (t - c) & BN_MASK2;
158			if (t != 0)
159				c = 0;
160			if (--dl <= 0)
161				break;
162
163			t = a[3];
164			r[3] = (t - c) & BN_MASK2;
165			if (t != 0)
166				c = 0;
167			if (--dl <= 0)
168				break;
169
170			save_dl = dl;
171			a += 4;
172			r += 4;
173		}
174		if (dl > 0) {
175#ifdef BN_COUNT
176			fprintf(stderr,
177			    "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
178			    cl, dl);
179#endif
180			if (save_dl > dl) {
181				switch (save_dl - dl) {
182				case 1:
183					r[1] = a[1];
184					if (--dl <= 0)
185						break;
186				case 2:
187					r[2] = a[2];
188					if (--dl <= 0)
189						break;
190				case 3:
191					r[3] = a[3];
192					if (--dl <= 0)
193						break;
194				}
195				a += 4;
196				r += 4;
197			}
198		}
199		if (dl > 0) {
200#ifdef BN_COUNT
201			fprintf(stderr,
202			    "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
203			    cl, dl);
204#endif
205			for (;;) {
206				r[0] = a[0];
207				if (--dl <= 0)
208					break;
209				r[1] = a[1];
210				if (--dl <= 0)
211					break;
212				r[2] = a[2];
213				if (--dl <= 0)
214					break;
215				r[3] = a[3];
216				if (--dl <= 0)
217					break;
218
219				a += 4;
220				r += 4;
221			}
222		}
223	}
224	return c;
225}
226#endif
227
228BN_ULONG
229bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
230    int dl)
231{
232	BN_ULONG c, l, t;
233
234	assert(cl >= 0);
235	c = bn_add_words(r, a, b, cl);
236
237	if (dl == 0)
238		return c;
239
240	r += cl;
241	a += cl;
242	b += cl;
243
244	if (dl < 0) {
245		int save_dl = dl;
246#ifdef BN_COUNT
247		fprintf(stderr,
248		    "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
249		    cl, dl, c);
250#endif
251		while (c) {
252			l = (c + b[0]) & BN_MASK2;
253			c = (l < c);
254			r[0] = l;
255			if (++dl >= 0)
256				break;
257
258			l = (c + b[1]) & BN_MASK2;
259			c = (l < c);
260			r[1] = l;
261			if (++dl >= 0)
262				break;
263
264			l = (c + b[2]) & BN_MASK2;
265			c = (l < c);
266			r[2] = l;
267			if (++dl >= 0)
268				break;
269
270			l = (c + b[3]) & BN_MASK2;
271			c = (l < c);
272			r[3] = l;
273			if (++dl >= 0)
274				break;
275
276			save_dl = dl;
277			b += 4;
278			r += 4;
279		}
280		if (dl < 0) {
281#ifdef BN_COUNT
282			fprintf(stderr,
283			    "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
284			    cl, dl);
285#endif
286			if (save_dl < dl) {
287				switch (dl - save_dl) {
288				case 1:
289					r[1] = b[1];
290					if (++dl >= 0)
291						break;
292				case 2:
293					r[2] = b[2];
294					if (++dl >= 0)
295						break;
296				case 3:
297					r[3] = b[3];
298					if (++dl >= 0)
299						break;
300				}
301				b += 4;
302				r += 4;
303			}
304		}
305		if (dl < 0) {
306#ifdef BN_COUNT
307			fprintf(stderr,
308			    "  bn_add_part_words %d + %d (dl < 0, copy)\n",
309			    cl, dl);
310#endif
311			for (;;) {
312				r[0] = b[0];
313				if (++dl >= 0)
314					break;
315				r[1] = b[1];
316				if (++dl >= 0)
317					break;
318				r[2] = b[2];
319				if (++dl >= 0)
320					break;
321				r[3] = b[3];
322				if (++dl >= 0)
323					break;
324
325				b += 4;
326				r += 4;
327			}
328		}
329	} else {
330		int save_dl = dl;
331#ifdef BN_COUNT
332		fprintf(stderr,
333		    "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
334#endif
335		while (c) {
336			t = (a[0] + c) & BN_MASK2;
337			c = (t < c);
338			r[0] = t;
339			if (--dl <= 0)
340				break;
341
342			t = (a[1] + c) & BN_MASK2;
343			c = (t < c);
344			r[1] = t;
345			if (--dl <= 0)
346				break;
347
348			t = (a[2] + c) & BN_MASK2;
349			c = (t < c);
350			r[2] = t;
351			if (--dl <= 0)
352				break;
353
354			t = (a[3] + c) & BN_MASK2;
355			c = (t < c);
356			r[3] = t;
357			if (--dl <= 0)
358				break;
359
360			save_dl = dl;
361			a += 4;
362			r += 4;
363		}
364#ifdef BN_COUNT
365		fprintf(stderr,
366		    "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
367#endif
368		if (dl > 0) {
369			if (save_dl > dl) {
370				switch (save_dl - dl) {
371				case 1:
372					r[1] = a[1];
373					if (--dl <= 0)
374						break;
375				case 2:
376					r[2] = a[2];
377					if (--dl <= 0)
378						break;
379				case 3:
380					r[3] = a[3];
381					if (--dl <= 0)
382						break;
383				}
384				a += 4;
385				r += 4;
386			}
387		}
388		if (dl > 0) {
389#ifdef BN_COUNT
390			fprintf(stderr,
391			    "  bn_add_part_words %d + %d (dl > 0, copy)\n",
392			    cl, dl);
393#endif
394			for (;;) {
395				r[0] = a[0];
396				if (--dl <= 0)
397					break;
398				r[1] = a[1];
399				if (--dl <= 0)
400					break;
401				r[2] = a[2];
402				if (--dl <= 0)
403					break;
404				r[3] = a[3];
405				if (--dl <= 0)
406					break;
407
408				a += 4;
409				r += 4;
410			}
411		}
412	}
413	return c;
414}
415
416#ifdef BN_RECURSION
417/* Karatsuba recursive multiplication algorithm
418 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
419
420/* r is 2*n2 words in size,
421 * a and b are both n2 words in size.
422 * n2 must be a power of 2.
423 * We multiply and return the result.
424 * t must be 2*n2 words in size
425 * We calculate
426 * a[0]*b[0]
427 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
428 * a[1]*b[1]
429 */
430/* dnX may not be positive, but n2/2+dnX has to be */
431void
432bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
433    int dnb, BN_ULONG *t)
434{
435	int n = n2 / 2, c1, c2;
436	int tna = n + dna, tnb = n + dnb;
437	unsigned int neg, zero;
438	BN_ULONG ln, lo, *p;
439
440# ifdef BN_COUNT
441	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
442# endif
443# ifdef BN_MUL_COMBA
444#  if 0
445	if (n2 == 4) {
446		bn_mul_comba4(r, a, b);
447		return;
448	}
449#  endif
450	/* Only call bn_mul_comba 8 if n2 == 8 and the
451	 * two arrays are complete [steve]
452	 */
453	if (n2 == 8 && dna == 0 && dnb == 0) {
454		bn_mul_comba8(r, a, b);
455		return;
456	}
457# endif /* BN_MUL_COMBA */
458	/* Else do normal multiply */
459	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
460		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
461		if ((dna + dnb) < 0)
462			memset(&r[2*n2 + dna + dnb], 0,
463			    sizeof(BN_ULONG) * -(dna + dnb));
464		return;
465	}
466	/* r=(a[0]-a[1])*(b[1]-b[0]) */
467	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
468	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
469	zero = neg = 0;
470	switch (c1 * 3 + c2) {
471	case -4:
472		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
473		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
474		break;
475	case -3:
476		zero = 1;
477		break;
478	case -2:
479		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
480		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
481		neg = 1;
482		break;
483	case -1:
484	case 0:
485	case 1:
486		zero = 1;
487		break;
488	case 2:
489		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
490		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
491		neg = 1;
492		break;
493	case 3:
494		zero = 1;
495		break;
496	case 4:
497		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
498		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
499		break;
500	}
501
502# ifdef BN_MUL_COMBA
503	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
504					       extra args to do this well */
505	{
506		if (!zero)
507			bn_mul_comba4(&(t[n2]), t, &(t[n]));
508		else
509			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
510
511		bn_mul_comba4(r, a, b);
512		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
513	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
514						    take extra args to do this
515						    well */
516	{
517		if (!zero)
518			bn_mul_comba8(&(t[n2]), t, &(t[n]));
519		else
520			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
521
522		bn_mul_comba8(r, a, b);
523		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
524	} else
525# endif /* BN_MUL_COMBA */
526	{
527		p = &(t[n2 * 2]);
528		if (!zero)
529			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
530		else
531			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
532		bn_mul_recursive(r, a, b, n, 0, 0, p);
533		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
534	}
535
536	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
537	 * r[10] holds (a[0]*b[0])
538	 * r[32] holds (b[1]*b[1])
539	 */
540
541	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
542
543	if (neg) /* if t[32] is negative */
544	{
545		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
546	} else {
547		/* Might have a carry */
548		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
549	}
550
551	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
552	 * r[10] holds (a[0]*b[0])
553	 * r[32] holds (b[1]*b[1])
554	 * c1 holds the carry bits
555	 */
556	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
557	if (c1) {
558		p = &(r[n + n2]);
559		lo= *p;
560		ln = (lo + c1) & BN_MASK2;
561		*p = ln;
562
563		/* The overflow will stop before we over write
564		 * words we should not overwrite */
565		if (ln < (BN_ULONG)c1) {
566			do {
567				p++;
568				lo= *p;
569				ln = (lo + 1) & BN_MASK2;
570				*p = ln;
571			} while (ln == 0);
572		}
573	}
574}
575
576/* n+tn is the word length
577 * t needs to be n*4 is size, as does r */
578/* tnX may not be negative but less than n */
579void
580bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
581    int tnb, BN_ULONG *t)
582{
583	int i, j, n2 = n * 2;
584	int c1, c2, neg;
585	BN_ULONG ln, lo, *p;
586
587# ifdef BN_COUNT
588	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
589	    n, tna, n, tnb);
590# endif
591	if (n < 8) {
592		bn_mul_normal(r, a, n + tna, b, n + tnb);
593		return;
594	}
595
596	/* r=(a[0]-a[1])*(b[1]-b[0]) */
597	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
598	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
599	neg = 0;
600	switch (c1 * 3 + c2) {
601	case -4:
602		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
603		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
604		break;
605	case -3:
606		/* break; */
607	case -2:
608		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
609		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
610		neg = 1;
611		break;
612	case -1:
613	case 0:
614	case 1:
615		/* break; */
616	case 2:
617		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
618		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
619		neg = 1;
620		break;
621	case 3:
622		/* break; */
623	case 4:
624		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
625		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
626		break;
627	}
628		/* The zero case isn't yet implemented here. The speedup
629		   would probably be negligible. */
630# if 0
631	if (n == 4) {
632		bn_mul_comba4(&(t[n2]), t, &(t[n]));
633		bn_mul_comba4(r, a, b);
634		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
635		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
636	} else
637# endif
638		if (n == 8) {
639		bn_mul_comba8(&(t[n2]), t, &(t[n]));
640		bn_mul_comba8(r, a, b);
641		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
642		memset(&(r[n2 + tna + tnb]), 0,
643		    sizeof(BN_ULONG) * (n2 - tna - tnb));
644	} else {
645		p = &(t[n2*2]);
646		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
647		bn_mul_recursive(r, a, b, n, 0, 0, p);
648		i = n / 2;
649		/* If there is only a bottom half to the number,
650		 * just do it */
651		if (tna > tnb)
652			j = tna - i;
653		else
654			j = tnb - i;
655		if (j == 0) {
656			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
657			    i, tna - i, tnb - i, p);
658			memset(&(r[n2 + i * 2]), 0,
659			    sizeof(BN_ULONG) * (n2 - i * 2));
660		}
661		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
662		{
663			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
664			    i, tna - i, tnb - i, p);
665			memset(&(r[n2 + tna + tnb]), 0,
666			    sizeof(BN_ULONG) * (n2 - tna - tnb));
667		}
668		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
669		{
670			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
671			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
672			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
673				bn_mul_normal(&(r[n2]), &(a[n]), tna,
674				    &(b[n]), tnb);
675			} else {
676				for (;;) {
677					i /= 2;
678					/* these simplified conditions work
679					 * exclusively because difference
680					 * between tna and tnb is 1 or 0 */
681					if (i < tna || i < tnb) {
682						bn_mul_part_recursive(&(r[n2]),
683						    &(a[n]), &(b[n]), i,
684						    tna - i, tnb - i, p);
685						break;
686					} else if (i == tna || i == tnb) {
687						bn_mul_recursive(&(r[n2]),
688						    &(a[n]), &(b[n]), i,
689						    tna - i, tnb - i, p);
690						break;
691					}
692				}
693			}
694		}
695	}
696
697	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
698	 * r[10] holds (a[0]*b[0])
699	 * r[32] holds (b[1]*b[1])
700	 */
701
702	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
703
704	if (neg) /* if t[32] is negative */
705	{
706		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
707	} else {
708		/* Might have a carry */
709		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
710	}
711
712	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
713	 * r[10] holds (a[0]*b[0])
714	 * r[32] holds (b[1]*b[1])
715	 * c1 holds the carry bits
716	 */
717	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
718	if (c1) {
719		p = &(r[n + n2]);
720		lo= *p;
721		ln = (lo + c1)&BN_MASK2;
722		*p = ln;
723
724		/* The overflow will stop before we over write
725		 * words we should not overwrite */
726		if (ln < (BN_ULONG)c1) {
727			do {
728				p++;
729				lo= *p;
730				ln = (lo + 1) & BN_MASK2;
731				*p = ln;
732			} while (ln == 0);
733		}
734	}
735}
736
737/* a and b must be the same size, which is n2.
738 * r needs to be n2 words and t needs to be n2*2
739 */
740void
741bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
742{
743	int n = n2 / 2;
744
745# ifdef BN_COUNT
746	fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
747# endif
748
749	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
750	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
751		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
752		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
753		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
754		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
755	} else {
756		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
757		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
758		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
759		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
760	}
761}
762
763/* a and b must be the same size, which is n2.
764 * r needs to be n2 words and t needs to be n2*2
765 * l is the low words of the output.
766 * t needs to be n2*3
767 */
768void
769bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
770    BN_ULONG *t)
771{
772	int i, n;
773	int c1, c2;
774	int neg, oneg, zero;
775	BN_ULONG ll, lc, *lp, *mp;
776
777# ifdef BN_COUNT
778	fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
779# endif
780	n = n2 / 2;
781
782	/* Calculate (al-ah)*(bh-bl) */
783	neg = zero = 0;
784	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
785	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
786	switch (c1 * 3 + c2) {
787	case -4:
788		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
789		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
790		break;
791	case -3:
792		zero = 1;
793		break;
794	case -2:
795		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
796		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
797		neg = 1;
798		break;
799	case -1:
800	case 0:
801	case 1:
802		zero = 1;
803		break;
804	case 2:
805		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
806		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
807		neg = 1;
808		break;
809	case 3:
810		zero = 1;
811		break;
812	case 4:
813		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
814		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
815		break;
816	}
817
818	oneg = neg;
819	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
820	/* r[10] = (a[1]*b[1]) */
821# ifdef BN_MUL_COMBA
822	if (n == 8) {
823		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
824		bn_mul_comba8(r, &(a[n]), &(b[n]));
825	} else
826# endif
827	{
828		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
829		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
830	}
831
832	/* s0 == low(al*bl)
833	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
834	 * We know s0 and s1 so the only unknown is high(al*bl)
835	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
836	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
837	 */
838	if (l != NULL) {
839		lp = &(t[n2 + n]);
840		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
841	} else {
842		c1 = 0;
843		lp = &(r[0]);
844	}
845
846	if (neg)
847		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
848	else {
849		bn_add_words(&(t[n2]), lp, &(t[0]), n);
850		neg = 0;
851	}
852
853	if (l != NULL) {
854		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
855	} else {
856		lp = &(t[n2 + n]);
857		mp = &(t[n2]);
858		for (i = 0; i < n; i++)
859			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
860	}
861
862	/* s[0] = low(al*bl)
863	 * t[3] = high(al*bl)
864	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
865	 * r[10] = (a[1]*b[1])
866	 */
867	/* R[10] = al*bl
868	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
869	 * R[32] = ah*bh
870	 */
871	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
872	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
873	 * R[3]=r[1]+(carry/borrow)
874	 */
875	if (l != NULL) {
876		lp = &(t[n2]);
877		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
878	} else {
879		lp = &(t[n2 + n]);
880		c1 = 0;
881	}
882	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
883	if (oneg)
884		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
885	else
886		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
887
888	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
889	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
890	if (oneg)
891		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
892	else
893		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
894
895	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
896	{
897		i = 0;
898		if (c1 > 0) {
899			lc = c1;
900			do {
901				ll = (r[i] + lc) & BN_MASK2;
902				r[i++] = ll;
903				lc = (lc > ll);
904			} while (lc);
905		} else {
906			lc = -c1;
907			do {
908				ll = r[i];
909				r[i++] = (ll - lc) & BN_MASK2;
910				lc = (lc > ll);
911			} while (lc);
912		}
913	}
914	if (c2 != 0) /* Add starting at r[1] */
915	{
916		i = n;
917		if (c2 > 0) {
918			lc = c2;
919			do {
920				ll = (r[i] + lc) & BN_MASK2;
921				r[i++] = ll;
922				lc = (lc > ll);
923			} while (lc);
924		} else {
925			lc = -c2;
926			do {
927				ll = r[i];
928				r[i++] = (ll - lc) & BN_MASK2;
929				lc = (lc > ll);
930			} while (lc);
931		}
932	}
933}
934#endif /* BN_RECURSION */
935
936int
937BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
938{
939	int ret = 0;
940	int top, al, bl;
941	BIGNUM *rr;
942#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
943	int i;
944#endif
945#ifdef BN_RECURSION
946	BIGNUM *t = NULL;
947	int j = 0, k;
948#endif
949
950#ifdef BN_COUNT
951	fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
952#endif
953
954	bn_check_top(a);
955	bn_check_top(b);
956	bn_check_top(r);
957
958	al = a->top;
959	bl = b->top;
960
961	if ((al == 0) || (bl == 0)) {
962		BN_zero(r);
963		return (1);
964	}
965	top = al + bl;
966
967	BN_CTX_start(ctx);
968	if ((r == a) || (r == b)) {
969		if ((rr = BN_CTX_get(ctx)) == NULL)
970			goto err;
971	} else
972		rr = r;
973	rr->neg = a->neg ^ b->neg;
974
975#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
976	i = al - bl;
977#endif
978#ifdef BN_MUL_COMBA
979	if (i == 0) {
980# if 0
981		if (al == 4) {
982			if (bn_wexpand(rr, 8) == NULL)
983				goto err;
984			rr->top = 8;
985			bn_mul_comba4(rr->d, a->d, b->d);
986			goto end;
987		}
988# endif
989		if (al == 8) {
990			if (bn_wexpand(rr, 16) == NULL)
991				goto err;
992			rr->top = 16;
993			bn_mul_comba8(rr->d, a->d, b->d);
994			goto end;
995		}
996	}
997#endif /* BN_MUL_COMBA */
998#ifdef BN_RECURSION
999	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1000		if (i >= -1 && i <= 1) {
1001			/* Find out the power of two lower or equal
1002			   to the longest of the two numbers */
1003			if (i >= 0) {
1004				j = BN_num_bits_word((BN_ULONG)al);
1005			}
1006			if (i == -1) {
1007				j = BN_num_bits_word((BN_ULONG)bl);
1008			}
1009			j = 1 << (j - 1);
1010			assert(j <= al || j <= bl);
1011			k = j + j;
1012			t = BN_CTX_get(ctx);
1013			if (t == NULL)
1014				goto err;
1015			if (al > j || bl > j) {
1016				if (bn_wexpand(t, k * 4) == NULL)
1017					goto err;
1018				if (bn_wexpand(rr, k * 4) == NULL)
1019					goto err;
1020				bn_mul_part_recursive(rr->d, a->d, b->d,
1021				    j, al - j, bl - j, t->d);
1022			}
1023			else	/* al <= j || bl <= j */
1024			{
1025				if (bn_wexpand(t, k * 2) == NULL)
1026					goto err;
1027				if (bn_wexpand(rr, k * 2) == NULL)
1028					goto err;
1029				bn_mul_recursive(rr->d, a->d, b->d,
1030				    j, al - j, bl - j, t->d);
1031			}
1032			rr->top = top;
1033			goto end;
1034		}
1035#if 0
1036		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1037			BIGNUM *tmp_bn = (BIGNUM *)b;
1038			if (bn_wexpand(tmp_bn, al) == NULL)
1039				goto err;
1040			tmp_bn->d[bl] = 0;
1041			bl++;
1042			i--;
1043		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1044			BIGNUM *tmp_bn = (BIGNUM *)a;
1045			if (bn_wexpand(tmp_bn, bl) == NULL)
1046				goto err;
1047			tmp_bn->d[al] = 0;
1048			al++;
1049			i++;
1050		}
1051		if (i == 0) {
1052			/* symmetric and > 4 */
1053			/* 16 or larger */
1054			j = BN_num_bits_word((BN_ULONG)al);
1055			j = 1 << (j - 1);
1056			k = j + j;
1057			t = BN_CTX_get(ctx);
1058			if (al == j) /* exact multiple */
1059			{
1060				if (bn_wexpand(t, k * 2) == NULL)
1061					goto err;
1062				if (bn_wexpand(rr, k * 2) == NULL)
1063					goto err;
1064				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1065			} else {
1066				if (bn_wexpand(t, k * 4) == NULL)
1067					goto err;
1068				if (bn_wexpand(rr, k * 4) == NULL)
1069					goto err;
1070				bn_mul_part_recursive(rr->d, a->d, b->d,
1071				    al - j, j, t->d);
1072			}
1073			rr->top = top;
1074			goto end;
1075		}
1076#endif
1077	}
1078#endif /* BN_RECURSION */
1079	if (bn_wexpand(rr, top) == NULL)
1080		goto err;
1081	rr->top = top;
1082	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1083
1084#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1085end:
1086#endif
1087	bn_correct_top(rr);
1088	if (r != rr)
1089		BN_copy(r, rr);
1090	ret = 1;
1091err:
1092	bn_check_top(r);
1093	BN_CTX_end(ctx);
1094	return (ret);
1095}
1096
1097void
1098bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1099{
1100	BN_ULONG *rr;
1101
1102#ifdef BN_COUNT
1103	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1104#endif
1105
1106	if (na < nb) {
1107		int itmp;
1108		BN_ULONG *ltmp;
1109
1110		itmp = na;
1111		na = nb;
1112		nb = itmp;
1113		ltmp = a;
1114		a = b;
1115		b = ltmp;
1116
1117	}
1118	rr = &(r[na]);
1119	if (nb <= 0) {
1120		(void)bn_mul_words(r, a, na, 0);
1121		return;
1122	} else
1123		rr[0] = bn_mul_words(r, a, na, b[0]);
1124
1125	for (;;) {
1126		if (--nb <= 0)
1127			return;
1128		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1129		if (--nb <= 0)
1130			return;
1131		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1132		if (--nb <= 0)
1133			return;
1134		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1135		if (--nb <= 0)
1136			return;
1137		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1138		rr += 4;
1139		r += 4;
1140		b += 4;
1141	}
1142}
1143
1144void
1145bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1146{
1147#ifdef BN_COUNT
1148	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1149#endif
1150	bn_mul_words(r, a, n, b[0]);
1151
1152	for (;;) {
1153		if (--n <= 0)
1154			return;
1155		bn_mul_add_words(&(r[1]), a, n, b[1]);
1156		if (--n <= 0)
1157			return;
1158		bn_mul_add_words(&(r[2]), a, n, b[2]);
1159		if (--n <= 0)
1160			return;
1161		bn_mul_add_words(&(r[3]), a, n, b[3]);
1162		if (--n <= 0)
1163			return;
1164		bn_mul_add_words(&(r[4]), a, n, b[4]);
1165		r += 4;
1166		b += 4;
1167	}
1168}
1169