bn_mul.c revision 1.17
1/* $OpenBSD: bn_mul.c,v 1.17 2014/07/10 13:58:22 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <stdio.h>
66#include <string.h>
67
68#include "cryptlib.h"
69#include "bn_lcl.h"
70
71#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
72/* Here follows specialised variants of bn_add_words() and
73   bn_sub_words().  They have the property performing operations on
74   arrays of different sizes.  The sizes of those arrays is expressed through
75   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
76   which is the delta between the two lengths, calculated as len(a)-len(b).
77   All lengths are the number of BN_ULONGs...  For the operations that require
78   a result array as parameter, it must have the length cl+abs(dl).
79   These functions should probably end up in bn_asm.c as soon as there are
80   assembler counterparts for the systems that use assembler files.  */
81
82BN_ULONG
83bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
84    int dl)
85{
86	BN_ULONG c, t;
87
88	assert(cl >= 0);
89	c = bn_sub_words(r, a, b, cl);
90
91	if (dl == 0)
92		return c;
93
94	r += cl;
95	a += cl;
96	b += cl;
97
98	if (dl < 0) {
99#ifdef BN_COUNT
100		fprintf(stderr,
101		    "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
102		    cl, dl, c);
103#endif
104		for (;;) {
105			t = b[0];
106			r[0] = (0 - t - c) & BN_MASK2;
107			if (t != 0)
108				c = 1;
109			if (++dl >= 0)
110				break;
111
112			t = b[1];
113			r[1] = (0 - t - c) & BN_MASK2;
114			if (t != 0)
115				c = 1;
116			if (++dl >= 0)
117				break;
118
119			t = b[2];
120			r[2] = (0 - t - c) & BN_MASK2;
121			if (t != 0)
122				c = 1;
123			if (++dl >= 0)
124				break;
125
126			t = b[3];
127			r[3] = (0 - t - c) & BN_MASK2;
128			if (t != 0)
129				c = 1;
130			if (++dl >= 0)
131				break;
132
133			b += 4;
134			r += 4;
135		}
136	} else {
137		int save_dl = dl;
138#ifdef BN_COUNT
139		fprintf(stderr,
140		    "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
141		    cl, dl, c);
142#endif
143		while (c) {
144			t = a[0];
145			r[0] = (t - c) & BN_MASK2;
146			if (t != 0)
147				c = 0;
148			if (--dl <= 0)
149				break;
150
151			t = a[1];
152			r[1] = (t - c) & BN_MASK2;
153			if (t != 0)
154				c = 0;
155			if (--dl <= 0)
156				break;
157
158			t = a[2];
159			r[2] = (t - c) & BN_MASK2;
160			if (t != 0)
161				c = 0;
162			if (--dl <= 0)
163				break;
164
165			t = a[3];
166			r[3] = (t - c) & BN_MASK2;
167			if (t != 0)
168				c = 0;
169			if (--dl <= 0)
170				break;
171
172			save_dl = dl;
173			a += 4;
174			r += 4;
175		}
176		if (dl > 0) {
177#ifdef BN_COUNT
178			fprintf(stderr,
179			    "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
180			    cl, dl);
181#endif
182			if (save_dl > dl) {
183				switch (save_dl - dl) {
184				case 1:
185					r[1] = a[1];
186					if (--dl <= 0)
187						break;
188				case 2:
189					r[2] = a[2];
190					if (--dl <= 0)
191						break;
192				case 3:
193					r[3] = a[3];
194					if (--dl <= 0)
195						break;
196				}
197				a += 4;
198				r += 4;
199			}
200		}
201		if (dl > 0) {
202#ifdef BN_COUNT
203			fprintf(stderr,
204			    "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
205			    cl, dl);
206#endif
207			for (;;) {
208				r[0] = a[0];
209				if (--dl <= 0)
210					break;
211				r[1] = a[1];
212				if (--dl <= 0)
213					break;
214				r[2] = a[2];
215				if (--dl <= 0)
216					break;
217				r[3] = a[3];
218				if (--dl <= 0)
219					break;
220
221				a += 4;
222				r += 4;
223			}
224		}
225	}
226	return c;
227}
228#endif
229
230BN_ULONG
231bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
232    int dl)
233{
234	BN_ULONG c, l, t;
235
236	assert(cl >= 0);
237	c = bn_add_words(r, a, b, cl);
238
239	if (dl == 0)
240		return c;
241
242	r += cl;
243	a += cl;
244	b += cl;
245
246	if (dl < 0) {
247		int save_dl = dl;
248#ifdef BN_COUNT
249		fprintf(stderr,
250		    "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
251		    cl, dl, c);
252#endif
253		while (c) {
254			l = (c + b[0]) & BN_MASK2;
255			c = (l < c);
256			r[0] = l;
257			if (++dl >= 0)
258				break;
259
260			l = (c + b[1]) & BN_MASK2;
261			c = (l < c);
262			r[1] = l;
263			if (++dl >= 0)
264				break;
265
266			l = (c + b[2]) & BN_MASK2;
267			c = (l < c);
268			r[2] = l;
269			if (++dl >= 0)
270				break;
271
272			l = (c + b[3]) & BN_MASK2;
273			c = (l < c);
274			r[3] = l;
275			if (++dl >= 0)
276				break;
277
278			save_dl = dl;
279			b += 4;
280			r += 4;
281		}
282		if (dl < 0) {
283#ifdef BN_COUNT
284			fprintf(stderr,
285			    "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
286			    cl, dl);
287#endif
288			if (save_dl < dl) {
289				switch (dl - save_dl) {
290				case 1:
291					r[1] = b[1];
292					if (++dl >= 0)
293						break;
294				case 2:
295					r[2] = b[2];
296					if (++dl >= 0)
297						break;
298				case 3:
299					r[3] = b[3];
300					if (++dl >= 0)
301						break;
302				}
303				b += 4;
304				r += 4;
305			}
306		}
307		if (dl < 0) {
308#ifdef BN_COUNT
309			fprintf(stderr,
310			    "  bn_add_part_words %d + %d (dl < 0, copy)\n",
311			    cl, dl);
312#endif
313			for (;;) {
314				r[0] = b[0];
315				if (++dl >= 0)
316					break;
317				r[1] = b[1];
318				if (++dl >= 0)
319					break;
320				r[2] = b[2];
321				if (++dl >= 0)
322					break;
323				r[3] = b[3];
324				if (++dl >= 0)
325					break;
326
327				b += 4;
328				r += 4;
329			}
330		}
331	} else {
332		int save_dl = dl;
333#ifdef BN_COUNT
334		fprintf(stderr,
335		    "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
336#endif
337		while (c) {
338			t = (a[0] + c) & BN_MASK2;
339			c = (t < c);
340			r[0] = t;
341			if (--dl <= 0)
342				break;
343
344			t = (a[1] + c) & BN_MASK2;
345			c = (t < c);
346			r[1] = t;
347			if (--dl <= 0)
348				break;
349
350			t = (a[2] + c) & BN_MASK2;
351			c = (t < c);
352			r[2] = t;
353			if (--dl <= 0)
354				break;
355
356			t = (a[3] + c) & BN_MASK2;
357			c = (t < c);
358			r[3] = t;
359			if (--dl <= 0)
360				break;
361
362			save_dl = dl;
363			a += 4;
364			r += 4;
365		}
366#ifdef BN_COUNT
367		fprintf(stderr,
368		    "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
369#endif
370		if (dl > 0) {
371			if (save_dl > dl) {
372				switch (save_dl - dl) {
373				case 1:
374					r[1] = a[1];
375					if (--dl <= 0)
376						break;
377				case 2:
378					r[2] = a[2];
379					if (--dl <= 0)
380						break;
381				case 3:
382					r[3] = a[3];
383					if (--dl <= 0)
384						break;
385				}
386				a += 4;
387				r += 4;
388			}
389		}
390		if (dl > 0) {
391#ifdef BN_COUNT
392			fprintf(stderr,
393			    "  bn_add_part_words %d + %d (dl > 0, copy)\n",
394			    cl, dl);
395#endif
396			for (;;) {
397				r[0] = a[0];
398				if (--dl <= 0)
399					break;
400				r[1] = a[1];
401				if (--dl <= 0)
402					break;
403				r[2] = a[2];
404				if (--dl <= 0)
405					break;
406				r[3] = a[3];
407				if (--dl <= 0)
408					break;
409
410				a += 4;
411				r += 4;
412			}
413		}
414	}
415	return c;
416}
417
418#ifdef BN_RECURSION
419/* Karatsuba recursive multiplication algorithm
420 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
421
422/* r is 2*n2 words in size,
423 * a and b are both n2 words in size.
424 * n2 must be a power of 2.
425 * We multiply and return the result.
426 * t must be 2*n2 words in size
427 * We calculate
428 * a[0]*b[0]
429 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
430 * a[1]*b[1]
431 */
432/* dnX may not be positive, but n2/2+dnX has to be */
433void
434bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
435    int dnb, BN_ULONG *t)
436{
437	int n = n2 / 2, c1, c2;
438	int tna = n + dna, tnb = n + dnb;
439	unsigned int neg, zero;
440	BN_ULONG ln, lo, *p;
441
442# ifdef BN_COUNT
443	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
444# endif
445# ifdef BN_MUL_COMBA
446#  if 0
447	if (n2 == 4) {
448		bn_mul_comba4(r, a, b);
449		return;
450	}
451#  endif
452	/* Only call bn_mul_comba 8 if n2 == 8 and the
453	 * two arrays are complete [steve]
454	 */
455	if (n2 == 8 && dna == 0 && dnb == 0) {
456		bn_mul_comba8(r, a, b);
457		return;
458	}
459# endif /* BN_MUL_COMBA */
460	/* Else do normal multiply */
461	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
462		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
463		if ((dna + dnb) < 0)
464			memset(&r[2*n2 + dna + dnb], 0,
465			    sizeof(BN_ULONG) * -(dna + dnb));
466		return;
467	}
468	/* r=(a[0]-a[1])*(b[1]-b[0]) */
469	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
470	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
471	zero = neg = 0;
472	switch (c1 * 3 + c2) {
473	case -4:
474		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
475		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
476		break;
477	case -3:
478		zero = 1;
479		break;
480	case -2:
481		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
482		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
483		neg = 1;
484		break;
485	case -1:
486	case 0:
487	case 1:
488		zero = 1;
489		break;
490	case 2:
491		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
492		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
493		neg = 1;
494		break;
495	case 3:
496		zero = 1;
497		break;
498	case 4:
499		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
500		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
501		break;
502	}
503
504# ifdef BN_MUL_COMBA
505	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
506					       extra args to do this well */
507	{
508		if (!zero)
509			bn_mul_comba4(&(t[n2]), t, &(t[n]));
510		else
511			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
512
513		bn_mul_comba4(r, a, b);
514		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
515	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
516						    take extra args to do this
517						    well */
518	{
519		if (!zero)
520			bn_mul_comba8(&(t[n2]), t, &(t[n]));
521		else
522			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
523
524		bn_mul_comba8(r, a, b);
525		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
526	} else
527# endif /* BN_MUL_COMBA */
528	{
529		p = &(t[n2 * 2]);
530		if (!zero)
531			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
532		else
533			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
534		bn_mul_recursive(r, a, b, n, 0, 0, p);
535		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
536	}
537
538	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
539	 * r[10] holds (a[0]*b[0])
540	 * r[32] holds (b[1]*b[1])
541	 */
542
543	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
544
545	if (neg) /* if t[32] is negative */
546	{
547		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
548	} else {
549		/* Might have a carry */
550		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
551	}
552
553	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
554	 * r[10] holds (a[0]*b[0])
555	 * r[32] holds (b[1]*b[1])
556	 * c1 holds the carry bits
557	 */
558	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
559	if (c1) {
560		p = &(r[n + n2]);
561		lo= *p;
562		ln = (lo + c1) & BN_MASK2;
563		*p = ln;
564
565		/* The overflow will stop before we over write
566		 * words we should not overwrite */
567		if (ln < (BN_ULONG)c1) {
568			do {
569				p++;
570				lo= *p;
571				ln = (lo + 1) & BN_MASK2;
572				*p = ln;
573			} while (ln == 0);
574		}
575	}
576}
577
578/* n+tn is the word length
579 * t needs to be n*4 is size, as does r */
580/* tnX may not be negative but less than n */
581void
582bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
583    int tnb, BN_ULONG *t)
584{
585	int i, j, n2 = n * 2;
586	int c1, c2, neg;
587	BN_ULONG ln, lo, *p;
588
589# ifdef BN_COUNT
590	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
591	    n, tna, n, tnb);
592# endif
593	if (n < 8) {
594		bn_mul_normal(r, a, n + tna, b, n + tnb);
595		return;
596	}
597
598	/* r=(a[0]-a[1])*(b[1]-b[0]) */
599	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
600	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
601	neg = 0;
602	switch (c1 * 3 + c2) {
603	case -4:
604		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
605		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
606		break;
607	case -3:
608		/* break; */
609	case -2:
610		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
611		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
612		neg = 1;
613		break;
614	case -1:
615	case 0:
616	case 1:
617		/* break; */
618	case 2:
619		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
620		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
621		neg = 1;
622		break;
623	case 3:
624		/* break; */
625	case 4:
626		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
627		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
628		break;
629	}
630		/* The zero case isn't yet implemented here. The speedup
631		   would probably be negligible. */
632# if 0
633	if (n == 4) {
634		bn_mul_comba4(&(t[n2]), t, &(t[n]));
635		bn_mul_comba4(r, a, b);
636		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
637		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
638	} else
639# endif
640		if (n == 8) {
641		bn_mul_comba8(&(t[n2]), t, &(t[n]));
642		bn_mul_comba8(r, a, b);
643		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
644		memset(&(r[n2 + tna + tnb]), 0,
645		    sizeof(BN_ULONG) * (n2 - tna - tnb));
646	} else {
647		p = &(t[n2*2]);
648		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
649		bn_mul_recursive(r, a, b, n, 0, 0, p);
650		i = n / 2;
651		/* If there is only a bottom half to the number,
652		 * just do it */
653		if (tna > tnb)
654			j = tna - i;
655		else
656			j = tnb - i;
657		if (j == 0) {
658			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
659			    i, tna - i, tnb - i, p);
660			memset(&(r[n2 + i * 2]), 0,
661			    sizeof(BN_ULONG) * (n2 - i * 2));
662		}
663		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
664		{
665			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
666			    i, tna - i, tnb - i, p);
667			memset(&(r[n2 + tna + tnb]), 0,
668			    sizeof(BN_ULONG) * (n2 - tna - tnb));
669		}
670		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
671		{
672			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
673			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
674			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
675				bn_mul_normal(&(r[n2]), &(a[n]), tna,
676				    &(b[n]), tnb);
677			} else {
678				for (;;) {
679					i /= 2;
680					/* these simplified conditions work
681					 * exclusively because difference
682					 * between tna and tnb is 1 or 0 */
683					if (i < tna || i < tnb) {
684						bn_mul_part_recursive(&(r[n2]),
685						    &(a[n]), &(b[n]), i,
686						    tna - i, tnb - i, p);
687						break;
688					} else if (i == tna || i == tnb) {
689						bn_mul_recursive(&(r[n2]),
690						    &(a[n]), &(b[n]), i,
691						    tna - i, tnb - i, p);
692						break;
693					}
694				}
695			}
696		}
697	}
698
699	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
700	 * r[10] holds (a[0]*b[0])
701	 * r[32] holds (b[1]*b[1])
702	 */
703
704	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
705
706	if (neg) /* if t[32] is negative */
707	{
708		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
709	} else {
710		/* Might have a carry */
711		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
712	}
713
714	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
715	 * r[10] holds (a[0]*b[0])
716	 * r[32] holds (b[1]*b[1])
717	 * c1 holds the carry bits
718	 */
719	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
720	if (c1) {
721		p = &(r[n + n2]);
722		lo= *p;
723		ln = (lo + c1)&BN_MASK2;
724		*p = ln;
725
726		/* The overflow will stop before we over write
727		 * words we should not overwrite */
728		if (ln < (BN_ULONG)c1) {
729			do {
730				p++;
731				lo= *p;
732				ln = (lo + 1) & BN_MASK2;
733				*p = ln;
734			} while (ln == 0);
735		}
736	}
737}
738
739/* a and b must be the same size, which is n2.
740 * r needs to be n2 words and t needs to be n2*2
741 */
742void
743bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
744{
745	int n = n2 / 2;
746
747# ifdef BN_COUNT
748	fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
749# endif
750
751	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
752	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
753		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
754		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
755		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
756		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
757	} else {
758		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
759		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
760		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
761		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
762	}
763}
764
765/* a and b must be the same size, which is n2.
766 * r needs to be n2 words and t needs to be n2*2
767 * l is the low words of the output.
768 * t needs to be n2*3
769 */
770void
771bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
772    BN_ULONG *t)
773{
774	int i, n;
775	int c1, c2;
776	int neg, oneg, zero;
777	BN_ULONG ll, lc, *lp, *mp;
778
779# ifdef BN_COUNT
780	fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
781# endif
782	n = n2 / 2;
783
784	/* Calculate (al-ah)*(bh-bl) */
785	neg = zero = 0;
786	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
787	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
788	switch (c1 * 3 + c2) {
789	case -4:
790		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
791		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
792		break;
793	case -3:
794		zero = 1;
795		break;
796	case -2:
797		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
798		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
799		neg = 1;
800		break;
801	case -1:
802	case 0:
803	case 1:
804		zero = 1;
805		break;
806	case 2:
807		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
808		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
809		neg = 1;
810		break;
811	case 3:
812		zero = 1;
813		break;
814	case 4:
815		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
816		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
817		break;
818	}
819
820	oneg = neg;
821	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
822	/* r[10] = (a[1]*b[1]) */
823# ifdef BN_MUL_COMBA
824	if (n == 8) {
825		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
826		bn_mul_comba8(r, &(a[n]), &(b[n]));
827	} else
828# endif
829	{
830		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
831		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
832	}
833
834	/* s0 == low(al*bl)
835	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
836	 * We know s0 and s1 so the only unknown is high(al*bl)
837	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
838	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
839	 */
840	if (l != NULL) {
841		lp = &(t[n2 + n]);
842		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
843	} else {
844		c1 = 0;
845		lp = &(r[0]);
846	}
847
848	if (neg)
849		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
850	else {
851		bn_add_words(&(t[n2]), lp, &(t[0]), n);
852		neg = 0;
853	}
854
855	if (l != NULL) {
856		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
857	} else {
858		lp = &(t[n2 + n]);
859		mp = &(t[n2]);
860		for (i = 0; i < n; i++)
861			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
862	}
863
864	/* s[0] = low(al*bl)
865	 * t[3] = high(al*bl)
866	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
867	 * r[10] = (a[1]*b[1])
868	 */
869	/* R[10] = al*bl
870	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
871	 * R[32] = ah*bh
872	 */
873	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
874	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
875	 * R[3]=r[1]+(carry/borrow)
876	 */
877	if (l != NULL) {
878		lp = &(t[n2]);
879		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
880	} else {
881		lp = &(t[n2 + n]);
882		c1 = 0;
883	}
884	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
885	if (oneg)
886		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
887	else
888		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
889
890	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
891	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
892	if (oneg)
893		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
894	else
895		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
896
897	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
898	{
899		i = 0;
900		if (c1 > 0) {
901			lc = c1;
902			do {
903				ll = (r[i] + lc) & BN_MASK2;
904				r[i++] = ll;
905				lc = (lc > ll);
906			} while (lc);
907		} else {
908			lc = -c1;
909			do {
910				ll = r[i];
911				r[i++] = (ll - lc) & BN_MASK2;
912				lc = (lc > ll);
913			} while (lc);
914		}
915	}
916	if (c2 != 0) /* Add starting at r[1] */
917	{
918		i = n;
919		if (c2 > 0) {
920			lc = c2;
921			do {
922				ll = (r[i] + lc) & BN_MASK2;
923				r[i++] = ll;
924				lc = (lc > ll);
925			} while (lc);
926		} else {
927			lc = -c2;
928			do {
929				ll = r[i];
930				r[i++] = (ll - lc) & BN_MASK2;
931				lc = (lc > ll);
932			} while (lc);
933		}
934	}
935}
936#endif /* BN_RECURSION */
937
938int
939BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
940{
941	int ret = 0;
942	int top, al, bl;
943	BIGNUM *rr;
944#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
945	int i;
946#endif
947#ifdef BN_RECURSION
948	BIGNUM *t = NULL;
949	int j = 0, k;
950#endif
951
952#ifdef BN_COUNT
953	fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
954#endif
955
956	bn_check_top(a);
957	bn_check_top(b);
958	bn_check_top(r);
959
960	al = a->top;
961	bl = b->top;
962
963	if ((al == 0) || (bl == 0)) {
964		BN_zero(r);
965		return (1);
966	}
967	top = al + bl;
968
969	BN_CTX_start(ctx);
970	if ((r == a) || (r == b)) {
971		if ((rr = BN_CTX_get(ctx)) == NULL)
972			goto err;
973	} else
974		rr = r;
975	rr->neg = a->neg ^ b->neg;
976
977#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
978	i = al - bl;
979#endif
980#ifdef BN_MUL_COMBA
981	if (i == 0) {
982# if 0
983		if (al == 4) {
984			if (bn_wexpand(rr, 8) == NULL)
985				goto err;
986			rr->top = 8;
987			bn_mul_comba4(rr->d, a->d, b->d);
988			goto end;
989		}
990# endif
991		if (al == 8) {
992			if (bn_wexpand(rr, 16) == NULL)
993				goto err;
994			rr->top = 16;
995			bn_mul_comba8(rr->d, a->d, b->d);
996			goto end;
997		}
998	}
999#endif /* BN_MUL_COMBA */
1000#ifdef BN_RECURSION
1001	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1002		if (i >= -1 && i <= 1) {
1003			/* Find out the power of two lower or equal
1004			   to the longest of the two numbers */
1005			if (i >= 0) {
1006				j = BN_num_bits_word((BN_ULONG)al);
1007			}
1008			if (i == -1) {
1009				j = BN_num_bits_word((BN_ULONG)bl);
1010			}
1011			j = 1 << (j - 1);
1012			assert(j <= al || j <= bl);
1013			k = j + j;
1014			t = BN_CTX_get(ctx);
1015			if (t == NULL)
1016				goto err;
1017			if (al > j || bl > j) {
1018				if (bn_wexpand(t, k * 4) == NULL)
1019					goto err;
1020				if (bn_wexpand(rr, k * 4) == NULL)
1021					goto err;
1022				bn_mul_part_recursive(rr->d, a->d, b->d,
1023				    j, al - j, bl - j, t->d);
1024			}
1025			else	/* al <= j || bl <= j */
1026			{
1027				if (bn_wexpand(t, k * 2) == NULL)
1028					goto err;
1029				if (bn_wexpand(rr, k * 2) == NULL)
1030					goto err;
1031				bn_mul_recursive(rr->d, a->d, b->d,
1032				    j, al - j, bl - j, t->d);
1033			}
1034			rr->top = top;
1035			goto end;
1036		}
1037#if 0
1038		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1039			BIGNUM *tmp_bn = (BIGNUM *)b;
1040			if (bn_wexpand(tmp_bn, al) == NULL)
1041				goto err;
1042			tmp_bn->d[bl] = 0;
1043			bl++;
1044			i--;
1045		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1046			BIGNUM *tmp_bn = (BIGNUM *)a;
1047			if (bn_wexpand(tmp_bn, bl) == NULL)
1048				goto err;
1049			tmp_bn->d[al] = 0;
1050			al++;
1051			i++;
1052		}
1053		if (i == 0) {
1054			/* symmetric and > 4 */
1055			/* 16 or larger */
1056			j = BN_num_bits_word((BN_ULONG)al);
1057			j = 1 << (j - 1);
1058			k = j + j;
1059			t = BN_CTX_get(ctx);
1060			if (al == j) /* exact multiple */
1061			{
1062				if (bn_wexpand(t, k * 2) == NULL)
1063					goto err;
1064				if (bn_wexpand(rr, k * 2) == NULL)
1065					goto err;
1066				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1067			} else {
1068				if (bn_wexpand(t, k * 4) == NULL)
1069					goto err;
1070				if (bn_wexpand(rr, k * 4) == NULL)
1071					goto err;
1072				bn_mul_part_recursive(rr->d, a->d, b->d,
1073				    al - j, j, t->d);
1074			}
1075			rr->top = top;
1076			goto end;
1077		}
1078#endif
1079	}
1080#endif /* BN_RECURSION */
1081	if (bn_wexpand(rr, top) == NULL)
1082		goto err;
1083	rr->top = top;
1084	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1085
1086#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1087end:
1088#endif
1089	bn_correct_top(rr);
1090	if (r != rr)
1091		BN_copy(r, rr);
1092	ret = 1;
1093err:
1094	bn_check_top(r);
1095	BN_CTX_end(ctx);
1096	return (ret);
1097}
1098
1099void
1100bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1101{
1102	BN_ULONG *rr;
1103
1104#ifdef BN_COUNT
1105	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1106#endif
1107
1108	if (na < nb) {
1109		int itmp;
1110		BN_ULONG *ltmp;
1111
1112		itmp = na;
1113		na = nb;
1114		nb = itmp;
1115		ltmp = a;
1116		a = b;
1117		b = ltmp;
1118
1119	}
1120	rr = &(r[na]);
1121	if (nb <= 0) {
1122		(void)bn_mul_words(r, a, na, 0);
1123		return;
1124	} else
1125		rr[0] = bn_mul_words(r, a, na, b[0]);
1126
1127	for (;;) {
1128		if (--nb <= 0)
1129			return;
1130		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1131		if (--nb <= 0)
1132			return;
1133		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1134		if (--nb <= 0)
1135			return;
1136		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1137		if (--nb <= 0)
1138			return;
1139		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1140		rr += 4;
1141		r += 4;
1142		b += 4;
1143	}
1144}
1145
1146void
1147bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1148{
1149#ifdef BN_COUNT
1150	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1151#endif
1152	bn_mul_words(r, a, n, b[0]);
1153
1154	for (;;) {
1155		if (--n <= 0)
1156			return;
1157		bn_mul_add_words(&(r[1]), a, n, b[1]);
1158		if (--n <= 0)
1159			return;
1160		bn_mul_add_words(&(r[2]), a, n, b[2]);
1161		if (--n <= 0)
1162			return;
1163		bn_mul_add_words(&(r[3]), a, n, b[3]);
1164		if (--n <= 0)
1165			return;
1166		bn_mul_add_words(&(r[4]), a, n, b[4]);
1167		r += 4;
1168		b += 4;
1169	}
1170}
1171