bn_mul.c revision 160814
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70/* Here follows specialised variants of bn_add_words() and
71   bn_sub_words().  They have the property performing operations on
72   arrays of different sizes.  The sizes of those arrays is expressed through
73   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74   which is the delta between the two lengths, calculated as len(a)-len(b).
75   All lengths are the number of BN_ULONGs...  For the operations that require
76   a result array as parameter, it must have the length cl+abs(dl).
77   These functions should probably end up in bn_asm.c as soon as there are
78   assembler counterparts for the systems that use assembler files.  */
79
80BN_ULONG bn_sub_part_words(BN_ULONG *r,
81	const BN_ULONG *a, const BN_ULONG *b,
82	int cl, int dl)
83	{
84	BN_ULONG c, t;
85
86	assert(cl >= 0);
87	c = bn_sub_words(r, a, b, cl);
88
89	if (dl == 0)
90		return c;
91
92	r += cl;
93	a += cl;
94	b += cl;
95
96	if (dl < 0)
97		{
98#ifdef BN_COUNT
99		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100#endif
101		for (;;)
102			{
103			t = b[0];
104			r[0] = (0-t-c)&BN_MASK2;
105			if (t != 0) c=1;
106			if (++dl >= 0) break;
107
108			t = b[1];
109			r[1] = (0-t-c)&BN_MASK2;
110			if (t != 0) c=1;
111			if (++dl >= 0) break;
112
113			t = b[2];
114			r[2] = (0-t-c)&BN_MASK2;
115			if (t != 0) c=1;
116			if (++dl >= 0) break;
117
118			t = b[3];
119			r[3] = (0-t-c)&BN_MASK2;
120			if (t != 0) c=1;
121			if (++dl >= 0) break;
122
123			b += 4;
124			r += 4;
125			}
126		}
127	else
128		{
129		int save_dl = dl;
130#ifdef BN_COUNT
131		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132#endif
133		while(c)
134			{
135			t = a[0];
136			r[0] = (t-c)&BN_MASK2;
137			if (t != 0) c=0;
138			if (--dl <= 0) break;
139
140			t = a[1];
141			r[1] = (t-c)&BN_MASK2;
142			if (t != 0) c=0;
143			if (--dl <= 0) break;
144
145			t = a[2];
146			r[2] = (t-c)&BN_MASK2;
147			if (t != 0) c=0;
148			if (--dl <= 0) break;
149
150			t = a[3];
151			r[3] = (t-c)&BN_MASK2;
152			if (t != 0) c=0;
153			if (--dl <= 0) break;
154
155			save_dl = dl;
156			a += 4;
157			r += 4;
158			}
159		if (dl > 0)
160			{
161#ifdef BN_COUNT
162			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163#endif
164			if (save_dl > dl)
165				{
166				switch (save_dl - dl)
167					{
168				case 1:
169					r[1] = a[1];
170					if (--dl <= 0) break;
171				case 2:
172					r[2] = a[2];
173					if (--dl <= 0) break;
174				case 3:
175					r[3] = a[3];
176					if (--dl <= 0) break;
177					}
178				a += 4;
179				r += 4;
180				}
181			}
182		if (dl > 0)
183			{
184#ifdef BN_COUNT
185			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186#endif
187			for(;;)
188				{
189				r[0] = a[0];
190				if (--dl <= 0) break;
191				r[1] = a[1];
192				if (--dl <= 0) break;
193				r[2] = a[2];
194				if (--dl <= 0) break;
195				r[3] = a[3];
196				if (--dl <= 0) break;
197
198				a += 4;
199				r += 4;
200				}
201			}
202		}
203	return c;
204	}
205#endif
206
207BN_ULONG bn_add_part_words(BN_ULONG *r,
208	const BN_ULONG *a, const BN_ULONG *b,
209	int cl, int dl)
210	{
211	BN_ULONG c, l, t;
212
213	assert(cl >= 0);
214	c = bn_add_words(r, a, b, cl);
215
216	if (dl == 0)
217		return c;
218
219	r += cl;
220	a += cl;
221	b += cl;
222
223	if (dl < 0)
224		{
225		int save_dl = dl;
226#ifdef BN_COUNT
227		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228#endif
229		while (c)
230			{
231			l=(c+b[0])&BN_MASK2;
232			c=(l < c);
233			r[0]=l;
234			if (++dl >= 0) break;
235
236			l=(c+b[1])&BN_MASK2;
237			c=(l < c);
238			r[1]=l;
239			if (++dl >= 0) break;
240
241			l=(c+b[2])&BN_MASK2;
242			c=(l < c);
243			r[2]=l;
244			if (++dl >= 0) break;
245
246			l=(c+b[3])&BN_MASK2;
247			c=(l < c);
248			r[3]=l;
249			if (++dl >= 0) break;
250
251			save_dl = dl;
252			b+=4;
253			r+=4;
254			}
255		if (dl < 0)
256			{
257#ifdef BN_COUNT
258			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259#endif
260			if (save_dl < dl)
261				{
262				switch (dl - save_dl)
263					{
264				case 1:
265					r[1] = b[1];
266					if (++dl >= 0) break;
267				case 2:
268					r[2] = b[2];
269					if (++dl >= 0) break;
270				case 3:
271					r[3] = b[3];
272					if (++dl >= 0) break;
273					}
274				b += 4;
275				r += 4;
276				}
277			}
278		if (dl < 0)
279			{
280#ifdef BN_COUNT
281			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282#endif
283			for(;;)
284				{
285				r[0] = b[0];
286				if (++dl >= 0) break;
287				r[1] = b[1];
288				if (++dl >= 0) break;
289				r[2] = b[2];
290				if (++dl >= 0) break;
291				r[3] = b[3];
292				if (++dl >= 0) break;
293
294				b += 4;
295				r += 4;
296				}
297			}
298		}
299	else
300		{
301		int save_dl = dl;
302#ifdef BN_COUNT
303		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304#endif
305		while (c)
306			{
307			t=(a[0]+c)&BN_MASK2;
308			c=(t < c);
309			r[0]=t;
310			if (--dl <= 0) break;
311
312			t=(a[1]+c)&BN_MASK2;
313			c=(t < c);
314			r[1]=t;
315			if (--dl <= 0) break;
316
317			t=(a[2]+c)&BN_MASK2;
318			c=(t < c);
319			r[2]=t;
320			if (--dl <= 0) break;
321
322			t=(a[3]+c)&BN_MASK2;
323			c=(t < c);
324			r[3]=t;
325			if (--dl <= 0) break;
326
327			save_dl = dl;
328			a+=4;
329			r+=4;
330			}
331#ifdef BN_COUNT
332		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333#endif
334		if (dl > 0)
335			{
336			if (save_dl > dl)
337				{
338				switch (save_dl - dl)
339					{
340				case 1:
341					r[1] = a[1];
342					if (--dl <= 0) break;
343				case 2:
344					r[2] = a[2];
345					if (--dl <= 0) break;
346				case 3:
347					r[3] = a[3];
348					if (--dl <= 0) break;
349					}
350				a += 4;
351				r += 4;
352				}
353			}
354		if (dl > 0)
355			{
356#ifdef BN_COUNT
357			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358#endif
359			for(;;)
360				{
361				r[0] = a[0];
362				if (--dl <= 0) break;
363				r[1] = a[1];
364				if (--dl <= 0) break;
365				r[2] = a[2];
366				if (--dl <= 0) break;
367				r[3] = a[3];
368				if (--dl <= 0) break;
369
370				a += 4;
371				r += 4;
372				}
373			}
374		}
375	return c;
376	}
377
378#ifdef BN_RECURSION
379/* Karatsuba recursive multiplication algorithm
380 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381
382/* r is 2*n2 words in size,
383 * a and b are both n2 words in size.
384 * n2 must be a power of 2.
385 * We multiply and return the result.
386 * t must be 2*n2 words in size
387 * We calculate
388 * a[0]*b[0]
389 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390 * a[1]*b[1]
391 */
392void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
393	int dna, int dnb, BN_ULONG *t)
394	{
395	int n=n2/2,c1,c2;
396	int tna=n+dna, tnb=n+dnb;
397	unsigned int neg,zero;
398	BN_ULONG ln,lo,*p;
399
400# ifdef BN_COUNT
401	fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2);
402# endif
403# ifdef BN_MUL_COMBA
404#  if 0
405	if (n2 == 4)
406		{
407		bn_mul_comba4(r,a,b);
408		return;
409		}
410#  endif
411	/* Only call bn_mul_comba 8 if n2 == 8 and the
412	 * two arrays are complete [steve]
413	 */
414	if (n2 == 8 && dna == 0 && dnb == 0)
415		{
416		bn_mul_comba8(r,a,b);
417		return;
418		}
419# endif /* BN_MUL_COMBA */
420	/* Else do normal multiply */
421	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
422		{
423		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
424		if ((dna + dnb) < 0)
425			memset(&r[2*n2 + dna + dnb], 0,
426				sizeof(BN_ULONG) * -(dna + dnb));
427		return;
428		}
429	/* r=(a[0]-a[1])*(b[1]-b[0]) */
430	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
431	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
432	zero=neg=0;
433	switch (c1*3+c2)
434		{
435	case -4:
436		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
437		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
438		break;
439	case -3:
440		zero=1;
441		break;
442	case -2:
443		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
444		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
445		neg=1;
446		break;
447	case -1:
448	case 0:
449	case 1:
450		zero=1;
451		break;
452	case 2:
453		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
454		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
455		neg=1;
456		break;
457	case 3:
458		zero=1;
459		break;
460	case 4:
461		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
462		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
463		break;
464		}
465
466# ifdef BN_MUL_COMBA
467	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
468					       extra args to do this well */
469		{
470		if (!zero)
471			bn_mul_comba4(&(t[n2]),t,&(t[n]));
472		else
473			memset(&(t[n2]),0,8*sizeof(BN_ULONG));
474
475		bn_mul_comba4(r,a,b);
476		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
477		}
478	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
479						    take extra args to do this
480						    well */
481		{
482		if (!zero)
483			bn_mul_comba8(&(t[n2]),t,&(t[n]));
484		else
485			memset(&(t[n2]),0,16*sizeof(BN_ULONG));
486
487		bn_mul_comba8(r,a,b);
488		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
489		}
490	else
491# endif /* BN_MUL_COMBA */
492		{
493		p= &(t[n2*2]);
494		if (!zero)
495			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
496		else
497			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
498		bn_mul_recursive(r,a,b,n,0,0,p);
499		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
500		}
501
502	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
503	 * r[10] holds (a[0]*b[0])
504	 * r[32] holds (b[1]*b[1])
505	 */
506
507	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
508
509	if (neg) /* if t[32] is negative */
510		{
511		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
512		}
513	else
514		{
515		/* Might have a carry */
516		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
517		}
518
519	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
520	 * r[10] holds (a[0]*b[0])
521	 * r[32] holds (b[1]*b[1])
522	 * c1 holds the carry bits
523	 */
524	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
525	if (c1)
526		{
527		p= &(r[n+n2]);
528		lo= *p;
529		ln=(lo+c1)&BN_MASK2;
530		*p=ln;
531
532		/* The overflow will stop before we over write
533		 * words we should not overwrite */
534		if (ln < (BN_ULONG)c1)
535			{
536			do	{
537				p++;
538				lo= *p;
539				ln=(lo+1)&BN_MASK2;
540				*p=ln;
541				} while (ln == 0);
542			}
543		}
544	}
545
546/* n+tn is the word length
547 * t needs to be n*4 is size, as does r */
548void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
549	     int tna, int tnb, BN_ULONG *t)
550	{
551	int i,j,n2=n*2;
552	int c1,c2,neg,zero;
553	BN_ULONG ln,lo,*p;
554
555# ifdef BN_COUNT
556	fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n",
557		tna, n, tnb, n);
558# endif
559	if (n < 8)
560		{
561		bn_mul_normal(r,a,n+tna,b,n+tnb);
562		return;
563		}
564
565	/* r=(a[0]-a[1])*(b[1]-b[0]) */
566	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
567	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
568	zero=neg=0;
569	switch (c1*3+c2)
570		{
571	case -4:
572		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
573		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
574		break;
575	case -3:
576		zero=1;
577		/* break; */
578	case -2:
579		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
580		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
581		neg=1;
582		break;
583	case -1:
584	case 0:
585	case 1:
586		zero=1;
587		/* break; */
588	case 2:
589		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
590		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
591		neg=1;
592		break;
593	case 3:
594		zero=1;
595		/* break; */
596	case 4:
597		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
598		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
599		break;
600		}
601		/* The zero case isn't yet implemented here. The speedup
602		   would probably be negligible. */
603# if 0
604	if (n == 4)
605		{
606		bn_mul_comba4(&(t[n2]),t,&(t[n]));
607		bn_mul_comba4(r,a,b);
608		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
609		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
610		}
611	else
612# endif
613	if (n == 8)
614		{
615		bn_mul_comba8(&(t[n2]),t,&(t[n]));
616		bn_mul_comba8(r,a,b);
617		bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
618		memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
619		}
620	else
621		{
622		p= &(t[n2*2]);
623		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
624		bn_mul_recursive(r,a,b,n,0,0,p);
625		i=n/2;
626		/* If there is only a bottom half to the number,
627		 * just do it */
628		if (tna > tnb)
629			j = tna - i;
630		else
631			j = tnb - i;
632		if (j == 0)
633			{
634			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
635				i,tna-i,tnb-i,p);
636			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
637			}
638		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
639				{
640				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
641					i,tna-i,tnb-i,p);
642				memset(&(r[n2+tna+tnb]),0,
643					sizeof(BN_ULONG)*(n2-tna-tnb));
644				}
645		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
646			{
647			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
648			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
649				&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
650				{
651				bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
652				}
653			else
654				{
655				for (;;)
656					{
657					i/=2;
658					if (i < tna && i < tnb)
659						{
660						bn_mul_part_recursive(&(r[n2]),
661							&(a[n]),&(b[n]),
662							i,tna-i,tnb-i,p);
663						break;
664						}
665					else if (i <= tna && i <= tnb)
666						{
667						bn_mul_recursive(&(r[n2]),
668							&(a[n]),&(b[n]),
669							i,tna-i,tnb-i,p);
670						break;
671						}
672					}
673				}
674			}
675		}
676
677	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
678	 * r[10] holds (a[0]*b[0])
679	 * r[32] holds (b[1]*b[1])
680	 */
681
682	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
683
684	if (neg) /* if t[32] is negative */
685		{
686		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
687		}
688	else
689		{
690		/* Might have a carry */
691		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
692		}
693
694	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
695	 * r[10] holds (a[0]*b[0])
696	 * r[32] holds (b[1]*b[1])
697	 * c1 holds the carry bits
698	 */
699	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
700	if (c1)
701		{
702		p= &(r[n+n2]);
703		lo= *p;
704		ln=(lo+c1)&BN_MASK2;
705		*p=ln;
706
707		/* The overflow will stop before we over write
708		 * words we should not overwrite */
709		if (ln < (BN_ULONG)c1)
710			{
711			do	{
712				p++;
713				lo= *p;
714				ln=(lo+1)&BN_MASK2;
715				*p=ln;
716				} while (ln == 0);
717			}
718		}
719	}
720
721/* a and b must be the same size, which is n2.
722 * r needs to be n2 words and t needs to be n2*2
723 */
724void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
725	     BN_ULONG *t)
726	{
727	int n=n2/2;
728
729# ifdef BN_COUNT
730	fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
731# endif
732
733	bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
734	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
735		{
736		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
737		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
738		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
739		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
740		}
741	else
742		{
743		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
744		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
745		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
746		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
747		}
748	}
749
750/* a and b must be the same size, which is n2.
751 * r needs to be n2 words and t needs to be n2*2
752 * l is the low words of the output.
753 * t needs to be n2*3
754 */
755void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
756	     BN_ULONG *t)
757	{
758	int i,n;
759	int c1,c2;
760	int neg,oneg,zero;
761	BN_ULONG ll,lc,*lp,*mp;
762
763# ifdef BN_COUNT
764	fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
765# endif
766	n=n2/2;
767
768	/* Calculate (al-ah)*(bh-bl) */
769	neg=zero=0;
770	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
771	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
772	switch (c1*3+c2)
773		{
774	case -4:
775		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
776		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
777		break;
778	case -3:
779		zero=1;
780		break;
781	case -2:
782		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
783		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
784		neg=1;
785		break;
786	case -1:
787	case 0:
788	case 1:
789		zero=1;
790		break;
791	case 2:
792		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
793		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
794		neg=1;
795		break;
796	case 3:
797		zero=1;
798		break;
799	case 4:
800		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
801		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
802		break;
803		}
804
805	oneg=neg;
806	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
807	/* r[10] = (a[1]*b[1]) */
808# ifdef BN_MUL_COMBA
809	if (n == 8)
810		{
811		bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
812		bn_mul_comba8(r,&(a[n]),&(b[n]));
813		}
814	else
815# endif
816		{
817		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
818		bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
819		}
820
821	/* s0 == low(al*bl)
822	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
823	 * We know s0 and s1 so the only unknown is high(al*bl)
824	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
825	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
826	 */
827	if (l != NULL)
828		{
829		lp= &(t[n2+n]);
830		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
831		}
832	else
833		{
834		c1=0;
835		lp= &(r[0]);
836		}
837
838	if (neg)
839		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
840	else
841		{
842		bn_add_words(&(t[n2]),lp,&(t[0]),n);
843		neg=0;
844		}
845
846	if (l != NULL)
847		{
848		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
849		}
850	else
851		{
852		lp= &(t[n2+n]);
853		mp= &(t[n2]);
854		for (i=0; i<n; i++)
855			lp[i]=((~mp[i])+1)&BN_MASK2;
856		}
857
858	/* s[0] = low(al*bl)
859	 * t[3] = high(al*bl)
860	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
861	 * r[10] = (a[1]*b[1])
862	 */
863	/* R[10] = al*bl
864	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
865	 * R[32] = ah*bh
866	 */
867	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
868	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
869	 * R[3]=r[1]+(carry/borrow)
870	 */
871	if (l != NULL)
872		{
873		lp= &(t[n2]);
874		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
875		}
876	else
877		{
878		lp= &(t[n2+n]);
879		c1=0;
880		}
881	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
882	if (oneg)
883		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
884	else
885		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
886
887	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
888	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
889	if (oneg)
890		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
891	else
892		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
893
894	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
895		{
896		i=0;
897		if (c1 > 0)
898			{
899			lc=c1;
900			do	{
901				ll=(r[i]+lc)&BN_MASK2;
902				r[i++]=ll;
903				lc=(lc > ll);
904				} while (lc);
905			}
906		else
907			{
908			lc= -c1;
909			do	{
910				ll=r[i];
911				r[i++]=(ll-lc)&BN_MASK2;
912				lc=(lc > ll);
913				} while (lc);
914			}
915		}
916	if (c2 != 0) /* Add starting at r[1] */
917		{
918		i=n;
919		if (c2 > 0)
920			{
921			lc=c2;
922			do	{
923				ll=(r[i]+lc)&BN_MASK2;
924				r[i++]=ll;
925				lc=(lc > ll);
926				} while (lc);
927			}
928		else
929			{
930			lc= -c2;
931			do	{
932				ll=r[i];
933				r[i++]=(ll-lc)&BN_MASK2;
934				lc=(lc > ll);
935				} while (lc);
936			}
937		}
938	}
939#endif /* BN_RECURSION */
940
941int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
942	{
943	int ret=0;
944	int top,al,bl;
945	BIGNUM *rr;
946#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
947	int i;
948#endif
949#ifdef BN_RECURSION
950	BIGNUM *t=NULL;
951	int j=0,k;
952#endif
953
954#ifdef BN_COUNT
955	fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
956#endif
957
958	bn_check_top(a);
959	bn_check_top(b);
960	bn_check_top(r);
961
962	al=a->top;
963	bl=b->top;
964
965	if ((al == 0) || (bl == 0))
966		{
967		BN_zero(r);
968		return(1);
969		}
970	top=al+bl;
971
972	BN_CTX_start(ctx);
973	if ((r == a) || (r == b))
974		{
975		if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
976		}
977	else
978		rr = r;
979	rr->neg=a->neg^b->neg;
980
981#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
982	i = al-bl;
983#endif
984#ifdef BN_MUL_COMBA
985	if (i == 0)
986		{
987# if 0
988		if (al == 4)
989			{
990			if (bn_wexpand(rr,8) == NULL) goto err;
991			rr->top=8;
992			bn_mul_comba4(rr->d,a->d,b->d);
993			goto end;
994			}
995# endif
996		if (al == 8)
997			{
998			if (bn_wexpand(rr,16) == NULL) goto err;
999			rr->top=16;
1000			bn_mul_comba8(rr->d,a->d,b->d);
1001			goto end;
1002			}
1003		}
1004#endif /* BN_MUL_COMBA */
1005#ifdef BN_RECURSION
1006	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1007		{
1008		if (i >= -1 && i <= 1)
1009			{
1010			int sav_j =0;
1011			/* Find out the power of two lower or equal
1012			   to the longest of the two numbers */
1013			if (i >= 0)
1014				{
1015				j = BN_num_bits_word((BN_ULONG)al);
1016				}
1017			if (i == -1)
1018				{
1019				j = BN_num_bits_word((BN_ULONG)bl);
1020				}
1021			sav_j = j;
1022			j = 1<<(j-1);
1023			assert(j <= al || j <= bl);
1024			k = j+j;
1025			t = BN_CTX_get(ctx);
1026			if (al > j || bl > j)
1027				{
1028				bn_wexpand(t,k*4);
1029				bn_wexpand(rr,k*4);
1030				bn_mul_part_recursive(rr->d,a->d,b->d,
1031					j,al-j,bl-j,t->d);
1032				}
1033			else	/* al <= j || bl <= j */
1034				{
1035				bn_wexpand(t,k*2);
1036				bn_wexpand(rr,k*2);
1037				bn_mul_recursive(rr->d,a->d,b->d,
1038					j,al-j,bl-j,t->d);
1039				}
1040			rr->top=top;
1041			goto end;
1042			}
1043#if 0
1044		if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1045			{
1046			BIGNUM *tmp_bn = (BIGNUM *)b;
1047			if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1048			tmp_bn->d[bl]=0;
1049			bl++;
1050			i--;
1051			}
1052		else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1053			{
1054			BIGNUM *tmp_bn = (BIGNUM *)a;
1055			if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1056			tmp_bn->d[al]=0;
1057			al++;
1058			i++;
1059			}
1060		if (i == 0)
1061			{
1062			/* symmetric and > 4 */
1063			/* 16 or larger */
1064			j=BN_num_bits_word((BN_ULONG)al);
1065			j=1<<(j-1);
1066			k=j+j;
1067			t = BN_CTX_get(ctx);
1068			if (al == j) /* exact multiple */
1069				{
1070				if (bn_wexpand(t,k*2) == NULL) goto err;
1071				if (bn_wexpand(rr,k*2) == NULL) goto err;
1072				bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1073				}
1074			else
1075				{
1076				if (bn_wexpand(t,k*4) == NULL) goto err;
1077				if (bn_wexpand(rr,k*4) == NULL) goto err;
1078				bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1079				}
1080			rr->top=top;
1081			goto end;
1082			}
1083#endif
1084		}
1085#endif /* BN_RECURSION */
1086	if (bn_wexpand(rr,top) == NULL) goto err;
1087	rr->top=top;
1088	bn_mul_normal(rr->d,a->d,al,b->d,bl);
1089
1090#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1091end:
1092#endif
1093	bn_correct_top(rr);
1094	if (r != rr) BN_copy(r,rr);
1095	ret=1;
1096err:
1097	bn_check_top(r);
1098	BN_CTX_end(ctx);
1099	return(ret);
1100	}
1101
1102void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1103	{
1104	BN_ULONG *rr;
1105
1106#ifdef BN_COUNT
1107	fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1108#endif
1109
1110	if (na < nb)
1111		{
1112		int itmp;
1113		BN_ULONG *ltmp;
1114
1115		itmp=na; na=nb; nb=itmp;
1116		ltmp=a;   a=b;   b=ltmp;
1117
1118		}
1119	rr= &(r[na]);
1120	if (nb <= 0)
1121		{
1122		(void)bn_mul_words(r,a,na,0);
1123		return;
1124		}
1125	else
1126		rr[0]=bn_mul_words(r,a,na,b[0]);
1127
1128	for (;;)
1129		{
1130		if (--nb <= 0) return;
1131		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1132		if (--nb <= 0) return;
1133		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1134		if (--nb <= 0) return;
1135		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1136		if (--nb <= 0) return;
1137		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1138		rr+=4;
1139		r+=4;
1140		b+=4;
1141		}
1142	}
1143
1144void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1145	{
1146#ifdef BN_COUNT
1147	fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1148#endif
1149	bn_mul_words(r,a,n,b[0]);
1150
1151	for (;;)
1152		{
1153		if (--n <= 0) return;
1154		bn_mul_add_words(&(r[1]),a,n,b[1]);
1155		if (--n <= 0) return;
1156		bn_mul_add_words(&(r[2]),a,n,b[2]);
1157		if (--n <= 0) return;
1158		bn_mul_add_words(&(r[3]),a,n,b[3]);
1159		if (--n <= 0) return;
1160		bn_mul_add_words(&(r[4]),a,n,b[4]);
1161		r+=4;
1162		b+=4;
1163		}
1164	}
1165