bn_mul.c revision 205128
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70/* Here follows specialised variants of bn_add_words() and
71   bn_sub_words().  They have the property performing operations on
72   arrays of different sizes.  The sizes of those arrays is expressed through
73   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74   which is the delta between the two lengths, calculated as len(a)-len(b).
75   All lengths are the number of BN_ULONGs...  For the operations that require
76   a result array as parameter, it must have the length cl+abs(dl).
77   These functions should probably end up in bn_asm.c as soon as there are
78   assembler counterparts for the systems that use assembler files.  */
79
80BN_ULONG bn_sub_part_words(BN_ULONG *r,
81	const BN_ULONG *a, const BN_ULONG *b,
82	int cl, int dl)
83	{
84	BN_ULONG c, t;
85
86	assert(cl >= 0);
87	c = bn_sub_words(r, a, b, cl);
88
89	if (dl == 0)
90		return c;
91
92	r += cl;
93	a += cl;
94	b += cl;
95
96	if (dl < 0)
97		{
98#ifdef BN_COUNT
99		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100#endif
101		for (;;)
102			{
103			t = b[0];
104			r[0] = (0-t-c)&BN_MASK2;
105			if (t != 0) c=1;
106			if (++dl >= 0) break;
107
108			t = b[1];
109			r[1] = (0-t-c)&BN_MASK2;
110			if (t != 0) c=1;
111			if (++dl >= 0) break;
112
113			t = b[2];
114			r[2] = (0-t-c)&BN_MASK2;
115			if (t != 0) c=1;
116			if (++dl >= 0) break;
117
118			t = b[3];
119			r[3] = (0-t-c)&BN_MASK2;
120			if (t != 0) c=1;
121			if (++dl >= 0) break;
122
123			b += 4;
124			r += 4;
125			}
126		}
127	else
128		{
129		int save_dl = dl;
130#ifdef BN_COUNT
131		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132#endif
133		while(c)
134			{
135			t = a[0];
136			r[0] = (t-c)&BN_MASK2;
137			if (t != 0) c=0;
138			if (--dl <= 0) break;
139
140			t = a[1];
141			r[1] = (t-c)&BN_MASK2;
142			if (t != 0) c=0;
143			if (--dl <= 0) break;
144
145			t = a[2];
146			r[2] = (t-c)&BN_MASK2;
147			if (t != 0) c=0;
148			if (--dl <= 0) break;
149
150			t = a[3];
151			r[3] = (t-c)&BN_MASK2;
152			if (t != 0) c=0;
153			if (--dl <= 0) break;
154
155			save_dl = dl;
156			a += 4;
157			r += 4;
158			}
159		if (dl > 0)
160			{
161#ifdef BN_COUNT
162			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163#endif
164			if (save_dl > dl)
165				{
166				switch (save_dl - dl)
167					{
168				case 1:
169					r[1] = a[1];
170					if (--dl <= 0) break;
171				case 2:
172					r[2] = a[2];
173					if (--dl <= 0) break;
174				case 3:
175					r[3] = a[3];
176					if (--dl <= 0) break;
177					}
178				a += 4;
179				r += 4;
180				}
181			}
182		if (dl > 0)
183			{
184#ifdef BN_COUNT
185			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186#endif
187			for(;;)
188				{
189				r[0] = a[0];
190				if (--dl <= 0) break;
191				r[1] = a[1];
192				if (--dl <= 0) break;
193				r[2] = a[2];
194				if (--dl <= 0) break;
195				r[3] = a[3];
196				if (--dl <= 0) break;
197
198				a += 4;
199				r += 4;
200				}
201			}
202		}
203	return c;
204	}
205#endif
206
207BN_ULONG bn_add_part_words(BN_ULONG *r,
208	const BN_ULONG *a, const BN_ULONG *b,
209	int cl, int dl)
210	{
211	BN_ULONG c, l, t;
212
213	assert(cl >= 0);
214	c = bn_add_words(r, a, b, cl);
215
216	if (dl == 0)
217		return c;
218
219	r += cl;
220	a += cl;
221	b += cl;
222
223	if (dl < 0)
224		{
225		int save_dl = dl;
226#ifdef BN_COUNT
227		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228#endif
229		while (c)
230			{
231			l=(c+b[0])&BN_MASK2;
232			c=(l < c);
233			r[0]=l;
234			if (++dl >= 0) break;
235
236			l=(c+b[1])&BN_MASK2;
237			c=(l < c);
238			r[1]=l;
239			if (++dl >= 0) break;
240
241			l=(c+b[2])&BN_MASK2;
242			c=(l < c);
243			r[2]=l;
244			if (++dl >= 0) break;
245
246			l=(c+b[3])&BN_MASK2;
247			c=(l < c);
248			r[3]=l;
249			if (++dl >= 0) break;
250
251			save_dl = dl;
252			b+=4;
253			r+=4;
254			}
255		if (dl < 0)
256			{
257#ifdef BN_COUNT
258			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259#endif
260			if (save_dl < dl)
261				{
262				switch (dl - save_dl)
263					{
264				case 1:
265					r[1] = b[1];
266					if (++dl >= 0) break;
267				case 2:
268					r[2] = b[2];
269					if (++dl >= 0) break;
270				case 3:
271					r[3] = b[3];
272					if (++dl >= 0) break;
273					}
274				b += 4;
275				r += 4;
276				}
277			}
278		if (dl < 0)
279			{
280#ifdef BN_COUNT
281			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282#endif
283			for(;;)
284				{
285				r[0] = b[0];
286				if (++dl >= 0) break;
287				r[1] = b[1];
288				if (++dl >= 0) break;
289				r[2] = b[2];
290				if (++dl >= 0) break;
291				r[3] = b[3];
292				if (++dl >= 0) break;
293
294				b += 4;
295				r += 4;
296				}
297			}
298		}
299	else
300		{
301		int save_dl = dl;
302#ifdef BN_COUNT
303		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304#endif
305		while (c)
306			{
307			t=(a[0]+c)&BN_MASK2;
308			c=(t < c);
309			r[0]=t;
310			if (--dl <= 0) break;
311
312			t=(a[1]+c)&BN_MASK2;
313			c=(t < c);
314			r[1]=t;
315			if (--dl <= 0) break;
316
317			t=(a[2]+c)&BN_MASK2;
318			c=(t < c);
319			r[2]=t;
320			if (--dl <= 0) break;
321
322			t=(a[3]+c)&BN_MASK2;
323			c=(t < c);
324			r[3]=t;
325			if (--dl <= 0) break;
326
327			save_dl = dl;
328			a+=4;
329			r+=4;
330			}
331#ifdef BN_COUNT
332		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333#endif
334		if (dl > 0)
335			{
336			if (save_dl > dl)
337				{
338				switch (save_dl - dl)
339					{
340				case 1:
341					r[1] = a[1];
342					if (--dl <= 0) break;
343				case 2:
344					r[2] = a[2];
345					if (--dl <= 0) break;
346				case 3:
347					r[3] = a[3];
348					if (--dl <= 0) break;
349					}
350				a += 4;
351				r += 4;
352				}
353			}
354		if (dl > 0)
355			{
356#ifdef BN_COUNT
357			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358#endif
359			for(;;)
360				{
361				r[0] = a[0];
362				if (--dl <= 0) break;
363				r[1] = a[1];
364				if (--dl <= 0) break;
365				r[2] = a[2];
366				if (--dl <= 0) break;
367				r[3] = a[3];
368				if (--dl <= 0) break;
369
370				a += 4;
371				r += 4;
372				}
373			}
374		}
375	return c;
376	}
377
378#ifdef BN_RECURSION
379/* Karatsuba recursive multiplication algorithm
380 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381
382/* r is 2*n2 words in size,
383 * a and b are both n2 words in size.
384 * n2 must be a power of 2.
385 * We multiply and return the result.
386 * t must be 2*n2 words in size
387 * We calculate
388 * a[0]*b[0]
389 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390 * a[1]*b[1]
391 */
392/* dnX may not be positive, but n2/2+dnX has to be */
393void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
394	int dna, int dnb, BN_ULONG *t)
395	{
396	int n=n2/2,c1,c2;
397	int tna=n+dna, tnb=n+dnb;
398	unsigned int neg,zero;
399	BN_ULONG ln,lo,*p;
400
401# ifdef BN_COUNT
402	fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
403# endif
404# ifdef BN_MUL_COMBA
405#  if 0
406	if (n2 == 4)
407		{
408		bn_mul_comba4(r,a,b);
409		return;
410		}
411#  endif
412	/* Only call bn_mul_comba 8 if n2 == 8 and the
413	 * two arrays are complete [steve]
414	 */
415	if (n2 == 8 && dna == 0 && dnb == 0)
416		{
417		bn_mul_comba8(r,a,b);
418		return;
419		}
420# endif /* BN_MUL_COMBA */
421	/* Else do normal multiply */
422	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
423		{
424		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
425		if ((dna + dnb) < 0)
426			memset(&r[2*n2 + dna + dnb], 0,
427				sizeof(BN_ULONG) * -(dna + dnb));
428		return;
429		}
430	/* r=(a[0]-a[1])*(b[1]-b[0]) */
431	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
432	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
433	zero=neg=0;
434	switch (c1*3+c2)
435		{
436	case -4:
437		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
438		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
439		break;
440	case -3:
441		zero=1;
442		break;
443	case -2:
444		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
445		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
446		neg=1;
447		break;
448	case -1:
449	case 0:
450	case 1:
451		zero=1;
452		break;
453	case 2:
454		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
455		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
456		neg=1;
457		break;
458	case 3:
459		zero=1;
460		break;
461	case 4:
462		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
463		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
464		break;
465		}
466
467# ifdef BN_MUL_COMBA
468	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
469					       extra args to do this well */
470		{
471		if (!zero)
472			bn_mul_comba4(&(t[n2]),t,&(t[n]));
473		else
474			memset(&(t[n2]),0,8*sizeof(BN_ULONG));
475
476		bn_mul_comba4(r,a,b);
477		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
478		}
479	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
480						    take extra args to do this
481						    well */
482		{
483		if (!zero)
484			bn_mul_comba8(&(t[n2]),t,&(t[n]));
485		else
486			memset(&(t[n2]),0,16*sizeof(BN_ULONG));
487
488		bn_mul_comba8(r,a,b);
489		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
490		}
491	else
492# endif /* BN_MUL_COMBA */
493		{
494		p= &(t[n2*2]);
495		if (!zero)
496			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
497		else
498			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
499		bn_mul_recursive(r,a,b,n,0,0,p);
500		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
501		}
502
503	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
504	 * r[10] holds (a[0]*b[0])
505	 * r[32] holds (b[1]*b[1])
506	 */
507
508	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
509
510	if (neg) /* if t[32] is negative */
511		{
512		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
513		}
514	else
515		{
516		/* Might have a carry */
517		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
518		}
519
520	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
521	 * r[10] holds (a[0]*b[0])
522	 * r[32] holds (b[1]*b[1])
523	 * c1 holds the carry bits
524	 */
525	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
526	if (c1)
527		{
528		p= &(r[n+n2]);
529		lo= *p;
530		ln=(lo+c1)&BN_MASK2;
531		*p=ln;
532
533		/* The overflow will stop before we over write
534		 * words we should not overwrite */
535		if (ln < (BN_ULONG)c1)
536			{
537			do	{
538				p++;
539				lo= *p;
540				ln=(lo+1)&BN_MASK2;
541				*p=ln;
542				} while (ln == 0);
543			}
544		}
545	}
546
547/* n+tn is the word length
548 * t needs to be n*4 is size, as does r */
549/* tnX may not be negative but less than n */
550void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
551	     int tna, int tnb, BN_ULONG *t)
552	{
553	int i,j,n2=n*2;
554	int c1,c2,neg,zero;
555	BN_ULONG ln,lo,*p;
556
557# ifdef BN_COUNT
558	fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
559		n, tna, n, tnb);
560# endif
561	if (n < 8)
562		{
563		bn_mul_normal(r,a,n+tna,b,n+tnb);
564		return;
565		}
566
567	/* r=(a[0]-a[1])*(b[1]-b[0]) */
568	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
569	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
570	zero=neg=0;
571	switch (c1*3+c2)
572		{
573	case -4:
574		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
575		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
576		break;
577	case -3:
578		zero=1;
579		/* break; */
580	case -2:
581		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
582		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
583		neg=1;
584		break;
585	case -1:
586	case 0:
587	case 1:
588		zero=1;
589		/* break; */
590	case 2:
591		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
592		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
593		neg=1;
594		break;
595	case 3:
596		zero=1;
597		/* break; */
598	case 4:
599		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
600		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
601		break;
602		}
603		/* The zero case isn't yet implemented here. The speedup
604		   would probably be negligible. */
605# if 0
606	if (n == 4)
607		{
608		bn_mul_comba4(&(t[n2]),t,&(t[n]));
609		bn_mul_comba4(r,a,b);
610		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
611		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
612		}
613	else
614# endif
615	if (n == 8)
616		{
617		bn_mul_comba8(&(t[n2]),t,&(t[n]));
618		bn_mul_comba8(r,a,b);
619		bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
620		memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
621		}
622	else
623		{
624		p= &(t[n2*2]);
625		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
626		bn_mul_recursive(r,a,b,n,0,0,p);
627		i=n/2;
628		/* If there is only a bottom half to the number,
629		 * just do it */
630		if (tna > tnb)
631			j = tna - i;
632		else
633			j = tnb - i;
634		if (j == 0)
635			{
636			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
637				i,tna-i,tnb-i,p);
638			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
639			}
640		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
641				{
642				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
643					i,tna-i,tnb-i,p);
644				memset(&(r[n2+tna+tnb]),0,
645					sizeof(BN_ULONG)*(n2-tna-tnb));
646				}
647		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
648			{
649			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
650			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
651				&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
652				{
653				bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
654				}
655			else
656				{
657				for (;;)
658					{
659					i/=2;
660					/* these simplified conditions work
661					 * exclusively because difference
662					 * between tna and tnb is 1 or 0 */
663					if (i < tna || i < tnb)
664						{
665						bn_mul_part_recursive(&(r[n2]),
666							&(a[n]),&(b[n]),
667							i,tna-i,tnb-i,p);
668						break;
669						}
670					else if (i == tna || i == tnb)
671						{
672						bn_mul_recursive(&(r[n2]),
673							&(a[n]),&(b[n]),
674							i,tna-i,tnb-i,p);
675						break;
676						}
677					}
678				}
679			}
680		}
681
682	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
683	 * r[10] holds (a[0]*b[0])
684	 * r[32] holds (b[1]*b[1])
685	 */
686
687	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
688
689	if (neg) /* if t[32] is negative */
690		{
691		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
692		}
693	else
694		{
695		/* Might have a carry */
696		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
697		}
698
699	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
700	 * r[10] holds (a[0]*b[0])
701	 * r[32] holds (b[1]*b[1])
702	 * c1 holds the carry bits
703	 */
704	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
705	if (c1)
706		{
707		p= &(r[n+n2]);
708		lo= *p;
709		ln=(lo+c1)&BN_MASK2;
710		*p=ln;
711
712		/* The overflow will stop before we over write
713		 * words we should not overwrite */
714		if (ln < (BN_ULONG)c1)
715			{
716			do	{
717				p++;
718				lo= *p;
719				ln=(lo+1)&BN_MASK2;
720				*p=ln;
721				} while (ln == 0);
722			}
723		}
724	}
725
726/* a and b must be the same size, which is n2.
727 * r needs to be n2 words and t needs to be n2*2
728 */
729void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
730	     BN_ULONG *t)
731	{
732	int n=n2/2;
733
734# ifdef BN_COUNT
735	fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
736# endif
737
738	bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
739	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
740		{
741		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
742		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
743		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
744		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
745		}
746	else
747		{
748		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
749		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
750		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
751		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
752		}
753	}
754
755/* a and b must be the same size, which is n2.
756 * r needs to be n2 words and t needs to be n2*2
757 * l is the low words of the output.
758 * t needs to be n2*3
759 */
760void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
761	     BN_ULONG *t)
762	{
763	int i,n;
764	int c1,c2;
765	int neg,oneg,zero;
766	BN_ULONG ll,lc,*lp,*mp;
767
768# ifdef BN_COUNT
769	fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
770# endif
771	n=n2/2;
772
773	/* Calculate (al-ah)*(bh-bl) */
774	neg=zero=0;
775	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
776	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
777	switch (c1*3+c2)
778		{
779	case -4:
780		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
781		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
782		break;
783	case -3:
784		zero=1;
785		break;
786	case -2:
787		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
788		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
789		neg=1;
790		break;
791	case -1:
792	case 0:
793	case 1:
794		zero=1;
795		break;
796	case 2:
797		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
798		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
799		neg=1;
800		break;
801	case 3:
802		zero=1;
803		break;
804	case 4:
805		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
806		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
807		break;
808		}
809
810	oneg=neg;
811	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
812	/* r[10] = (a[1]*b[1]) */
813# ifdef BN_MUL_COMBA
814	if (n == 8)
815		{
816		bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
817		bn_mul_comba8(r,&(a[n]),&(b[n]));
818		}
819	else
820# endif
821		{
822		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
823		bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
824		}
825
826	/* s0 == low(al*bl)
827	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
828	 * We know s0 and s1 so the only unknown is high(al*bl)
829	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
830	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
831	 */
832	if (l != NULL)
833		{
834		lp= &(t[n2+n]);
835		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
836		}
837	else
838		{
839		c1=0;
840		lp= &(r[0]);
841		}
842
843	if (neg)
844		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
845	else
846		{
847		bn_add_words(&(t[n2]),lp,&(t[0]),n);
848		neg=0;
849		}
850
851	if (l != NULL)
852		{
853		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
854		}
855	else
856		{
857		lp= &(t[n2+n]);
858		mp= &(t[n2]);
859		for (i=0; i<n; i++)
860			lp[i]=((~mp[i])+1)&BN_MASK2;
861		}
862
863	/* s[0] = low(al*bl)
864	 * t[3] = high(al*bl)
865	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866	 * r[10] = (a[1]*b[1])
867	 */
868	/* R[10] = al*bl
869	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
870	 * R[32] = ah*bh
871	 */
872	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
873	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
874	 * R[3]=r[1]+(carry/borrow)
875	 */
876	if (l != NULL)
877		{
878		lp= &(t[n2]);
879		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
880		}
881	else
882		{
883		lp= &(t[n2+n]);
884		c1=0;
885		}
886	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
887	if (oneg)
888		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
889	else
890		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
891
892	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
893	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
894	if (oneg)
895		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
896	else
897		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
898
899	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
900		{
901		i=0;
902		if (c1 > 0)
903			{
904			lc=c1;
905			do	{
906				ll=(r[i]+lc)&BN_MASK2;
907				r[i++]=ll;
908				lc=(lc > ll);
909				} while (lc);
910			}
911		else
912			{
913			lc= -c1;
914			do	{
915				ll=r[i];
916				r[i++]=(ll-lc)&BN_MASK2;
917				lc=(lc > ll);
918				} while (lc);
919			}
920		}
921	if (c2 != 0) /* Add starting at r[1] */
922		{
923		i=n;
924		if (c2 > 0)
925			{
926			lc=c2;
927			do	{
928				ll=(r[i]+lc)&BN_MASK2;
929				r[i++]=ll;
930				lc=(lc > ll);
931				} while (lc);
932			}
933		else
934			{
935			lc= -c2;
936			do	{
937				ll=r[i];
938				r[i++]=(ll-lc)&BN_MASK2;
939				lc=(lc > ll);
940				} while (lc);
941			}
942		}
943	}
944#endif /* BN_RECURSION */
945
946int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
947	{
948	int ret=0;
949	int top,al,bl;
950	BIGNUM *rr;
951#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
952	int i;
953#endif
954#ifdef BN_RECURSION
955	BIGNUM *t=NULL;
956	int j=0,k;
957#endif
958
959#ifdef BN_COUNT
960	fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
961#endif
962
963	bn_check_top(a);
964	bn_check_top(b);
965	bn_check_top(r);
966
967	al=a->top;
968	bl=b->top;
969
970	if ((al == 0) || (bl == 0))
971		{
972		BN_zero(r);
973		return(1);
974		}
975	top=al+bl;
976
977	BN_CTX_start(ctx);
978	if ((r == a) || (r == b))
979		{
980		if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
981		}
982	else
983		rr = r;
984	rr->neg=a->neg^b->neg;
985
986#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
987	i = al-bl;
988#endif
989#ifdef BN_MUL_COMBA
990	if (i == 0)
991		{
992# if 0
993		if (al == 4)
994			{
995			if (bn_wexpand(rr,8) == NULL) goto err;
996			rr->top=8;
997			bn_mul_comba4(rr->d,a->d,b->d);
998			goto end;
999			}
1000# endif
1001		if (al == 8)
1002			{
1003			if (bn_wexpand(rr,16) == NULL) goto err;
1004			rr->top=16;
1005			bn_mul_comba8(rr->d,a->d,b->d);
1006			goto end;
1007			}
1008		}
1009#endif /* BN_MUL_COMBA */
1010#ifdef BN_RECURSION
1011	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1012		{
1013		if (i >= -1 && i <= 1)
1014			{
1015			int sav_j =0;
1016			/* Find out the power of two lower or equal
1017			   to the longest of the two numbers */
1018			if (i >= 0)
1019				{
1020				j = BN_num_bits_word((BN_ULONG)al);
1021				}
1022			if (i == -1)
1023				{
1024				j = BN_num_bits_word((BN_ULONG)bl);
1025				}
1026			sav_j = j;
1027			j = 1<<(j-1);
1028			assert(j <= al || j <= bl);
1029			k = j+j;
1030			t = BN_CTX_get(ctx);
1031			if (t == NULL)
1032				goto err;
1033			if (al > j || bl > j)
1034				{
1035				if (bn_wexpand(t,k*4) == NULL) goto err;
1036				if (bn_wexpand(rr,k*4) == NULL) goto err;
1037				bn_mul_part_recursive(rr->d,a->d,b->d,
1038					j,al-j,bl-j,t->d);
1039				}
1040			else	/* al <= j || bl <= j */
1041				{
1042				if (bn_wexpand(t,k*2) == NULL) goto err;
1043				if (bn_wexpand(rr,k*2) == NULL) goto err;
1044				bn_mul_recursive(rr->d,a->d,b->d,
1045					j,al-j,bl-j,t->d);
1046				}
1047			rr->top=top;
1048			goto end;
1049			}
1050#if 0
1051		if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1052			{
1053			BIGNUM *tmp_bn = (BIGNUM *)b;
1054			if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1055			tmp_bn->d[bl]=0;
1056			bl++;
1057			i--;
1058			}
1059		else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1060			{
1061			BIGNUM *tmp_bn = (BIGNUM *)a;
1062			if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1063			tmp_bn->d[al]=0;
1064			al++;
1065			i++;
1066			}
1067		if (i == 0)
1068			{
1069			/* symmetric and > 4 */
1070			/* 16 or larger */
1071			j=BN_num_bits_word((BN_ULONG)al);
1072			j=1<<(j-1);
1073			k=j+j;
1074			t = BN_CTX_get(ctx);
1075			if (al == j) /* exact multiple */
1076				{
1077				if (bn_wexpand(t,k*2) == NULL) goto err;
1078				if (bn_wexpand(rr,k*2) == NULL) goto err;
1079				bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1080				}
1081			else
1082				{
1083				if (bn_wexpand(t,k*4) == NULL) goto err;
1084				if (bn_wexpand(rr,k*4) == NULL) goto err;
1085				bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1086				}
1087			rr->top=top;
1088			goto end;
1089			}
1090#endif
1091		}
1092#endif /* BN_RECURSION */
1093	if (bn_wexpand(rr,top) == NULL) goto err;
1094	rr->top=top;
1095	bn_mul_normal(rr->d,a->d,al,b->d,bl);
1096
1097#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1098end:
1099#endif
1100	bn_correct_top(rr);
1101	if (r != rr) BN_copy(r,rr);
1102	ret=1;
1103err:
1104	bn_check_top(r);
1105	BN_CTX_end(ctx);
1106	return(ret);
1107	}
1108
1109void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1110	{
1111	BN_ULONG *rr;
1112
1113#ifdef BN_COUNT
1114	fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1115#endif
1116
1117	if (na < nb)
1118		{
1119		int itmp;
1120		BN_ULONG *ltmp;
1121
1122		itmp=na; na=nb; nb=itmp;
1123		ltmp=a;   a=b;   b=ltmp;
1124
1125		}
1126	rr= &(r[na]);
1127	if (nb <= 0)
1128		{
1129		(void)bn_mul_words(r,a,na,0);
1130		return;
1131		}
1132	else
1133		rr[0]=bn_mul_words(r,a,na,b[0]);
1134
1135	for (;;)
1136		{
1137		if (--nb <= 0) return;
1138		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1139		if (--nb <= 0) return;
1140		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1141		if (--nb <= 0) return;
1142		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1143		if (--nb <= 0) return;
1144		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1145		rr+=4;
1146		r+=4;
1147		b+=4;
1148		}
1149	}
1150
1151void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1152	{
1153#ifdef BN_COUNT
1154	fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1155#endif
1156	bn_mul_words(r,a,n,b[0]);
1157
1158	for (;;)
1159		{
1160		if (--n <= 0) return;
1161		bn_mul_add_words(&(r[1]),a,n,b[1]);
1162		if (--n <= 0) return;
1163		bn_mul_add_words(&(r[2]),a,n,b[2]);
1164		if (--n <= 0) return;
1165		bn_mul_add_words(&(r[3]),a,n,b[3]);
1166		if (--n <= 0) return;
1167		bn_mul_add_words(&(r[4]),a,n,b[4]);
1168		r+=4;
1169		b+=4;
1170		}
1171	}
1172