bn_mul.c revision 1.31
1/* $OpenBSD: bn_mul.c,v 1.31 2023/02/13 04:25:37 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <assert.h>
60#include <stdio.h>
61#include <string.h>
62
63#include <openssl/opensslconf.h>
64
65#include "bn_arch.h"
66#include "bn_local.h"
67
68#ifndef HAVE_BN_MUL_ADD_WORDS
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70
71BN_ULONG
72bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
73{
74	BN_ULONG c1 = 0;
75
76	assert(num >= 0);
77	if (num <= 0)
78		return (c1);
79
80#ifndef OPENSSL_SMALL_FOOTPRINT
81	while (num & ~3) {
82		mul_add(rp[0], ap[0], w, c1);
83		mul_add(rp[1], ap[1], w, c1);
84		mul_add(rp[2], ap[2], w, c1);
85		mul_add(rp[3], ap[3], w, c1);
86		ap += 4;
87		rp += 4;
88		num -= 4;
89	}
90#endif
91	while (num) {
92		mul_add(rp[0], ap[0], w, c1);
93		ap++;
94		rp++;
95		num--;
96	}
97
98	return (c1);
99}
100
101#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
102
103BN_ULONG
104bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
105{
106	BN_ULONG c = 0;
107	BN_ULONG bl, bh;
108
109	assert(num >= 0);
110	if (num <= 0)
111		return ((BN_ULONG)0);
112
113	bl = LBITS(w);
114	bh = HBITS(w);
115
116#ifndef OPENSSL_SMALL_FOOTPRINT
117	while (num & ~3) {
118		mul_add(rp[0], ap[0], bl, bh, c);
119		mul_add(rp[1], ap[1], bl, bh, c);
120		mul_add(rp[2], ap[2], bl, bh, c);
121		mul_add(rp[3], ap[3], bl, bh, c);
122		ap += 4;
123		rp += 4;
124		num -= 4;
125	}
126#endif
127	while (num) {
128		mul_add(rp[0], ap[0], bl, bh, c);
129		ap++;
130		rp++;
131		num--;
132	}
133	return (c);
134}
135
136#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
137#endif
138
139#ifndef HAVE_BN_MUL_COMBA4
140void
141bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
142{
143	BN_ULONG c1, c2, c3;
144
145	c1 = 0;
146	c2 = 0;
147	c3 = 0;
148	mul_add_c(a[0], b[0], c1, c2, c3);
149	r[0] = c1;
150	c1 = 0;
151	mul_add_c(a[0], b[1], c2, c3, c1);
152	mul_add_c(a[1], b[0], c2, c3, c1);
153	r[1] = c2;
154	c2 = 0;
155	mul_add_c(a[2], b[0], c3, c1, c2);
156	mul_add_c(a[1], b[1], c3, c1, c2);
157	mul_add_c(a[0], b[2], c3, c1, c2);
158	r[2] = c3;
159	c3 = 0;
160	mul_add_c(a[0], b[3], c1, c2, c3);
161	mul_add_c(a[1], b[2], c1, c2, c3);
162	mul_add_c(a[2], b[1], c1, c2, c3);
163	mul_add_c(a[3], b[0], c1, c2, c3);
164	r[3] = c1;
165	c1 = 0;
166	mul_add_c(a[3], b[1], c2, c3, c1);
167	mul_add_c(a[2], b[2], c2, c3, c1);
168	mul_add_c(a[1], b[3], c2, c3, c1);
169	r[4] = c2;
170	c2 = 0;
171	mul_add_c(a[2], b[3], c3, c1, c2);
172	mul_add_c(a[3], b[2], c3, c1, c2);
173	r[5] = c3;
174	c3 = 0;
175	mul_add_c(a[3], b[3], c1, c2, c3);
176	r[6] = c1;
177	r[7] = c2;
178}
179#endif
180
181#ifndef HAVE_BN_MUL_COMBA8
182void
183bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
184{
185	BN_ULONG c1, c2, c3;
186
187	c1 = 0;
188	c2 = 0;
189	c3 = 0;
190	mul_add_c(a[0], b[0], c1, c2, c3);
191	r[0] = c1;
192	c1 = 0;
193	mul_add_c(a[0], b[1], c2, c3, c1);
194	mul_add_c(a[1], b[0], c2, c3, c1);
195	r[1] = c2;
196	c2 = 0;
197	mul_add_c(a[2], b[0], c3, c1, c2);
198	mul_add_c(a[1], b[1], c3, c1, c2);
199	mul_add_c(a[0], b[2], c3, c1, c2);
200	r[2] = c3;
201	c3 = 0;
202	mul_add_c(a[0], b[3], c1, c2, c3);
203	mul_add_c(a[1], b[2], c1, c2, c3);
204	mul_add_c(a[2], b[1], c1, c2, c3);
205	mul_add_c(a[3], b[0], c1, c2, c3);
206	r[3] = c1;
207	c1 = 0;
208	mul_add_c(a[4], b[0], c2, c3, c1);
209	mul_add_c(a[3], b[1], c2, c3, c1);
210	mul_add_c(a[2], b[2], c2, c3, c1);
211	mul_add_c(a[1], b[3], c2, c3, c1);
212	mul_add_c(a[0], b[4], c2, c3, c1);
213	r[4] = c2;
214	c2 = 0;
215	mul_add_c(a[0], b[5], c3, c1, c2);
216	mul_add_c(a[1], b[4], c3, c1, c2);
217	mul_add_c(a[2], b[3], c3, c1, c2);
218	mul_add_c(a[3], b[2], c3, c1, c2);
219	mul_add_c(a[4], b[1], c3, c1, c2);
220	mul_add_c(a[5], b[0], c3, c1, c2);
221	r[5] = c3;
222	c3 = 0;
223	mul_add_c(a[6], b[0], c1, c2, c3);
224	mul_add_c(a[5], b[1], c1, c2, c3);
225	mul_add_c(a[4], b[2], c1, c2, c3);
226	mul_add_c(a[3], b[3], c1, c2, c3);
227	mul_add_c(a[2], b[4], c1, c2, c3);
228	mul_add_c(a[1], b[5], c1, c2, c3);
229	mul_add_c(a[0], b[6], c1, c2, c3);
230	r[6] = c1;
231	c1 = 0;
232	mul_add_c(a[0], b[7], c2, c3, c1);
233	mul_add_c(a[1], b[6], c2, c3, c1);
234	mul_add_c(a[2], b[5], c2, c3, c1);
235	mul_add_c(a[3], b[4], c2, c3, c1);
236	mul_add_c(a[4], b[3], c2, c3, c1);
237	mul_add_c(a[5], b[2], c2, c3, c1);
238	mul_add_c(a[6], b[1], c2, c3, c1);
239	mul_add_c(a[7], b[0], c2, c3, c1);
240	r[7] = c2;
241	c2 = 0;
242	mul_add_c(a[7], b[1], c3, c1, c2);
243	mul_add_c(a[6], b[2], c3, c1, c2);
244	mul_add_c(a[5], b[3], c3, c1, c2);
245	mul_add_c(a[4], b[4], c3, c1, c2);
246	mul_add_c(a[3], b[5], c3, c1, c2);
247	mul_add_c(a[2], b[6], c3, c1, c2);
248	mul_add_c(a[1], b[7], c3, c1, c2);
249	r[8] = c3;
250	c3 = 0;
251	mul_add_c(a[2], b[7], c1, c2, c3);
252	mul_add_c(a[3], b[6], c1, c2, c3);
253	mul_add_c(a[4], b[5], c1, c2, c3);
254	mul_add_c(a[5], b[4], c1, c2, c3);
255	mul_add_c(a[6], b[3], c1, c2, c3);
256	mul_add_c(a[7], b[2], c1, c2, c3);
257	r[9] = c1;
258	c1 = 0;
259	mul_add_c(a[7], b[3], c2, c3, c1);
260	mul_add_c(a[6], b[4], c2, c3, c1);
261	mul_add_c(a[5], b[5], c2, c3, c1);
262	mul_add_c(a[4], b[6], c2, c3, c1);
263	mul_add_c(a[3], b[7], c2, c3, c1);
264	r[10] = c2;
265	c2 = 0;
266	mul_add_c(a[4], b[7], c3, c1, c2);
267	mul_add_c(a[5], b[6], c3, c1, c2);
268	mul_add_c(a[6], b[5], c3, c1, c2);
269	mul_add_c(a[7], b[4], c3, c1, c2);
270	r[11] = c3;
271	c3 = 0;
272	mul_add_c(a[7], b[5], c1, c2, c3);
273	mul_add_c(a[6], b[6], c1, c2, c3);
274	mul_add_c(a[5], b[7], c1, c2, c3);
275	r[12] = c1;
276	c1 = 0;
277	mul_add_c(a[6], b[7], c2, c3, c1);
278	mul_add_c(a[7], b[6], c2, c3, c1);
279	r[13] = c2;
280	c2 = 0;
281	mul_add_c(a[7], b[7], c3, c1, c2);
282	r[14] = c3;
283	r[15] = c1;
284}
285#endif
286
287#ifndef HAVE_BN_MUL_WORDS
288#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
289
290BN_ULONG
291bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
292{
293	BN_ULONG c1 = 0;
294
295	assert(num >= 0);
296	if (num <= 0)
297		return (c1);
298
299#ifndef OPENSSL_SMALL_FOOTPRINT
300	while (num & ~3) {
301		mul(rp[0], ap[0], w, c1);
302		mul(rp[1], ap[1], w, c1);
303		mul(rp[2], ap[2], w, c1);
304		mul(rp[3], ap[3], w, c1);
305		ap += 4;
306		rp += 4;
307		num -= 4;
308	}
309#endif
310	while (num) {
311		mul(rp[0], ap[0], w, c1);
312		ap++;
313		rp++;
314		num--;
315	}
316	return (c1);
317}
318#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
319
320BN_ULONG
321bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
322{
323	BN_ULONG carry = 0;
324	BN_ULONG bl, bh;
325
326	assert(num >= 0);
327	if (num <= 0)
328		return ((BN_ULONG)0);
329
330	bl = LBITS(w);
331	bh = HBITS(w);
332
333#ifndef OPENSSL_SMALL_FOOTPRINT
334	while (num & ~3) {
335		mul(rp[0], ap[0], bl, bh, carry);
336		mul(rp[1], ap[1], bl, bh, carry);
337		mul(rp[2], ap[2], bl, bh, carry);
338		mul(rp[3], ap[3], bl, bh, carry);
339		ap += 4;
340		rp += 4;
341		num -= 4;
342	}
343#endif
344	while (num) {
345		mul(rp[0], ap[0], bl, bh, carry);
346		ap++;
347		rp++;
348		num--;
349	}
350	return (carry);
351}
352#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
353#endif
354
355#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
356/*
357 * Here follows a specialised variant of bn_sub_words(), which has the property
358 * performing operations on arrays of different sizes. The sizes of those arrays
359 * is expressed through cl, which is the common length (basically,
360 * min(len(a),len(b))), and dl, which is the delta between the two lengths,
361 * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the
362 * operations that require a result array as parameter, it must have the length
363 * cl+abs(dl).
364 */
365BN_ULONG
366bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
367    int dl)
368{
369	BN_ULONG c, t;
370
371	assert(cl >= 0);
372	c = bn_sub_words(r, a, b, cl);
373
374	if (dl == 0)
375		return c;
376
377	r += cl;
378	a += cl;
379	b += cl;
380
381	if (dl < 0) {
382		for (;;) {
383			t = b[0];
384			r[0] = (0 - t - c) & BN_MASK2;
385			if (t != 0)
386				c = 1;
387			if (++dl >= 0)
388				break;
389
390			t = b[1];
391			r[1] = (0 - t - c) & BN_MASK2;
392			if (t != 0)
393				c = 1;
394			if (++dl >= 0)
395				break;
396
397			t = b[2];
398			r[2] = (0 - t - c) & BN_MASK2;
399			if (t != 0)
400				c = 1;
401			if (++dl >= 0)
402				break;
403
404			t = b[3];
405			r[3] = (0 - t - c) & BN_MASK2;
406			if (t != 0)
407				c = 1;
408			if (++dl >= 0)
409				break;
410
411			b += 4;
412			r += 4;
413		}
414	} else {
415		int save_dl = dl;
416		while (c) {
417			t = a[0];
418			r[0] = (t - c) & BN_MASK2;
419			if (t != 0)
420				c = 0;
421			if (--dl <= 0)
422				break;
423
424			t = a[1];
425			r[1] = (t - c) & BN_MASK2;
426			if (t != 0)
427				c = 0;
428			if (--dl <= 0)
429				break;
430
431			t = a[2];
432			r[2] = (t - c) & BN_MASK2;
433			if (t != 0)
434				c = 0;
435			if (--dl <= 0)
436				break;
437
438			t = a[3];
439			r[3] = (t - c) & BN_MASK2;
440			if (t != 0)
441				c = 0;
442			if (--dl <= 0)
443				break;
444
445			save_dl = dl;
446			a += 4;
447			r += 4;
448		}
449		if (dl > 0) {
450			if (save_dl > dl) {
451				switch (save_dl - dl) {
452				case 1:
453					r[1] = a[1];
454					if (--dl <= 0)
455						break;
456				case 2:
457					r[2] = a[2];
458					if (--dl <= 0)
459						break;
460				case 3:
461					r[3] = a[3];
462					if (--dl <= 0)
463						break;
464				}
465				a += 4;
466				r += 4;
467			}
468		}
469		if (dl > 0) {
470			for (;;) {
471				r[0] = a[0];
472				if (--dl <= 0)
473					break;
474				r[1] = a[1];
475				if (--dl <= 0)
476					break;
477				r[2] = a[2];
478				if (--dl <= 0)
479					break;
480				r[3] = a[3];
481				if (--dl <= 0)
482					break;
483
484				a += 4;
485				r += 4;
486			}
487		}
488	}
489	return c;
490}
491#endif
492
493void
494bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
495{
496	BN_ULONG *rr;
497
498
499	if (na < nb) {
500		int itmp;
501		BN_ULONG *ltmp;
502
503		itmp = na;
504		na = nb;
505		nb = itmp;
506		ltmp = a;
507		a = b;
508		b = ltmp;
509
510	}
511	rr = &(r[na]);
512	if (nb <= 0) {
513		(void)bn_mul_words(r, a, na, 0);
514		return;
515	} else
516		rr[0] = bn_mul_words(r, a, na, b[0]);
517
518	for (;;) {
519		if (--nb <= 0)
520			return;
521		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
522		if (--nb <= 0)
523			return;
524		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
525		if (--nb <= 0)
526			return;
527		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
528		if (--nb <= 0)
529			return;
530		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
531		rr += 4;
532		r += 4;
533		b += 4;
534	}
535}
536
537#ifdef BN_RECURSION
538/* Karatsuba recursive multiplication algorithm
539 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
540
541/* r is 2*n2 words in size,
542 * a and b are both n2 words in size.
543 * n2 must be a power of 2.
544 * We multiply and return the result.
545 * t must be 2*n2 words in size
546 * We calculate
547 * a[0]*b[0]
548 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
549 * a[1]*b[1]
550 */
551/* dnX may not be positive, but n2/2+dnX has to be */
552void
553bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
554    int dnb, BN_ULONG *t)
555{
556	int n = n2 / 2, c1, c2;
557	int tna = n + dna, tnb = n + dnb;
558	unsigned int neg, zero;
559	BN_ULONG ln, lo, *p;
560
561# ifdef BN_MUL_COMBA
562#  if 0
563	if (n2 == 4) {
564		bn_mul_comba4(r, a, b);
565		return;
566	}
567#  endif
568	/* Only call bn_mul_comba 8 if n2 == 8 and the
569	 * two arrays are complete [steve]
570	 */
571	if (n2 == 8 && dna == 0 && dnb == 0) {
572		bn_mul_comba8(r, a, b);
573		return;
574	}
575# endif /* BN_MUL_COMBA */
576	/* Else do normal multiply */
577	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
578		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
579		if ((dna + dnb) < 0)
580			memset(&r[2*n2 + dna + dnb], 0,
581			    sizeof(BN_ULONG) * -(dna + dnb));
582		return;
583	}
584	/* r=(a[0]-a[1])*(b[1]-b[0]) */
585	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
586	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
587	zero = neg = 0;
588	switch (c1 * 3 + c2) {
589	case -4:
590		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
591		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
592		break;
593	case -3:
594		zero = 1;
595		break;
596	case -2:
597		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
598		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
599		neg = 1;
600		break;
601	case -1:
602	case 0:
603	case 1:
604		zero = 1;
605		break;
606	case 2:
607		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
608		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
609		neg = 1;
610		break;
611	case 3:
612		zero = 1;
613		break;
614	case 4:
615		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
616		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
617		break;
618	}
619
620# ifdef BN_MUL_COMBA
621	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
622					       extra args to do this well */
623	{
624		if (!zero)
625			bn_mul_comba4(&(t[n2]), t, &(t[n]));
626		else
627			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
628
629		bn_mul_comba4(r, a, b);
630		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
631	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
632						    take extra args to do this
633						    well */
634	{
635		if (!zero)
636			bn_mul_comba8(&(t[n2]), t, &(t[n]));
637		else
638			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
639
640		bn_mul_comba8(r, a, b);
641		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
642	} else
643# endif /* BN_MUL_COMBA */
644	{
645		p = &(t[n2 * 2]);
646		if (!zero)
647			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
648		else
649			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
650		bn_mul_recursive(r, a, b, n, 0, 0, p);
651		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
652	}
653
654	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
655	 * r[10] holds (a[0]*b[0])
656	 * r[32] holds (b[1]*b[1])
657	 */
658
659	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
660
661	if (neg) /* if t[32] is negative */
662	{
663		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
664	} else {
665		/* Might have a carry */
666		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
667	}
668
669	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
670	 * r[10] holds (a[0]*b[0])
671	 * r[32] holds (b[1]*b[1])
672	 * c1 holds the carry bits
673	 */
674	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
675	if (c1) {
676		p = &(r[n + n2]);
677		lo= *p;
678		ln = (lo + c1) & BN_MASK2;
679		*p = ln;
680
681		/* The overflow will stop before we over write
682		 * words we should not overwrite */
683		if (ln < (BN_ULONG)c1) {
684			do {
685				p++;
686				lo= *p;
687				ln = (lo + 1) & BN_MASK2;
688				*p = ln;
689			} while (ln == 0);
690		}
691	}
692}
693
694/* n+tn is the word length
695 * t needs to be n*4 is size, as does r */
696/* tnX may not be negative but less than n */
697void
698bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
699    int tnb, BN_ULONG *t)
700{
701	int i, j, n2 = n * 2;
702	int c1, c2, neg;
703	BN_ULONG ln, lo, *p;
704
705	if (n < 8) {
706		bn_mul_normal(r, a, n + tna, b, n + tnb);
707		return;
708	}
709
710	/* r=(a[0]-a[1])*(b[1]-b[0]) */
711	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
712	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
713	neg = 0;
714	switch (c1 * 3 + c2) {
715	case -4:
716		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
717		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
718		break;
719	case -3:
720		/* break; */
721	case -2:
722		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
723		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
724		neg = 1;
725		break;
726	case -1:
727	case 0:
728	case 1:
729		/* break; */
730	case 2:
731		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
732		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
733		neg = 1;
734		break;
735	case 3:
736		/* break; */
737	case 4:
738		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
739		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
740		break;
741	}
742		/* The zero case isn't yet implemented here. The speedup
743		   would probably be negligible. */
744# if 0
745	if (n == 4) {
746		bn_mul_comba4(&(t[n2]), t, &(t[n]));
747		bn_mul_comba4(r, a, b);
748		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
749		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
750	} else
751# endif
752		if (n == 8) {
753		bn_mul_comba8(&(t[n2]), t, &(t[n]));
754		bn_mul_comba8(r, a, b);
755		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
756		memset(&(r[n2 + tna + tnb]), 0,
757		    sizeof(BN_ULONG) * (n2 - tna - tnb));
758	} else {
759		p = &(t[n2*2]);
760		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
761		bn_mul_recursive(r, a, b, n, 0, 0, p);
762		i = n / 2;
763		/* If there is only a bottom half to the number,
764		 * just do it */
765		if (tna > tnb)
766			j = tna - i;
767		else
768			j = tnb - i;
769		if (j == 0) {
770			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
771			    i, tna - i, tnb - i, p);
772			memset(&(r[n2 + i * 2]), 0,
773			    sizeof(BN_ULONG) * (n2 - i * 2));
774		}
775		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
776		{
777			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
778			    i, tna - i, tnb - i, p);
779			memset(&(r[n2 + tna + tnb]), 0,
780			    sizeof(BN_ULONG) * (n2 - tna - tnb));
781		}
782		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
783		{
784			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
785			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
786			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
787				bn_mul_normal(&(r[n2]), &(a[n]), tna,
788				    &(b[n]), tnb);
789			} else {
790				for (;;) {
791					i /= 2;
792					/* these simplified conditions work
793					 * exclusively because difference
794					 * between tna and tnb is 1 or 0 */
795					if (i < tna || i < tnb) {
796						bn_mul_part_recursive(&(r[n2]),
797						    &(a[n]), &(b[n]), i,
798						    tna - i, tnb - i, p);
799						break;
800					} else if (i == tna || i == tnb) {
801						bn_mul_recursive(&(r[n2]),
802						    &(a[n]), &(b[n]), i,
803						    tna - i, tnb - i, p);
804						break;
805					}
806				}
807			}
808		}
809	}
810
811	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
812	 * r[10] holds (a[0]*b[0])
813	 * r[32] holds (b[1]*b[1])
814	 */
815
816	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
817
818	if (neg) /* if t[32] is negative */
819	{
820		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
821	} else {
822		/* Might have a carry */
823		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
824	}
825
826	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
827	 * r[10] holds (a[0]*b[0])
828	 * r[32] holds (b[1]*b[1])
829	 * c1 holds the carry bits
830	 */
831	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
832	if (c1) {
833		p = &(r[n + n2]);
834		lo= *p;
835		ln = (lo + c1)&BN_MASK2;
836		*p = ln;
837
838		/* The overflow will stop before we over write
839		 * words we should not overwrite */
840		if (ln < (BN_ULONG)c1) {
841			do {
842				p++;
843				lo= *p;
844				ln = (lo + 1) & BN_MASK2;
845				*p = ln;
846			} while (ln == 0);
847		}
848	}
849}
850#endif /* BN_RECURSION */
851
852#ifndef HAVE_BN_MUL
853#ifndef BN_RECURSION
854int
855bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
856{
857	bn_mul_normal(r->d, a->d, a->top, b->d, b->top);
858
859	return 1;
860}
861
862#else /* BN_RECURSION */
863int
864bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
865{
866	BIGNUM *t = NULL;
867	int al, bl, i, k;
868	int j = 0;
869	int ret = 0;
870
871	BN_CTX_start(ctx);
872
873	al = a->top;
874	bl = b->top;
875
876	i = al - bl;
877
878	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
879		if (i >= -1 && i <= 1) {
880			/* Find out the power of two lower or equal
881			   to the longest of the two numbers */
882			if (i >= 0) {
883				j = BN_num_bits_word((BN_ULONG)al);
884			}
885			if (i == -1) {
886				j = BN_num_bits_word((BN_ULONG)bl);
887			}
888			j = 1 << (j - 1);
889			assert(j <= al || j <= bl);
890			k = j + j;
891			if ((t = BN_CTX_get(ctx)) == NULL)
892				goto err;
893			if (al > j || bl > j) {
894				if (!bn_wexpand(t, k * 4))
895					goto err;
896				if (!bn_wexpand(r, k * 4))
897					goto err;
898				bn_mul_part_recursive(r->d, a->d, b->d,
899				    j, al - j, bl - j, t->d);
900			}
901			else	/* al <= j || bl <= j */
902			{
903				if (!bn_wexpand(t, k * 2))
904					goto err;
905				if (!bn_wexpand(r, k * 2))
906					goto err;
907				bn_mul_recursive(r->d, a->d, b->d,
908				    j, al - j, bl - j, t->d);
909			}
910			r->top = rn;
911			goto end;
912		}
913#if 0
914		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
915			BIGNUM *tmp_bn = (BIGNUM *)b;
916			if (!bn_wexpand(tmp_bn, al))
917				goto err;
918			tmp_bn->d[bl] = 0;
919			bl++;
920			i--;
921		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
922			BIGNUM *tmp_bn = (BIGNUM *)a;
923			if (!bn_wexpand(tmp_bn, bl))
924				goto err;
925			tmp_bn->d[al] = 0;
926			al++;
927			i++;
928		}
929		if (i == 0) {
930			/* symmetric and > 4 */
931			/* 16 or larger */
932			j = BN_num_bits_word((BN_ULONG)al);
933			j = 1 << (j - 1);
934			k = j + j;
935			if ((t = BN_CTX_get(ctx)) == NULL)
936				goto err;
937			if (al == j) /* exact multiple */
938			{
939				if (!bn_wexpand(t, k * 2))
940					goto err;
941				if (!bn_wexpand(r, k * 2))
942					goto err;
943				bn_mul_recursive(r->d, a->d, b->d, al, t->d);
944			} else {
945				if (!bn_wexpand(t, k * 4))
946					goto err;
947				if (!bn_wexpand(r, k * 4))
948					goto err;
949				bn_mul_part_recursive(r->d, a->d, b->d,
950				    al - j, j, t->d);
951			}
952			r->top = top;
953			goto end;
954		}
955#endif
956	}
957
958	bn_mul_normal(r->d, a->d, al, b->d, bl);
959
960 end:
961	ret = 1;
962 err:
963	BN_CTX_end(ctx);
964
965	return ret;
966}
967#endif /* BN_RECURSION */
968#endif /* HAVE_BN_MUL */
969
970int
971BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
972{
973	BIGNUM *rr;
974	int rn;
975	int ret = 0;
976
977	BN_CTX_start(ctx);
978
979	if (BN_is_zero(a) || BN_is_zero(b)) {
980		BN_zero(r);
981		goto done;
982	}
983
984	rr = r;
985	if (rr == a || rr == b)
986		rr = BN_CTX_get(ctx);
987	if (rr == NULL)
988		goto err;
989
990	rn = a->top + b->top;
991	if (rn < a->top)
992		goto err;
993	if (!bn_wexpand(rr, rn))
994		goto err;
995
996	if (a->top == 4 && b->top == 4) {
997		bn_mul_comba4(rr->d, a->d, b->d);
998	} else if (a->top == 8 && b->top == 8) {
999		bn_mul_comba8(rr->d, a->d, b->d);
1000	} else {
1001		if (!bn_mul(rr, a, b, rn, ctx))
1002			goto err;
1003	}
1004
1005	rr->top = rn;
1006	bn_correct_top(rr);
1007
1008	BN_set_negative(rr, a->neg ^ b->neg);
1009
1010	if (r != rr)
1011		BN_copy(r, rr);
1012 done:
1013	ret = 1;
1014 err:
1015	BN_CTX_end(ctx);
1016
1017	return ret;
1018}
1019