1/*
2 * Copyright (c) 2011-12 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
25 * All rights reserved.
26 *
27 * This package is an SSL implementation written
28 * by Eric Young (eay@cryptsoft.com).
29 * The implementation was written so as to conform with Netscapes SSL.
30 *
31 * This library is free for commercial and non-commercial use as long as
32 * the following conditions are aheared to.  The following conditions
33 * apply to all code found in this distribution, be it the RC4, RSA,
34 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
35 * included with this distribution is covered by the same copyright terms
36 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
37 *
38 * Copyright remains Eric Young's, and as such any Copyright notices in
39 * the code are not to be removed.
40 * If this package is used in a product, Eric Young should be given attribution
41 * as the author of the parts of the library used.
42 * This can be in the form of a textual message at program startup or
43 * in documentation (online or textual) provided with the package.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 *    must display the following acknowledgement:
55 *    "This product includes cryptographic software written by
56 *     Eric Young (eay@cryptsoft.com)"
57 *    The word 'cryptographic' can be left out if the rouines from the library
58 *    being used are not cryptographic related :-).
59 * 4. If you include any Windows specific code (or a derivative thereof) from
60 *    the apps directory (application code) you must include an acknowledgement:
61 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
62 *
63 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * The licence and distribution terms for any publically available version or
76 * derivative of this code cannot be changed.  i.e. this code cannot simply be
77 * copied and put under another distribution licence
78 * [including the GNU Public Licence.]
79 */
80
81#include "ossl-config.h"
82
83#include <assert.h>
84#include <ctype.h>
85#include <limits.h>
86#include <stdio.h>
87#include <stdlib.h>
88#include <string.h>
89
90#include "krb5-types.h"
91#include "rfc2459_asn1.h" /* XXX */
92#include "der.h"
93
94#include "ossl-bio.h"
95#include "ossl-bn.h"
96#include "ossl-rand.h"
97
98#define BN_prime_checks    0
99#define BN_prime_checks_for_size(b) \
100	((b) >= 1300 ?  2 :	    \
101	(b) >= 850 ?  3 :	    \
102	(b) >= 650 ?  4 :	    \
103	(b) >= 550 ?  5 :	    \
104	(b) >= 550 ?  5 :	    \
105	(b) >= 400 ?  7 :	    \
106	(b) >= 350 ?  8 :	    \
107	(b) >= 300 ?  9 :	    \
108	(b) >= 250 ? 12 :	    \
109	(b) >= 200 ? 15 :	    \
110	(b) >= 150 ? 18 :	    \
111	/* b >= 100 */ 27)
112
113/* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions */
114#define BN_window_bits_for_exponent_size(b) \
115	((b) > 671 ? 6 :		    \
116	(b) > 239 ? 5 :			    \
117	(b) > 79 ? 4 :			    \
118	(b) > 17 ? 3 : 1)
119
120
121#ifdef BN_LLONG
122#define mul_add(r, a, w, c)			    \
123	{					    \
124		BN_ULLONG t;			    \
125		t = (BN_ULLONG)w * (a) + (r) + (c); \
126		(r) = Lw(t);			    \
127		(c) = Hw(t);			    \
128	}
129
130#define mul(r, a, w, c)			      \
131	{				      \
132		BN_ULLONG t;		      \
133		t = (BN_ULLONG)w * (a) + (c); \
134		(r) = Lw(t);		      \
135		(c) = Hw(t);		      \
136	}
137
138#define sqr(r0, r1, a)			\
139	{				\
140		BN_ULLONG t;		\
141		t = (BN_ULLONG)(a)*(a);	\
142		(r0) = Lw(t);		\
143		(r1) = Hw(t);		\
144	}
145
146#elif defined(BN_UMULT_LOHI)
147#define mul_add(r, a, w, c)			    \
148	{					    \
149		BN_ULONG high, low, ret, tmp = (a); \
150		ret = (r);			    \
151		BN_UMULT_LOHI(low, high, w, tmp);   \
152		ret += (c);			    \
153		(c) = (ret < (c)) ? 1 : 0;	    \
154		(c) += high;			    \
155		ret += low;			    \
156		(c) += (ret < low) ? 1 : 0;	    \
157		(r) = ret;			    \
158	}
159
160#define mul(r, a, w, c)				   \
161	{					   \
162		BN_ULONG high, low, ret, ta = (a); \
163		BN_UMULT_LOHI(low, high, w, ta);   \
164		ret = low + (c);		   \
165		(c) = high;			   \
166		(c) += (ret < low) ? 1 : 0;	   \
167		(r) = ret;			   \
168	}
169
170#define sqr(r0, r1, a)				 \
171	{					 \
172		BN_ULONG tmp = (a);		 \
173		BN_UMULT_LOHI(r0, r1, tmp, tmp); \
174	}
175
176#elif defined(BN_UMULT_HIGH)
177#define mul_add(r, a, w, c)			    \
178	{					    \
179		BN_ULONG high, low, ret, tmp = (a); \
180		ret = (r);			    \
181		high = BN_UMULT_HIGH(w, tmp);	    \
182		ret += (c);			    \
183		low = (w) * tmp;		    \
184		(c) = (ret < (c)) ? 1 : 0;	    \
185		(c) += high;			    \
186		ret += low;			    \
187		(c) += (ret < low) ? 1 : 0;	    \
188		(r) = ret;			    \
189	}
190
191#define mul(r, a, w, c)				   \
192	{					   \
193		BN_ULONG high, low, ret, ta = (a); \
194		low = (w) * ta;			   \
195		high = BN_UMULT_HIGH(w, ta);	   \
196		ret = low + (c);		   \
197		(c) = high;			   \
198		(c) += (ret < low) ? 1 : 0;	   \
199		(r) = ret;			   \
200	}
201
202#define sqr(r0, r1, a)				\
203	{					\
204		BN_ULONG tmp = (a);		\
205		(r0) = tmp * tmp;		\
206		(r1) = BN_UMULT_HIGH(tmp, tmp);	\
207	}
208
209#else
210
211/*************************************************************
212 * No long long type
213 */
214
215#define LBITS(a)	((a)&BN_MASK2l)
216#define HBITS(a)	(((a)>>BN_BITS4)&BN_MASK2l)
217#define L2HBITS(a)	(((a)<<BN_BITS4)&BN_MASK2)
218
219#define LLBITS(a)	((a)&BN_MASKl)
220#define LHBITS(a)	(((a)>>BN_BITS2)&BN_MASKl)
221#define LL2HBITS(a)	((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
222
223#define mul64(l, h, bl, bh)							 \
224	{									 \
225		BN_ULONG m, m1, lt, ht;						 \
226										 \
227		lt = l;								 \
228		ht = h;								 \
229		m = (bh)*(lt);							 \
230		lt = (bl)*(lt);							 \
231		m1 = (bl)*(ht);							 \
232		ht = (bh)*(ht);							 \
233		m = (m+m1)&BN_MASK2; if (m < m1) { ht += L2HBITS((BN_ULONG)1); } \
234		ht += HBITS(m);							 \
235		m1 = L2HBITS(m);						 \
236		lt = (lt+m1)&BN_MASK2; if (lt < m1) { ht++; }			 \
237		(l) = lt;							 \
238		(h) = ht;							 \
239	}
240
241#define sqr64(lo, ho, in)				\
242	{						\
243		BN_ULONG l, h, m;			\
244							\
245		h = (in);				\
246		l = LBITS(h);				\
247		h = HBITS(h);				\
248		m = (l)*(h);				\
249		l *= l;					\
250		h *= h;					\
251		h += (m&BN_MASK2h1)>>(BN_BITS4-1);	\
252		m = (m&BN_MASK2l)<<(BN_BITS4+1);	\
253		l = (l+m)&BN_MASK2; if (l < m) { h++; }	\
254		(lo) = l;				\
255		(ho) = h;				\
256	}
257
258#define mul_add(r, a, bl, bh, c)			    \
259	{						    \
260		BN_ULONG l, h;				    \
261							    \
262		h = (a);				    \
263		l = LBITS(h);				    \
264		h = HBITS(h);				    \
265		mul64(l, h, (bl), (bh));		    \
266							    \
267		/* non-multiply part */			    \
268		l = (l+(c))&BN_MASK2; if (l < (c)) { h++; } \
269		(c) = (r);				    \
270		l = (l+(c))&BN_MASK2; if (l < (c)) { h++; } \
271		(c) = h&BN_MASK2;			    \
272		(r) = l;				    \
273	}
274
275#define mul(r, a, bl, bh, c)				   \
276	{						   \
277		BN_ULONG l, h;				   \
278							   \
279		h = (a);				   \
280		l = LBITS(h);				   \
281		h = HBITS(h);				   \
282		mul64(l, h, (bl), (bh));		   \
283							   \
284		/* non-multiply part */			   \
285		l += (c); if ((l&BN_MASK2) < (c)) { h++; } \
286		(c) = h&BN_MASK2;			   \
287		(r) = l&BN_MASK2;			   \
288	}
289#endif /* !BN_LLONG */
290
291#define bn_wexpand(a, words)    (((words) <= (a)->dmax) ? (a) : bn_expand2((a), (words)))
292
293#define bn_expand(a, bits)					 \
294	((((((bits + BN_BITS2 - 1)) / BN_BITS2)) <= (a)->dmax) ? \
295	(a) : bn_expand2((a), (bits + BN_BITS2 - 1) / BN_BITS2))
296#define bn_pollute(a)
297#define bn_check_top(a)
298
299#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
300
301BN_ULONG
302bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
303{
304	BN_ULONG c1 = 0;
305
306	assert(num >= 0);
307	if (num <= 0) {
308		return (c1);
309	}
310
311	while (num&~3) {
312		mul_add(rp[0], ap[0], w, c1);
313		mul_add(rp[1], ap[1], w, c1);
314		mul_add(rp[2], ap[2], w, c1);
315		mul_add(rp[3], ap[3], w, c1);
316		ap += 4;
317		rp += 4;
318		num -= 4;
319	}
320	if (num) {
321		mul_add(rp[0], ap[0], w, c1);
322		if (--num == 0) {
323			return (c1);
324		}
325		mul_add(rp[1], ap[1], w, c1);
326		if (--num == 0) {
327			return (c1);
328		}
329		mul_add(rp[2], ap[2], w, c1);
330		return (c1);
331	}
332
333	return (c1);
334}
335
336
337BN_ULONG
338bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
339{
340	BN_ULONG c1 = 0;
341
342	assert(num >= 0);
343	if (num <= 0) {
344		return (c1);
345	}
346
347	while (num&~3) {
348		mul(rp[0], ap[0], w, c1);
349		mul(rp[1], ap[1], w, c1);
350		mul(rp[2], ap[2], w, c1);
351		mul(rp[3], ap[3], w, c1);
352		ap += 4;
353		rp += 4;
354		num -= 4;
355	}
356	if (num) {
357		mul(rp[0], ap[0], w, c1);
358		if (--num == 0) {
359			return (c1);
360		}
361		mul(rp[1], ap[1], w, c1);
362		if (--num == 0) {
363			return (c1);
364		}
365		mul(rp[2], ap[2], w, c1);
366	}
367	return (c1);
368}
369
370
371void
372bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
373{
374	assert(n >= 0);
375	if (n <= 0) {
376		return;
377	}
378	while (n&~3) {
379		sqr(r[0], r[1], a[0]);
380		sqr(r[2], r[3], a[1]);
381		sqr(r[4], r[5], a[2]);
382		sqr(r[6], r[7], a[3]);
383		a += 4;
384		r += 8;
385		n -= 4;
386	}
387	if (n) {
388		sqr(r[0], r[1], a[0]);
389		if (--n == 0) {
390			return;
391		}
392		sqr(r[2], r[3], a[1]);
393		if (--n == 0) {
394			return;
395		}
396		sqr(r[4], r[5], a[2]);
397	}
398}
399
400
401#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
402
403BN_ULONG
404bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
405{
406	BN_ULONG c = 0;
407	BN_ULONG bl, bh;
408
409	assert(num >= 0);
410	if (num <= 0) {
411		return ((BN_ULONG)0);
412	}
413
414	bl = LBITS(w);
415	bh = HBITS(w);
416
417	for ( ; ; ) {
418		mul_add(rp[0], ap[0], bl, bh, c);
419		if (--num == 0) {
420			break;
421		}
422		mul_add(rp[1], ap[1], bl, bh, c);
423		if (--num == 0) {
424			break;
425		}
426		mul_add(rp[2], ap[2], bl, bh, c);
427		if (--num == 0) {
428			break;
429		}
430		mul_add(rp[3], ap[3], bl, bh, c);
431		if (--num == 0) {
432			break;
433		}
434		ap += 4;
435		rp += 4;
436	}
437	return (c);
438}
439
440
441BN_ULONG
442bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
443{
444	BN_ULONG carry = 0;
445	BN_ULONG bl, bh;
446
447	assert(num >= 0);
448	if (num <= 0) {
449		return ((BN_ULONG)0);
450	}
451
452	bl = LBITS(w);
453	bh = HBITS(w);
454
455	for ( ; ; ) {
456		mul(rp[0], ap[0], bl, bh, carry);
457		if (--num == 0) {
458			break;
459		}
460		mul(rp[1], ap[1], bl, bh, carry);
461		if (--num == 0) {
462			break;
463		}
464		mul(rp[2], ap[2], bl, bh, carry);
465		if (--num == 0) {
466			break;
467		}
468		mul(rp[3], ap[3], bl, bh, carry);
469		if (--num == 0) {
470			break;
471		}
472		ap += 4;
473		rp += 4;
474	}
475	return (carry);
476}
477
478
479void
480bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
481{
482	assert(n >= 0);
483	if (n <= 0) {
484		return;
485	}
486	for ( ; ; ) {
487		sqr64(r[0], r[1], a[0]);
488		if (--n == 0) {
489			break;
490		}
491
492		sqr64(r[2], r[3], a[1]);
493		if (--n == 0) {
494			break;
495		}
496
497		sqr64(r[4], r[5], a[2]);
498		if (--n == 0) {
499			break;
500		}
501
502		sqr64(r[6], r[7], a[3]);
503		if (--n == 0) {
504			break;
505		}
506
507		a += 4;
508		r += 8;
509	}
510}
511
512
513#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
514
515BN_ULONG
516bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
517{
518	BN_ULONG t1, t2;
519	int c = 0;
520
521	assert(n >= 0);
522	if (n <= 0) {
523		return ((BN_ULONG)0);
524	}
525
526	for ( ; ; ) {
527		t1 = a[0];
528		t2 = b[0];
529		r[0] = (t1-t2-c)&BN_MASK2;
530		if (t1 != t2) {
531			c = (t1 < t2);
532		}
533		if (--n <= 0) {
534			break;
535		}
536
537		t1 = a[1];
538		t2 = b[1];
539		r[1] = (t1-t2-c)&BN_MASK2;
540		if (t1 != t2) {
541			c = (t1 < t2);
542		}
543		if (--n <= 0) {
544			break;
545		}
546
547		t1 = a[2];
548		t2 = b[2];
549		r[2] = (t1-t2-c)&BN_MASK2;
550		if (t1 != t2) {
551			c = (t1 < t2);
552		}
553		if (--n <= 0) {
554			break;
555		}
556
557		t1 = a[3];
558		t2 = b[3];
559		r[3] = (t1-t2-c)&BN_MASK2;
560		if (t1 != t2) {
561			c = (t1 < t2);
562		}
563		if (--n <= 0) {
564			break;
565		}
566
567		a += 4;
568		b += 4;
569		r += 4;
570	}
571	return (c);
572}
573
574
575static inline void
576bn_correct_top(BIGNUM *a)
577{
578	BN_ULONG *ftl;
579
580	if ((a)->top > 0) {
581		for (ftl = &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) {
582			if (*(ftl--)) {
583				break;
584			}
585		}
586	}
587	bn_pollute(a);
588}
589
590
591static BN_ULONG *
592bn_expand_internal(const BIGNUM *b, int words)
593{
594	BN_ULONG *A, *a = NULL;
595	const BN_ULONG *B;
596	int i;
597
598	bn_check_top(b);
599
600	if (words > (INT_MAX/(4*BN_BITS2))) {
601		/* BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG); */
602		return (NULL);
603	}
604	if (BN_get_flags(b, BN_FLG_STATIC_DATA)) {
605		/* BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); */
606		return (NULL);
607	}
608	a = A = (BN_ULONG *)malloc(sizeof(BN_ULONG)*words);
609	if (A == NULL) {
610		/* BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE); */
611		return (NULL);
612	}
613#if 1
614	B = b->d;
615	if (B != NULL) {
616		for (i = b->top>>2; i > 0; i--, A += 4, B += 4) {
617			BN_ULONG a0, a1, a2, a3;
618			a0 = B[0];
619			a1 = B[1];
620			a2 = B[2];
621			a3 = B[3];
622			A[0] = a0;
623			A[1] = a1;
624			A[2] = a2;
625			A[3] = a3;
626		}
627		switch (b->top & 3) {
628		case 3:
629			A[2] = B[2];
630
631		case 2:
632			A[1] = B[1];
633
634		case 1:
635			A[0] = B[0];
636
637		case 0:
638			break;
639		}
640	}
641#else
642	memset(A, 0, sizeof(BN_ULONG)*words);
643	memcpy(A, b->d, sizeof(b->d[0])*b->top);
644#endif
645	return (a);
646}
647
648
649static BIGNUM *
650bn_expand2(BIGNUM *b, int words)
651{
652	bn_check_top(b);
653
654	if (words > b->dmax) {
655		BN_ULONG *a = bn_expand_internal(b, words);
656
657		if (!a) {
658			return (NULL);
659		}
660		if (b->d) {
661			free(b->d);
662		}
663		b->d = a;
664		b->dmax = words;
665	}
666
667	bn_check_top(b);
668
669	return (b);
670}
671
672
673BIGNUM *
674BN_new(void)
675{
676	BIGNUM *ret;
677
678	if ((ret = (BIGNUM *)malloc(sizeof(BIGNUM))) == NULL) {
679		/* BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); */
680		return (NULL);
681	}
682    BN_init(ret);
683
684	ret->flags = BN_FLG_MALLOCED;
685	ret->top = 0;
686	ret->dmax = 0;
687	ret->d = NULL;
688
689	bn_check_top(ret);
690
691	return (ret);
692}
693
694
695void
696BN_init(BIGNUM *a)
697{
698	memset(a, 0, sizeof(BIGNUM));
699	bn_check_top(a);
700}
701
702
703void
704BN_free(BIGNUM *a)
705{
706	if (NULL == a) {
707		return;
708	}
709	bn_check_top(a);
710	if ((a->d != NULL) && !(BN_get_flags(a, BN_FLG_STATIC_DATA))) {
711		free(a->d);
712	}
713	if (a->flags & BN_FLG_MALLOCED) {
714		free(a);
715	} else{
716		a->d = NULL;
717	}
718}
719
720
721void
722BN_clear(BIGNUM *a)
723{
724	bn_check_top(a);
725	if (a->d != NULL) {
726		memset(a->d, 0, a->dmax * sizeof(a->d[0]));
727	}
728	a->top = 0;
729	a->neg = 0;
730}
731
732
733void
734BN_clear_free(BIGNUM *a)
735{
736	int i;
737
738	if (NULL == a) {
739		return;
740	}
741	if (a->d != NULL) {
742		memset(a->d, 0, a->dmax * sizeof(a->d[0]));
743		if (!(BN_get_flags(a, BN_FLG_STATIC_DATA))) {
744			free(a->d);
745		}
746	}
747	i = BN_get_flags(a, BN_FLG_MALLOCED);
748	memset(a, 0, sizeof(*a));
749	if (i) {
750		free(a);
751	}
752}
753
754
755/*
756 * Callback when doing slow generation of numbers, like primes.
757 */
758void
759BN_GENCB_set(BN_GENCB *gencb, int (*cb_2)(int, int, BN_GENCB *), void *ctx)
760{
761	gencb->ver = 2;
762	gencb->cb.cb_2 = cb_2;
763	gencb->arg = ctx;
764}
765
766
767int
768BN_GENCB_call(BN_GENCB *cb, int a, int b)
769{
770	if ((cb == NULL) || (cb->cb.cb_2 == NULL)) {
771		return (1);
772	}
773	return (cb->cb.cb_2(a, b, cb));
774}
775
776
777/*
778 * ctx pools
779 */
780
781/* How many bignums are in each "pool item"; */
782#define BN_CTX_POOL_SIZE	16
783/* The stack frame info is resizing, set a first-time expansion size; */
784#define BN_CTX_START_FRAMES	32
785
786/***********/
787/* BN_POOL */
788/***********/
789
790/* A bundle of bignums that can be linked with other bundles */
791typedef struct bignum_pool_item {
792	/* The bignum values */
793	BIGNUM				vals[BN_CTX_POOL_SIZE];
794	/* Linked-list admin */
795	struct bignum_pool_item *	prev, *next;
796} BN_POOL_ITEM;
797
798/* A linked-list of bignums grouped in bundles */
799typedef struct bignum_pool {
800	/* Linked-list admin */
801	BN_POOL_ITEM *	head, *current, *tail;
802	/* Stack depth and allocation size */
803	unsigned	used, size;
804} BN_POOL;
805
806static void BN_POOL_init(BN_POOL *);
807static void BN_POOL_finish(BN_POOL *);
808
809#ifndef OPENSSL_NO_DEPRECATED
810static void BN_POOL_reset(BN_POOL *);
811
812#endif
813static BIGNUM *BN_POOL_get(BN_POOL *);
814static void BN_POOL_release(BN_POOL *, unsigned int);
815
816/************/
817/* BN_STACK */
818/************/
819
820/* A wrapper to manage the "stack frames" */
821typedef struct bignum_ctx_stack {
822	/* Array of indexes into the bignum stack */
823	unsigned int *	indexes;
824	/* Number of stack frames, and the size of the allocated array */
825	unsigned int	depth, size;
826} BN_STACK;
827static void BN_STACK_init(BN_STACK *);
828static void BN_STACK_finish(BN_STACK *);
829
830#ifndef OPENSSL_NO_DEPRECATED
831static void BN_STACK_reset(BN_STACK *);
832
833#endif
834static int BN_STACK_push(BN_STACK *, unsigned int);
835static unsigned int BN_STACK_pop(BN_STACK *);
836
837/**********/
838/* BN_CTX */
839/**********/
840
841/* The opaque BN_CTX type */
842struct bignum_ctx {
843	/* The bignum bundles */
844	BN_POOL		pool;
845	/* The "stack frames", if you will */
846	BN_STACK	stack;
847	/* The number of bignums currently assigned */
848	unsigned int	used;
849	/* Depth of stack overflow */
850	int		err_stack;
851	/* Block "gets" until an "end" (compatibility behaviour) */
852	int		too_many;
853};
854
855BN_CTX *
856BN_CTX_new(void)
857{
858	BN_CTX *ret = malloc(sizeof(BN_CTX));
859
860	if (!ret) {
861		/* BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); */
862		return (NULL);
863	}
864	/* Initialise the structure */
865	BN_POOL_init(&ret->pool);
866	BN_STACK_init(&ret->stack);
867	ret->used = 0;
868	ret->err_stack = 0;
869	ret->too_many = 0;
870
871	return (ret);
872}
873
874
875void
876BN_CTX_free(BN_CTX *ctx)
877{
878	if (ctx == NULL) {
879		return;
880	}
881	BN_STACK_finish(&ctx->stack);
882	BN_POOL_finish(&ctx->pool);
883	free(ctx);
884}
885
886
887void
888BN_CTX_start(BN_CTX *ctx)
889{
890	/* If we're already overflowing ... */
891	if (ctx->err_stack || ctx->too_many) {
892		ctx->err_stack++;
893	}
894	/* (Try to) get a new frame pointer */
895	else if (!BN_STACK_push(&ctx->stack, ctx->used)) {
896		/* BNerr(BN_F_BN_CTX_START,BN_R_TOO_MANY_TEMPORARY_VARIABLES); */
897		ctx->err_stack++;
898	}
899}
900
901
902void
903BN_CTX_end(BN_CTX *ctx)
904{
905	if (ctx->err_stack) {
906		ctx->err_stack--;
907	} else{
908		unsigned int fp = BN_STACK_pop(&ctx->stack);
909		/* Does this stack frame have anything to release? */
910		if (fp < ctx->used) {
911			BN_POOL_release(&ctx->pool, ctx->used - fp);
912		}
913		ctx->used = fp;
914		/* Unjam "too_many" in case "get" had failed */
915		ctx->too_many = 0;
916	}
917}
918
919
920BIGNUM *
921BN_CTX_get(BN_CTX *ctx)
922{
923	BIGNUM *ret;
924
925	if (ctx->err_stack || ctx->too_many) {
926		return (NULL);
927	}
928	if ((ret = BN_POOL_get(&ctx->pool)) == NULL) {
929		/* Setting too_many prevents repeated "get" attempts from
930		 * cluttering the error stack. */
931		ctx->too_many = 1;
932		/* BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES); */
933		return (NULL);
934	}
935	/* OK, make sure the returned bignum is "zero" */
936	BN_zero(ret);
937	ctx->used++;
938	return (ret);
939}
940
941
942/************/
943/* BN_STACK */
944/************/
945
946static void
947BN_STACK_init(BN_STACK *st)
948{
949	st->indexes = NULL;
950	st->depth = st->size = 0;
951}
952
953
954static void
955BN_STACK_finish(BN_STACK *st)
956{
957	if (st->size) {
958		free(st->indexes);
959	}
960}
961
962
963static int
964BN_STACK_push(BN_STACK *st, unsigned int idx)
965{
966	if (st->depth == st->size) {
967		/* Need to expand */
968		unsigned int newsize = (st->size ?
969		    (st->size * 3 / 2) : BN_CTX_START_FRAMES);
970		unsigned int *newitems = malloc(newsize *
971			sizeof(unsigned int));
972		if (!newitems) {
973			return (0);
974		}
975		if (st->depth) {
976			memcpy(newitems, st->indexes, st->depth *
977			    sizeof(unsigned int));
978		}
979		if (st->size) {
980			free(st->indexes);
981		}
982		st->indexes = newitems;
983		st->size = newsize;
984	}
985	st->indexes[(st->depth)++] = idx;
986	return (1);
987}
988
989
990static unsigned int
991BN_STACK_pop(BN_STACK *st)
992{
993	return (st->indexes[--(st->depth)]);
994}
995
996
997/***********/
998/* BN_POOL */
999/***********/
1000
1001static void
1002BN_POOL_init(BN_POOL *p)
1003{
1004	p->head = p->current = p->tail = NULL;
1005	p->used = p->size = 0;
1006}
1007
1008
1009static void
1010BN_POOL_finish(BN_POOL *p)
1011{
1012	while (p->head) {
1013		unsigned int loop = 0;
1014		BIGNUM *bn = p->head->vals;
1015		while (loop++ < BN_CTX_POOL_SIZE) {
1016			if (bn->d) {
1017				BN_clear_free(bn);
1018			}
1019			bn++;
1020		}
1021		p->current = p->head->next;
1022		free(p->head);
1023		p->head = p->current;
1024	}
1025}
1026
1027
1028#ifndef OPENSSL_NO_DEPRECATED
1029static void BN_POOL_reset(BN_POOL *p)
1030{
1031	BN_POOL_ITEM *item = p->head;
1032
1033	while (item) {
1034		unsigned int loop = 0;
1035		BIGNUM *bn = item->vals;
1036		while (loop++ < BN_CTX_POOL_SIZE) {
1037			if (bn->d) {
1038				BN_clear(bn);
1039			}
1040			bn++;
1041		}
1042		item = item->next;
1043	}
1044	p->current = p->head;
1045	p->used = 0;
1046}
1047
1048
1049#endif
1050
1051static BIGNUM *
1052BN_POOL_get(BN_POOL *p)
1053{
1054	if (p->used == p->size) {
1055		BIGNUM *bn;
1056		unsigned int loop = 0;
1057		BN_POOL_ITEM *item = malloc(sizeof(BN_POOL_ITEM));
1058		if (!item) {
1059			return (NULL);
1060		}
1061		/* Initialise the structure */
1062		bn = item->vals;
1063		while (loop++ < BN_CTX_POOL_SIZE) {
1064			BN_init(bn++);
1065		}
1066		item->prev = p->tail;
1067		item->next = NULL;
1068		/* Link it in */
1069		if (!p->head) {
1070			p->head = p->current = p->tail = item;
1071		} else{
1072			p->tail->next = item;
1073			p->tail = item;
1074			p->current = item;
1075		}
1076		p->size += BN_CTX_POOL_SIZE;
1077		p->used++;
1078		/* Return the first bignum from the new pool */
1079		return (item->vals);
1080	}
1081	if (!p->used) {
1082		p->current = p->head;
1083	} else if ((p->used % BN_CTX_POOL_SIZE) == 0) {
1084		p->current = p->current->next;
1085	}
1086	return (p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE));
1087}
1088
1089
1090static void
1091BN_POOL_release(BN_POOL *p, unsigned int num)
1092{
1093	unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
1094
1095	p->used -= num;
1096	while (num--) {
1097		bn_check_top(p->current->vals + offset);
1098		if (!offset) {
1099			offset = BN_CTX_POOL_SIZE - 1;
1100			p->current = p->current->prev;
1101		}else          {
1102			offset--;
1103		}
1104	}
1105}
1106
1107
1108/*
1109 *
1110 */
1111BIGNUM *
1112BN_dup(const BIGNUM *a)
1113{
1114	BIGNUM *t;
1115
1116	if (NULL == a) {
1117		return (NULL);
1118	}
1119	bn_check_top(a);
1120
1121	t = BN_new();
1122	if (t == NULL) {
1123		return (NULL);
1124	}
1125	if (!BN_copy(t, a)) {
1126		BN_free(t);
1127		return (NULL);
1128	}
1129	bn_check_top(t);
1130	return (t);
1131}
1132
1133
1134BIGNUM *
1135BN_copy(BIGNUM *a, const BIGNUM *b)
1136{
1137	int i;
1138	BN_ULONG *A;
1139	const BN_ULONG *B;
1140
1141	bn_check_top(b);
1142	if (a == b) {
1143		return (a);
1144	}
1145	if (bn_wexpand(a, b->top) == NULL) {
1146		return (NULL);
1147	}
1148	A = a->d;
1149	B = b->d;
1150#if 1
1151	for (i = b->top>>2; i > 0; i--, A += 4, B += 4) {
1152		BN_ULONG a0, a1, a2, a3;
1153		a0 = B[0];
1154		a1 = B[1];
1155		a2 = B[2];
1156		a3 = B[3];
1157		A[0] = a0;
1158		A[1] = a1;
1159		A[2] = a2;
1160		A[3] = a3;
1161	}
1162	switch (b->top&3) {
1163	case 3:
1164		A[2] = B[2];
1165
1166	case 2:
1167		A[1] = B[1];
1168
1169	case 1:
1170		A[0] = B[0];
1171
1172	case 0:
1173		break;
1174	}
1175#else
1176	memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
1177#endif
1178	a->top = b->top;
1179	a->neg = b->neg;
1180	bn_check_top(a);
1181
1182	return (a);
1183}
1184
1185
1186const BIGNUM *
1187BN_value_one(void)
1188{
1189	static BN_ULONG data_one = 1L;
1190	static const BIGNUM const_one =
1191	{
1192		.d	= &data_one,
1193		.top	=	 1,
1194		.dmax	=	 1,
1195		.neg	=	 0,
1196		.flags	= BN_FLG_STATIC_DATA
1197	};
1198
1199	return (&const_one);
1200}
1201
1202
1203int
1204BN_num_bits_word(BN_ULONG l)
1205{
1206	static const char bits[256] =
1207	{
1208		0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
1209		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1210		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1211		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1212		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
1213		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
1214		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
1215		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
1216		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1217		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1218		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1219		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1220		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1221		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1222		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1223		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1224	};
1225
1226#if defined(SIXTY_FOUR_BIT_LONG)
1227	if (l & 0xffffffff00000000L) {
1228		if (l & 0xffff000000000000L) {
1229			if (l & 0xff00000000000000L) {
1230				return (bits[(int)(l >> 56)] + 56);
1231			} else{
1232				return (bits[(int)(l >> 48)] + 48);
1233			}
1234		} else {
1235			if (l & 0x0000ff0000000000L) {
1236				return (bits[(int)(l >> 40)] + 40);
1237			} else{
1238				return (bits[(int)(l >> 32)] + 32);
1239			}
1240		}
1241	} else
1242#else
1243#ifdef SIXTY_FOUR_BIT
1244	if (l & 0xffffffff00000000LL) {
1245		if (l & 0xffff000000000000LL) {
1246			if (l & 0xff00000000000000LL) {
1247				return (bits[(int)(l>>56)]+56);
1248			} else{
1249				return (bits[(int)(l>>48)]+48);
1250			}
1251		} else {
1252			if (l & 0x0000ff0000000000LL) {
1253				return (bits[(int)(l>>40)]+40);
1254			} else{
1255				return (bits[(int)(l>>32)]+32);
1256			}
1257		}
1258	} else
1259#endif  /* SIXTY_FOUR_BIT */
1260#endif  /* SIXTY_FOUR_BIT_LONG */
1261	{
1262#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
1263		if (l & 0xffff0000L) {
1264			if (l & 0xff000000L) {
1265				return (bits[(int)(l >> 24L)] + 24);
1266			} else{
1267				return (bits[(int)(l >> 16L)] + 16);
1268			}
1269		} else
1270#endif
1271		{
1272#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
1273			if (l & 0xff00L) {
1274				return (bits[(int)(l >> 8)]+8);
1275			} else
1276#endif
1277			return (bits[(int)(l)]);
1278		}
1279	}
1280}
1281
1282
1283int
1284BN_num_bits(const BIGNUM *a)
1285{
1286	int i = a->top - 1;
1287
1288	bn_check_top(a);
1289
1290	if (BN_is_zero(a)) {
1291		return (0);
1292	}
1293	return ((i*BN_BITS2) + BN_num_bits_word(a->d[i]));
1294}
1295
1296
1297int
1298BN_lshift1(BIGNUM *r, const BIGNUM *a)
1299{
1300	register BN_ULONG *ap, *rp, t, c;
1301	int i;
1302
1303	bn_check_top(r);
1304	bn_check_top(a);
1305
1306	if (r != a) {
1307		r->neg = a->neg;
1308		if (bn_wexpand(r, a->top+1) == NULL) {
1309			return (0);
1310		}
1311		r->top = a->top;
1312	} else {
1313		if (bn_wexpand(r, a->top+1) == NULL) {
1314			return (0);
1315		}
1316	}
1317	ap = a->d;
1318	rp = r->d;
1319	c = 0;
1320	for (i = 0; i < a->top; i++) {
1321		t = *(ap++);
1322		*(rp++) = ((t<<1) | c) & BN_MASK2;
1323		c = (t & BN_TBIT) ? 1 : 0;
1324	}
1325	if (c) {
1326		*rp = 1;
1327		r->top++;
1328	}
1329
1330	bn_check_top(r);
1331	return (1);
1332}
1333
1334
1335int
1336BN_rshift1(BIGNUM *r, const BIGNUM *a)
1337{
1338	BN_ULONG *ap, *rp, t, c;
1339	int i;
1340
1341	bn_check_top(r);
1342	bn_check_top(a);
1343
1344	if (BN_is_zero(a)) {
1345		BN_zero(r);
1346		return (1);
1347	}
1348	if (a != r) {
1349		if (bn_wexpand(r, a->top) == NULL) {
1350			return (0);
1351		}
1352		r->top = a->top;
1353		r->neg = a->neg;
1354	}
1355	ap = a->d;
1356	rp = r->d;
1357	c = 0;
1358	for (i = a->top-1; i >= 0; i--) {
1359		t = ap[i];
1360		rp[i] = ((t >> 1) & BN_MASK2) | c;
1361		c = (t & 1) ? BN_TBIT : 0;
1362	}
1363	bn_correct_top(r);
1364	bn_check_top(r);
1365
1366	return (1);
1367}
1368
1369
1370int
1371BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
1372{
1373	int i, nw, lb, rb;
1374	BN_ULONG *t, *f;
1375	BN_ULONG l;
1376
1377	bn_check_top(r);
1378	bn_check_top(a);
1379
1380	r->neg = a->neg;
1381	nw = n / BN_BITS2;
1382	if (bn_wexpand(r, a->top + nw + 1) == NULL) {
1383		return (0);
1384	}
1385	lb = n % BN_BITS2;
1386	rb = BN_BITS2-lb;
1387	f = a->d;
1388	t = r->d;
1389	t[a->top+nw] = 0;
1390	if (lb == 0) {
1391		for (i = a->top-1; i >= 0; i--) {
1392			t[nw+i] = f[i];
1393		}
1394	} else{
1395		for (i = a->top-1; i >= 0; i--) {
1396			l = f[i];
1397			t[nw+i+1] |= (l>>rb) & BN_MASK2;
1398			t[nw+i] = (l << lb) & BN_MASK2;
1399		}
1400	}
1401	memset(t, 0, nw * sizeof(t[0]));
1402	r->top = a->top+nw+1;
1403	bn_correct_top(r);
1404	bn_check_top(r);
1405
1406	return (1);
1407}
1408
1409
1410int
1411BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
1412{
1413	int i, j, nw, lb, rb;
1414	BN_ULONG *t, *f;
1415	BN_ULONG l, tmp;
1416
1417	bn_check_top(r);
1418	bn_check_top(a);
1419
1420	nw = n / BN_BITS2;
1421	rb = n % BN_BITS2;
1422	lb = BN_BITS2 - rb;
1423	if ((nw >= a->top) || (a->top == 0)) {
1424		BN_zero(r);
1425		return (1);
1426	}
1427	if (r != a) {
1428		r->neg = a->neg;
1429		if (bn_wexpand(r, a->top - nw + 1) == NULL) {
1430			return (0);
1431		}
1432	} else {
1433		if (n == 0) {
1434			return (1); /* or the copying loop will go berserk */
1435		}
1436	}
1437
1438	f = &(a->d[nw]);
1439	t = r->d;
1440	j = a->top-nw;
1441	r->top = j;
1442
1443	if (rb == 0) {
1444		for (i = j; i != 0; i--) {
1445			*(t++) = *(f++);
1446		}
1447	} else {
1448		l = *(f++);
1449		for (i = j-1; i != 0; i--) {
1450			tmp = (l >> rb) & BN_MASK2;
1451			l = *(f++);
1452			*(t++) = (tmp | (l << lb)) & BN_MASK2;
1453		}
1454		*(t++) = (l >> rb) & BN_MASK2;
1455	}
1456	bn_correct_top(r);
1457	bn_check_top(r);
1458
1459	return (1);
1460}
1461
1462
1463int
1464BN_num_bytes(const BIGNUM *bn)
1465{
1466/*
1467 *  return (bn->top);
1468 */
1469	return ((int)((BN_num_bits(bn) + 7) / 8));
1470}
1471
1472
1473BIGNUM *
1474BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
1475{
1476	unsigned int i, m;
1477	unsigned int n;
1478	BN_ULONG l;
1479	BIGNUM *bn = NULL;
1480
1481	if (ret == NULL) {
1482		ret = bn = BN_new();
1483	}
1484	if (ret == NULL) {
1485		return (NULL);
1486	}
1487	bn_check_top(ret);
1488	l = 0;
1489	n = len;
1490	if (n == 0) {
1491		ret->top = 0;
1492		return (ret);
1493	}
1494	i = ((n - 1) / BN_BYTES) + 1;
1495	m = ((n - 1) % (BN_BYTES));
1496	if (bn_wexpand(ret, (int)i) == NULL) {
1497		if (bn) {
1498			BN_free(bn);
1499		}
1500		return (NULL);
1501	}
1502	ret->top = i;
1503	ret->neg = 0;
1504	while (n--) {
1505		l = (l << 8L) | *(s++);
1506		if (m-- == 0) {
1507			ret->d[--i] = l;
1508			l = 0;
1509			m = BN_BYTES-1;
1510		}
1511	}
1512
1513	/* need to call this due to clear byte at top if avoiding
1514	 * having the top bit set (-ve number) */
1515	bn_correct_top(ret);
1516	return (ret);
1517}
1518
1519
1520/* ignore negative */
1521int
1522BN_bn2bin(const BIGNUM *a, unsigned char *to)
1523{
1524	int n, i;
1525	BN_ULONG l;
1526
1527	bn_check_top(a);
1528	n = i = BN_num_bytes(a);
1529	while (i--) {
1530		l = a->d[i / BN_BYTES];
1531		*(to++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
1532	}
1533	return (n);
1534}
1535
1536
1537int
1538BN_ucmp(const BIGNUM *a, const BIGNUM *b)
1539{
1540	int i;
1541	BN_ULONG t1, t2, *ap, *bp;
1542
1543	bn_check_top(a);
1544	bn_check_top(b);
1545
1546	i = a->top - b->top;
1547	if (i != 0) {
1548		return (i);
1549	}
1550	ap = a->d;
1551	bp = b->d;
1552	for (i = a->top - 1; i >= 0; i--) {
1553		t1 = ap[i];
1554		t2 = bp[i];
1555		if (t1 != t2) {
1556			return ((t1 > t2) ? 1 : -1);
1557		}
1558	}
1559	return (0);
1560}
1561
1562
1563int
1564BN_cmp(const BIGNUM *a, const BIGNUM *b)
1565{
1566	int i;
1567	int gt, lt;
1568	BN_ULONG t1, t2;
1569
1570	if ((a == NULL) || (b == NULL)) {
1571		if (a != NULL) {
1572			return (-1);
1573		} else if (b != NULL) {
1574			return (1);
1575		} else{
1576			return (0);
1577		}
1578	}
1579
1580	bn_check_top(a);
1581	bn_check_top(b);
1582
1583	if (a->neg != b->neg) {
1584		if (a->neg) {
1585			return (-1);
1586		} else{
1587			return (1);
1588		}
1589	}
1590	if (a->neg == 0) {
1591		gt = 1;
1592		lt = -1;
1593	} else {
1594		gt = -1;
1595		lt = 1;
1596	}
1597
1598	if (a->top > b->top) {
1599		return (gt);
1600	}
1601	if (a->top < b->top) {
1602		return (lt);
1603	}
1604	for (i = a->top-1; i >= 0; i--) {
1605		t1 = a->d[i];
1606		t2 = b->d[i];
1607		if (t1 > t2) {
1608			return (gt);
1609		}
1610		if (t1 < t2) {
1611			return (lt);
1612		}
1613	}
1614
1615	return (0);
1616}
1617
1618
1619int
1620BN_set_bit(BIGNUM *a, int n)
1621{
1622	int i, j, k;
1623
1624	if (n < 0) {
1625		return (0);
1626	}
1627
1628	i = n / BN_BITS2;
1629	j = n % BN_BITS2;
1630	if (a->top <= i) {
1631		if (bn_wexpand(a, i+1) == NULL) {
1632			return (0);
1633		}
1634		for (k = a->top; k < i + 1; k++) {
1635			a->d[k] = 0;
1636		}
1637		a->top = i + 1;
1638	}
1639
1640	a->d[i] |= (((BN_ULONG)1) << j);
1641	bn_check_top(a);
1642
1643	return (1);
1644}
1645
1646
1647int
1648BN_clear_bit(BIGNUM *a, int n)
1649{
1650	int i, j;
1651
1652	bn_check_top(a);
1653	if (n < 0) {
1654		return (0);
1655	}
1656
1657	i = n / BN_BITS2;
1658	j = n % BN_BITS2;
1659	if (a->top <= i) {
1660		return (0);
1661	}
1662
1663	a->d[i] &= (~(((BN_ULONG)1) << j));
1664	bn_correct_top(a);
1665	return (1);
1666}
1667
1668
1669int
1670BN_is_bit_set(const BIGNUM *a, int n)
1671{
1672	int i, j;
1673
1674	bn_check_top(a);
1675	if (n < 0) {
1676		return (0);
1677	}
1678	i = n / BN_BITS2;
1679	j = n % BN_BITS2;
1680	if (a->top <= i) {
1681		return (0);
1682	}
1683	return (((a->d[i]) >> j) & ((BN_ULONG)1));
1684}
1685
1686
1687int
1688BN_mask_bits(BIGNUM *a, int n)
1689{
1690	int b, w;
1691
1692	bn_check_top(a);
1693	if (n < 0) {
1694		return (0);
1695	}
1696
1697	w = n / BN_BITS2;
1698	b = n % BN_BITS2;
1699	if (w >= a->top) {
1700		return (0);
1701	}
1702	if (b == 0) {
1703		a->top = w;
1704	} else{
1705		a->top = w + 1;
1706		a->d[w] &= ~(BN_MASK2 << b);
1707	}
1708	bn_correct_top(a);
1709
1710	return (1);
1711}
1712
1713
1714static const char Hex[] = "0123456789ABCDEF";
1715
1716/* Must 'free' the returned data */
1717char *
1718BN_bn2hex(const BIGNUM *a)
1719{
1720	int i, j, v, z = 0;
1721	char *buf;
1722	char *p;
1723
1724	buf = (char *)malloc(a->top * BN_BYTES * 2 + 2);
1725	if (buf == NULL) {
1726		/* BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); */
1727		goto err;
1728	}
1729	p = buf;
1730	if (a->neg) {
1731		*(p++) = '-';
1732	}
1733	if (BN_is_zero(a)) {
1734		*(p++) = '0';
1735	}
1736	for (i = a->top-1; i >= 0; i--) {
1737		for (j = BN_BITS2-8; j >= 0; j -= 8) {
1738			/* strip leading zeros */
1739			v = ((int)(a->d[i] >> (long)j)) & 0xff;
1740			if (z || (v != 0)) {
1741				*(p++) = Hex[v>>4];
1742				*(p++) = Hex[v&0x0f];
1743				z = 1;
1744			}
1745		}
1746	}
1747	*p = '\0';
1748err:
1749	return (buf);
1750}
1751
1752
1753/* Must 'free' the returned data */
1754char *
1755BN_bn2dec(const BIGNUM *a)
1756{
1757	int i = 0, num, ok = 0;
1758	char *buf = NULL;
1759	char *p;
1760	BIGNUM *t = NULL;
1761	BN_ULONG *bn_data = NULL, *lp;
1762
1763	/* get an upper bound for the length of the decimal integer
1764	 * num <= (BN_num_bits(a) + 1) * log(2)
1765	 *     <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1     (rounding error)
1766	 *     <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1
1767	 */
1768	i = BN_num_bits(a) * 3;
1769	num = (i / 10 + i / 1000 + 1) + 1;
1770	bn_data = (BN_ULONG *)malloc((num / BN_DEC_NUM + 1) * sizeof(BN_ULONG));
1771	buf = (char *)malloc(num + 3);
1772	if ((buf == NULL) || (bn_data == NULL)) {
1773		/* BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); */
1774		goto err;
1775	}
1776	if ((t = BN_dup(a)) == NULL) {
1777		goto err;
1778	}
1779
1780#define BUF_REMAIN    (num+3 - (size_t)(p - buf))
1781	p = buf;
1782	lp = bn_data;
1783	if (BN_is_zero(t)) {
1784		*(p++) = '0';
1785		*(p++) = '\0';
1786	} else {
1787		if (BN_is_negative(t)) {
1788			*p++ = '-';
1789		}
1790
1791		i = 0;
1792		while (!BN_is_zero(t)) {
1793			*lp = BN_div_word(t, BN_DEC_CONV);
1794			lp++;
1795		}
1796		lp--;
1797
1798		/* We now have a series of blocks, BN_DEC_NUM chars
1799		 * in length, where the last one needs truncation.
1800		 * The blocks need to be reversed in order. */
1801		snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp);
1802		while (*p) {
1803			p++;
1804		}
1805		while (lp != bn_data) {
1806			lp--;
1807			snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp);
1808			while (*p) {
1809				p++;
1810			}
1811		}
1812	}
1813	ok = 1;
1814err:
1815	if (bn_data != NULL) {
1816		free(bn_data);
1817	}
1818	if (t != NULL) {
1819		BN_free(t);
1820	}
1821	if (!ok && buf) {
1822		free(buf);
1823		buf = NULL;
1824	}
1825
1826	return (buf);
1827}
1828
1829
1830int
1831BN_hex2bn(BIGNUM **bn, const char *a)
1832{
1833	BIGNUM *ret = NULL;
1834	BN_ULONG l = 0;
1835	int neg = 0, h, m, i, j, k, c;
1836	int num;
1837
1838	if ((a == NULL) || (*a == '\0')) {
1839		return (0);
1840	}
1841
1842	if (*a == '-') {
1843		neg = 1;
1844		a++;
1845	}
1846
1847	for (i = 0; isxdigit((unsigned char)a[i]); i++) {
1848	}
1849
1850	num = i + neg;
1851	if (bn == NULL) {
1852		return (num);
1853	}
1854
1855	/* a is the start of the hex digits, and it is 'i' long */
1856	if (*bn == NULL) {
1857		if ((ret = BN_new()) == NULL) {
1858			return (0);
1859		}
1860	} else {
1861		ret = *bn;
1862		BN_zero(ret);
1863	}
1864
1865	/* i is the number of hex digests; */
1866	if (bn_expand(ret, i * 4) == NULL) {
1867		goto err;
1868	}
1869
1870	j = i; /* least significant 'hex' */
1871	m = 0;
1872	h = 0;
1873	while (j > 0) {
1874		m = ((BN_BYTES*2) <= j) ? (BN_BYTES*2) : j;
1875		l = 0;
1876		for ( ; ; ) {
1877			c = a[j-m];
1878			if ((c >= '0') && (c <= '9')) {
1879				k = c-'0';
1880			} else if ((c >= 'a') && (c <= 'f')) {
1881				k = c-'a'+10;
1882			} else if ((c >= 'A') && (c <= 'F')) {
1883				k = c-'A'+10;
1884			} else {
1885				k = 0; /* paranoia */
1886			}
1887			l = (l<<4) | k;
1888
1889			if (--m <= 0) {
1890				ret->d[h++] = l;
1891				break;
1892			}
1893		}
1894		j -= (BN_BYTES * 2);
1895	}
1896	ret->top = h;
1897	bn_correct_top(ret);
1898	ret->neg = neg;
1899
1900	*bn = ret;
1901	bn_check_top(ret);
1902	return (num);
1903
1904err:
1905	if (*bn == NULL) {
1906		BN_free(ret);
1907	}
1908	return (0);
1909}
1910
1911
1912int
1913BN_dec2bn(BIGNUM **bn, const char *a)
1914{
1915	BIGNUM *ret = NULL;
1916	BN_ULONG l = 0;
1917	int neg = 0, i, j;
1918	int num;
1919
1920	if ((a == NULL) || (*a == '\0')) {
1921		return (0);
1922	}
1923	if (*a == '-') {
1924		neg = 1;
1925		a++;
1926	}
1927
1928	for (i = 0; isdigit((unsigned char)a[i]); i++) {
1929	}
1930
1931	num = i + neg;
1932	if (bn == NULL) {
1933		return (num);
1934	}
1935
1936	/* a is the start of the digits, and it is 'i' long.
1937	 * We chop it into BN_DEC_NUM digits at a time */
1938	if (*bn == NULL) {
1939		if ((ret = BN_new()) == NULL) {
1940			return (0);
1941		}
1942	} else {
1943		ret = *bn;
1944		BN_zero(ret);
1945	}
1946
1947	/* i is the number of digests, a bit of an over expand; */
1948	if (bn_expand(ret, i * 4) == NULL) {
1949		goto err;
1950	}
1951
1952	j = BN_DEC_NUM - (i % BN_DEC_NUM);
1953	if (j == BN_DEC_NUM) {
1954		j = 0;
1955	}
1956	l = 0;
1957	while (*a) {
1958		l *= 10;
1959		l += *a - '0';
1960		a++;
1961		if (++j == BN_DEC_NUM) {
1962			BN_mul_word(ret, BN_DEC_CONV);
1963			BN_add_word(ret, l);
1964			l = 0;
1965			j = 0;
1966		}
1967	}
1968	ret->neg = neg;
1969
1970	bn_correct_top(ret);
1971	*bn = ret;
1972	bn_check_top(ret);
1973	return (num);
1974
1975err:
1976	if (*bn == NULL) {
1977		BN_free(ret);
1978	}
1979	return (0);
1980}
1981
1982
1983int
1984BN_print_fp(FILE *fp, const BIGNUM *a)
1985{
1986	BIO *b;
1987	int ret;
1988
1989	if ((b = BIO_new(BIO_s_file())) == NULL) {
1990		return (0);
1991	}
1992	BIO_set_fp(b, fp, BIO_NOCLOSE);
1993	ret = BN_print(b, a);
1994	BIO_free(b);
1995	return (ret);
1996}
1997
1998
1999int
2000BN_print(void *bp, const BIGNUM *a)
2001{
2002	int i, j, v, z = 0;
2003	int ret = 0;
2004
2005	if ((a->neg) && (BIO_write((BIO *)bp, "-", 1) != 1)) {
2006		goto end;
2007	}
2008	if (BN_is_zero(a) && (BIO_write((BIO *)bp, "0", 1) != 1)) {
2009		goto end;
2010	}
2011	for (i = a->top - 1; i >= 0; i--) {
2012		for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
2013			/* strip leading zeros */
2014			v = ((int)(a->d[i] >> (long)j)) & 0x0f;
2015			if (z || (v != 0)) {
2016				if (BIO_write((BIO *)bp, &(Hex[v]), 1) != 1) {
2017					goto end;
2018				}
2019				z = 1;
2020			}
2021		}
2022	}
2023	ret = 1;
2024end:
2025	return (ret);
2026}
2027
2028
2029void
2030BN_set_negative(BIGNUM *a, int b)
2031{
2032	if (b && !BN_is_zero(a)) {
2033		a->neg = 1;
2034	} else{
2035		a->neg = 0;
2036	}
2037}
2038
2039
2040int
2041BN_set_word(BIGNUM *a, BN_ULONG w)
2042{
2043	bn_check_top(a);
2044	if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL) {
2045		return (0);
2046	}
2047	a->neg = 0;
2048	a->d[0] = w;
2049	a->top = (w ? 1 : 0);
2050	bn_check_top(a);
2051	return (1);
2052}
2053
2054
2055BN_ULONG
2056BN_get_word(const BIGNUM *a)
2057{
2058	if (a->top > 1) {
2059		return (BN_MASK2);
2060	} else if (a->top == 1) {
2061		return (a->d[0]);
2062	}
2063	return (0);
2064}
2065
2066
2067BN_ULONG
2068BN_mod_word(const BIGNUM *a, BN_ULONG w)
2069{
2070#ifndef BN_LLONG
2071	BN_ULONG ret = 0;
2072#else
2073	BN_ULLONG ret = 0;
2074#endif
2075	int i;
2076
2077	if (w == 0) {
2078		return ((BN_ULONG)-1);
2079	}
2080
2081	bn_check_top(a);
2082	w &= BN_MASK2;
2083	for (i = a->top - 1; i >= 0; i--) {
2084#ifndef BN_LLONG
2085		ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
2086		ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
2087#else
2088		ret = (BN_ULLONG)(((ret << (BN_ULLONG)BN_BITS2) | a->d[i]) %
2089		    (BN_ULLONG)w);
2090#endif
2091	}
2092
2093	return ((BN_ULONG)ret);
2094}
2095
2096
2097#ifdef BN_LLONG
2098static BN_ULONG
2099bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
2100{
2101	return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d));
2102}
2103
2104
2105#else
2106
2107/* Divide h,l by d and return the result. */
2108/* I need to test this some more :-( */
2109BN_ULONG
2110bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
2111{
2112	BN_ULONG dh, dl, q, ret = 0, th, tl, t;
2113	int i, count = 2;
2114
2115	if (d == 0) {
2116		return (BN_MASK2);
2117	}
2118
2119	i = BN_num_bits_word(d);
2120	assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));
2121
2122	i = BN_BITS2 - i;
2123	if (h >= d) {
2124		h -= d;
2125	}
2126
2127	if (i) {
2128		d <<= i;
2129		h = (h << i) | (l >> (BN_BITS2 - i));
2130		l <<= i;
2131	}
2132	dh = (d & BN_MASK2h) >> BN_BITS4;
2133	dl = (d & BN_MASK2l);
2134	for ( ; ; ) {
2135		if ((h >> BN_BITS4) == dh) {
2136			q = BN_MASK2l;
2137		} else{
2138			q = h / dh;
2139		}
2140
2141		th = q * dh;
2142		tl = dl * q;
2143		for ( ; ; ) {
2144			t = h - th;
2145			if ((t & BN_MASK2h) ||
2146			    ((tl) <= (
2147				    (t<<BN_BITS4)|
2148				    ((l&BN_MASK2h)>>BN_BITS4)))) {
2149				break;
2150			}
2151			q--;
2152			th -= dh;
2153			tl -= dl;
2154		}
2155		t = (tl >> BN_BITS4);
2156		tl = (tl << BN_BITS4) & BN_MASK2h;
2157		th += t;
2158
2159		if (l < tl) {
2160			th++;
2161		}
2162		l -= tl;
2163		if (h < th) {
2164			h += d;
2165			q--;
2166		}
2167		h -= th;
2168
2169		if (--count == 0) {
2170			break;
2171		}
2172
2173		ret = q << BN_BITS4;
2174		h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
2175		l = (l & BN_MASK2l) << BN_BITS4;
2176	}
2177	ret |= q;
2178
2179	return (ret);
2180}
2181
2182
2183#endif /* BN_LLONG */
2184
2185BN_ULONG
2186BN_div_word(BIGNUM *a, BN_ULONG w)
2187{
2188	BN_ULONG ret = 0;
2189	int i, j;
2190
2191	bn_check_top(a);
2192	w &= BN_MASK2;
2193
2194	if (!w) {
2195		/* actually this an error (division by zero) */
2196		return ((BN_ULONG)-1);
2197	}
2198	if (a->top == 0) {
2199		return (0);
2200	}
2201
2202	/* normalize input (so bn_div_words doesn't complain) */
2203	j = BN_BITS2 - BN_num_bits_word(w);
2204	w <<= j;
2205	if (!BN_lshift(a, a, j)) {
2206		return ((BN_ULONG)-1);
2207	}
2208
2209	for (i = a->top-1; i >= 0; i--) {
2210		BN_ULONG l, d;
2211
2212		l = a->d[i];
2213		d = bn_div_words(ret, l, w);
2214		ret = (l - ((d * w) & BN_MASK2)) & BN_MASK2;
2215		a->d[i] = d;
2216	}
2217	if ((a->top > 0) && (a->d[a->top-1] == 0)) {
2218		a->top--;
2219	}
2220	ret >>= j;
2221	bn_check_top(a);
2222	return (ret);
2223}
2224
2225
2226int
2227BN_add_word(BIGNUM *a, BN_ULONG w)
2228{
2229	BN_ULONG l;
2230	int i;
2231
2232	bn_check_top(a);
2233	w &= BN_MASK2;
2234
2235	/* degenerate case: w is zero */
2236	if (!w) {
2237		return (1);
2238	}
2239	/* degenerate case: a is zero */
2240	if (BN_is_zero(a)) {
2241		return (BN_set_word(a, w));
2242	}
2243	/* handle 'a' when negative */
2244	if (a->neg) {
2245		a->neg = 0;
2246		i = BN_sub_word(a, w);
2247		if (!BN_is_zero(a)) {
2248			a->neg = !(a->neg);
2249		}
2250		return (i);
2251	}
2252	/* Only expand (and risk failing) if it's possibly necessary */
2253	if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) &&
2254	    (bn_wexpand(a, a->top+1) == NULL)) {
2255		return (0);
2256	}
2257	i = 0;
2258	for ( ; ; ) {
2259		if (i >= a->top) {
2260			l = w;
2261		} else{
2262			l = (a->d[i] + w) & BN_MASK2;
2263		}
2264		a->d[i] = l;
2265		if (w > l) {
2266			w = 1;
2267		} else{
2268			break;
2269		}
2270		i++;
2271	}
2272	if (i >= a->top) {
2273		a->top++;
2274	}
2275	bn_check_top(a);
2276	return (1);
2277}
2278
2279
2280int
2281BN_sub_word(BIGNUM *a, BN_ULONG w)
2282{
2283	int i;
2284
2285	bn_check_top(a);
2286	w &= BN_MASK2;
2287
2288	/* degenerate case: w is zero */
2289	if (!w) {
2290		return (1);
2291	}
2292	/* degenerate case: a is zero */
2293	if (BN_is_zero(a)) {
2294		i = BN_set_word(a, w);
2295		if (i != 0) {
2296			BN_set_negative(a, 1);
2297		}
2298		return (i);
2299	}
2300	/* handle 'a' when negative */
2301	if (a->neg) {
2302		a->neg = 0;
2303		i = BN_add_word(a, w);
2304		a->neg = 1;
2305		return (i);
2306	}
2307
2308	if ((a->top == 1) && (a->d[0] < w)) {
2309		a->d[0] = w - a->d[0];
2310		a->neg = 1;
2311		return (1);
2312	}
2313	i = 0;
2314	for ( ; ; ) {
2315		if (a->d[i] >= w) {
2316			a->d[i] -= w;
2317			break;
2318		} else {
2319			a->d[i] = (a->d[i] - w) & BN_MASK2;
2320			i++;
2321			w = 1;
2322		}
2323	}
2324	if ((a->d[i] == 0) && (i == (a->top-1))) {
2325		a->top--;
2326	}
2327	bn_check_top(a);
2328	return (1);
2329}
2330
2331
2332int
2333BN_mul_word(BIGNUM *a, BN_ULONG w)
2334{
2335	BN_ULONG ll;
2336
2337	bn_check_top(a);
2338	w &= BN_MASK2;
2339	if (a->top) {
2340		if (w == 0) {
2341			BN_zero(a);
2342		} else{
2343			ll = bn_mul_words(a->d, a->d, a->top, w);
2344			if (ll) {
2345				if (bn_wexpand(a, a->top + 1) == NULL) {
2346					return (0);
2347				}
2348				a->d[a->top++] = ll;
2349			}
2350		}
2351	}
2352	bn_check_top(a);
2353	return (1);
2354}
2355
2356
2357/*
2358 *
2359 */
2360
2361/* r can == a or b */
2362int
2363BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
2364{
2365	const BIGNUM *tmp;
2366	int a_neg = a->neg, ret;
2367
2368	bn_check_top(a);
2369	bn_check_top(b);
2370
2371	/*  a +  b	a+b
2372	 *  a + -b	a-b
2373	 * -a +  b	b-a
2374	 * -a + -b	-(a+b)
2375	 */
2376	if (a_neg ^ b->neg) {
2377		/* only one is negative */
2378		if (a_neg) {
2379			tmp = a;
2380			a = b;
2381			b = tmp;
2382		}
2383
2384		/* we are now a - b */
2385
2386		if (BN_ucmp(a, b) < 0) {
2387			if (!BN_usub(r, b, a)) {
2388				return (0);
2389			}
2390			r->neg = 1;
2391		} else {
2392			if (!BN_usub(r, a, b)) {
2393				return (0);
2394			}
2395			r->neg = 0;
2396		}
2397		return (1);
2398	}
2399
2400	ret = BN_uadd(r, a, b);
2401	r->neg = a_neg;
2402	bn_check_top(r);
2403	return (ret);
2404}
2405
2406
2407#ifdef BN_LLONG
2408BN_ULONG
2409bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
2410{
2411	BN_ULLONG ll = 0;
2412
2413	assert(n >= 0);
2414	if (n <= 0) {
2415		return ((BN_ULONG)0);
2416	}
2417
2418	for ( ; ; ) {
2419		ll += (BN_ULLONG)a[0] + b[0];
2420		r[0] = (BN_ULONG)ll & BN_MASK2;
2421		ll >>= BN_BITS2;
2422		if (--n <= 0) {
2423			break;
2424		}
2425
2426		ll += (BN_ULLONG)a[1] + b[1];
2427		r[1] = (BN_ULONG)ll & BN_MASK2;
2428		ll >>= BN_BITS2;
2429		if (--n <= 0) {
2430			break;
2431		}
2432
2433		ll += (BN_ULLONG)a[2] + b[2];
2434		r[2] = (BN_ULONG)ll & BN_MASK2;
2435		ll >>= BN_BITS2;
2436		if (--n <= 0) {
2437			break;
2438		}
2439
2440		ll += (BN_ULLONG)a[3] + b[3];
2441		r[3] = (BN_ULONG)ll & BN_MASK2;
2442		ll >>= BN_BITS2;
2443		if (--n <= 0) {
2444			break;
2445		}
2446
2447		a += 4;
2448		b += 4;
2449		r += 4;
2450	}
2451
2452	return ((BN_ULONG)ll);
2453}
2454
2455
2456#else /* !BN_LLONG */
2457
2458BN_ULONG
2459bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
2460{
2461	BN_ULONG c, l, t;
2462
2463	assert(n >= 0);
2464	if (n <= 0) {
2465		return ((BN_ULONG)0);
2466	}
2467
2468	c = 0;
2469	for ( ; ; ) {
2470		t = a[0];
2471		t = (t+c) & BN_MASK2;
2472		c = (t < c);
2473		l = (t + b[0]) & BN_MASK2;
2474		c += (l < t);
2475		r[0] = l;
2476		if (--n <= 0) {
2477			break;
2478		}
2479
2480		t = a[1];
2481		t = (t + c) & BN_MASK2;
2482		c = (t < c);
2483		l = (t + b[1]) & BN_MASK2;
2484		c += (l < t);
2485		r[1] = l;
2486		if (--n <= 0) {
2487			break;
2488		}
2489
2490		t = a[2];
2491		t = (t+c) & BN_MASK2;
2492		c = (t < c);
2493		l = (t+b[2]) & BN_MASK2;
2494		c += (l < t);
2495		r[2] = l;
2496		if (--n <= 0) {
2497			break;
2498		}
2499
2500		t = a[3];
2501		t = (t + c)&BN_MASK2;
2502		c = (t < c);
2503		l = (t + b[3]) & BN_MASK2;
2504		c += (l < t);
2505		r[3] = l;
2506		if (--n <= 0) {
2507			break;
2508		}
2509
2510		a += 4;
2511		b += 4;
2512		r += 4;
2513	}
2514	return ((BN_ULONG)c);
2515}
2516
2517
2518#endif /* !BN_LLONG */
2519
2520/* unsigned add of b to a */
2521int
2522BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
2523{
2524	int max, min, dif;
2525	BN_ULONG *ap, *bp, *rp, carry, t1, t2;
2526	const BIGNUM *tmp;
2527
2528	bn_check_top(a);
2529	bn_check_top(b);
2530
2531	if (a->top < b->top) {
2532		tmp = a;
2533		a = b;
2534		b = tmp;
2535	}
2536	max = a->top;
2537	min = b->top;
2538	dif = max - min;
2539
2540	if (bn_wexpand(r, max+1) == NULL) {
2541		return (0);
2542	}
2543
2544	r->top = max;
2545
2546
2547	ap = a->d;
2548	bp = b->d;
2549	rp = r->d;
2550
2551	carry = bn_add_words(rp, ap, bp, min);
2552	rp += min;
2553	ap += min;
2554	bp += min;
2555
2556	if (carry) {
2557		while (dif) {
2558			dif--;
2559			t1 = *(ap++);
2560			t2 = (t1 + 1) & BN_MASK2;
2561			*(rp++) = t2;
2562			if (t2) {
2563				carry = 0;
2564				break;
2565			}
2566		}
2567		if (carry) {
2568			/* carry != 0 => dif == 0 */
2569			*rp = 1;
2570			r->top++;
2571		}
2572	}
2573	if (dif && (rp != ap)) {
2574		while (dif--) {
2575			/* copy remaining words if ap != rp */
2576			*(rp++) = *(ap++);
2577		}
2578	}
2579	r->neg = 0;
2580	bn_check_top(r);
2581
2582	return (1);
2583}
2584
2585
2586/* unsigned subtraction of b from a, a must be larger than b. */
2587int
2588BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
2589{
2590	int max, min, dif;
2591	register BN_ULONG t1, t2, *ap, *bp, *rp;
2592	int i, carry;
2593
2594	bn_check_top(a);
2595	bn_check_top(b);
2596
2597	max = a->top;
2598	min = b->top;
2599	dif = max - min;
2600
2601	if (dif < 0) {
2602		/* BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3); */
2603		return (0);
2604	}
2605
2606	if (bn_wexpand(r, max) == NULL) {
2607		return (0);
2608	}
2609
2610	ap = a->d;
2611	bp = b->d;
2612	rp = r->d;
2613
2614#if 1
2615	carry = 0;
2616	for (i = min; i != 0; i--) {
2617		t1 = *(ap++);
2618		t2 = *(bp++);
2619		if (carry) {
2620			carry = (t1 <= t2);
2621			t1 = (t1-t2-1)&BN_MASK2;
2622		} else {
2623			carry = (t1 < t2);
2624			t1 = (t1-t2) & BN_MASK2;
2625		}
2626		*(rp++) = t1 & BN_MASK2;
2627	}
2628#else
2629	carry = bn_sub_words(rp, ap, bp, min);
2630	ap += min;
2631	bp += min;
2632	rp += min;
2633#endif
2634	if (carry) { /* subtracted */
2635		if (!dif) {
2636			/* error: a < b */
2637			return (0);
2638		}
2639		while (dif) {
2640			dif--;
2641			t1 = *(ap++);
2642			t2 = (t1 -1) & BN_MASK2;
2643			*(rp++) = t2;
2644			if (t1) {
2645				break;
2646			}
2647		}
2648	}
2649#if 0
2650	memcpy(rp, ap, sizeof(*rp)*(max-i));
2651#else
2652	if (rp != ap) {
2653		for ( ; ; ) {
2654			if (!dif--) {
2655				break;
2656			}
2657			rp[0] = ap[0];
2658			if (!dif--) {
2659				break;
2660			}
2661			rp[1] = ap[1];
2662			if (!dif--) {
2663				break;
2664			}
2665			rp[2] = ap[2];
2666			if (!dif--) {
2667				break;
2668			}
2669			rp[3] = ap[3];
2670			rp += 4;
2671			ap += 4;
2672		}
2673	}
2674#endif
2675
2676	r->top = max;
2677	r->neg = 0;
2678	bn_correct_top(r);
2679	return (1);
2680}
2681
2682
2683int
2684BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
2685{
2686	int max;
2687	int add = 0, neg = 0;
2688	const BIGNUM *tmp;
2689
2690	bn_check_top(a);
2691	bn_check_top(b);
2692
2693	/*  a -  b	a-b
2694	 *  a - -b	a+b
2695	 * -a -  b	-(a+b)
2696	 * -a - -b	b-a
2697	 */
2698	if (a->neg) {
2699		if (b->neg) {
2700			tmp = a;
2701			a = b;
2702			b = tmp;
2703		} else {
2704			add = 1;
2705			neg = 1;
2706		}
2707	} else {
2708		if (b->neg) {
2709			add = 1;
2710			neg = 0;
2711		}
2712	}
2713
2714	if (add) {
2715		if (!BN_uadd(r, a, b)) {
2716			return (0);
2717		}
2718		r->neg = neg;
2719		return (1);
2720	}
2721
2722	/* We are actually doing a - b :-) */
2723
2724	max = (a->top > b->top) ? a->top : b->top;
2725	if (bn_wexpand(r, max) == NULL) {
2726		return (0);
2727	}
2728	if (BN_ucmp(a, b) < 0) {
2729		if (!BN_usub(r, b, a)) {
2730			return (0);
2731		}
2732		r->neg = 1;
2733	} else {
2734		if (!BN_usub(r, a, b)) {
2735			return (0);
2736		}
2737		r->neg = 0;
2738	}
2739	bn_check_top(r);
2740
2741	return (1);
2742}
2743
2744
2745/*
2746 * div
2747 *
2748 */
2749int
2750BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
2751    BN_CTX *ctx)
2752{
2753	int i, nm, nd;
2754	int ret = 0;
2755	BIGNUM *D;
2756
2757	bn_check_top(m);
2758	bn_check_top(d);
2759	if (BN_is_zero(d)) {
2760		/* BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); */
2761		return (0);
2762	}
2763
2764	if (BN_ucmp(m, d) < 0) {
2765		if (rem != NULL) {
2766			if (BN_copy(rem, m) == NULL) {
2767				return (0);
2768			}
2769		}
2770		if (dv != NULL) {
2771			BN_zero(dv);
2772		}
2773		return (1);
2774	}
2775
2776	BN_CTX_start(ctx);
2777	D = BN_CTX_get(ctx);
2778	if (dv == NULL) {
2779		dv = BN_CTX_get(ctx);
2780	}
2781	if (rem == NULL) {
2782		rem = BN_CTX_get(ctx);
2783	}
2784	if ((D == NULL) || (dv == NULL) || (rem == NULL)) {
2785		goto end;
2786	}
2787
2788	nd = BN_num_bits(d);
2789	nm = BN_num_bits(m);
2790	if (BN_copy(D, d) == NULL) {
2791		goto end;
2792	}
2793	if (BN_copy(rem, m) == NULL) {
2794		goto end;
2795	}
2796
2797	/* The next 2 are needed so we can do a dv->d[0]|=1 later
2798	 * since BN_lshift1 will only work once there is a value :-) */
2799	BN_zero(dv);
2800	if (bn_wexpand(dv, 1) == NULL) {
2801		goto end;
2802	}
2803	dv->top = 1;
2804
2805	if (!BN_lshift(D, D, nm - nd)) {
2806		goto end;
2807	}
2808	for (i = nm-nd; i >= 0; i--) {
2809		if (!BN_lshift1(dv, dv)) {
2810			goto end;
2811		}
2812		if (BN_ucmp(rem, D) >= 0) {
2813			dv->d[0] |= 1;
2814			if (!BN_usub(rem, rem, D)) {
2815				goto end;
2816			}
2817		}
2818		if (!BN_rshift1(D, D)) {
2819			goto end;
2820		}
2821	}
2822	rem->neg = BN_is_zero(rem) ? 0 : m->neg;
2823	dv->neg = m->neg ^ d->neg;
2824	ret = 1;
2825end:
2826	BN_CTX_end(ctx);
2827
2828	return (ret);
2829}
2830
2831
2832/*
2833 * Mul
2834 */
2835void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
2836
2837#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
2838
2839/* Here follows specialised variants of bn_add_words() and
2840 * bn_sub_words().  They have the property performing operations on
2841 * arrays of different sizes.  The sizes of those arrays is expressed through
2842 * cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
2843 * which is the delta between the two lengths, calculated as len(a)-len(b).
2844 * All lengths are the number of BN_ULONGs...  For the operations that require
2845 * a result array as parameter, it must have the length cl+abs(dl).
2846 * These functions should probably end up in bn_asm.c as soon as there are
2847 * assembler counterparts for the systems that use assembler files.  */
2848BN_ULONG
2849bn_sub_part_words(BN_ULONG *r,
2850    const BN_ULONG *a, const BN_ULONG *b,
2851    int cl, int dl)
2852{
2853	BN_ULONG c, t;
2854
2855	assert(cl >= 0);
2856	c = bn_sub_words(r, a, b, cl);
2857
2858	if (dl == 0) {
2859		return (c);
2860	}
2861
2862	r += cl;
2863	a += cl;
2864	b += cl;
2865
2866	if (dl < 0) {
2867#ifdef BN_COUNT
2868		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
2869#endif
2870		for ( ; ; ) {
2871			t = b[0];
2872			r[0] = (0-t-c)&BN_MASK2;
2873			if (t != 0) {
2874				c = 1;
2875			}
2876			if (++dl >= 0) {
2877				break;
2878			}
2879
2880			t = b[1];
2881			r[1] = (0-t-c)&BN_MASK2;
2882			if (t != 0) {
2883				c = 1;
2884			}
2885			if (++dl >= 0) {
2886				break;
2887			}
2888
2889			t = b[2];
2890			r[2] = (0-t-c)&BN_MASK2;
2891			if (t != 0) {
2892				c = 1;
2893			}
2894			if (++dl >= 0) {
2895				break;
2896			}
2897
2898			t = b[3];
2899			r[3] = (0-t-c)&BN_MASK2;
2900			if (t != 0) {
2901				c = 1;
2902			}
2903			if (++dl >= 0) {
2904				break;
2905			}
2906
2907			b += 4;
2908			r += 4;
2909		}
2910	}else          {
2911		int save_dl = dl;
2912#ifdef BN_COUNT
2913		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
2914#endif
2915		while (c) {
2916			t = a[0];
2917			r[0] = (t-c)&BN_MASK2;
2918			if (t != 0) {
2919				c = 0;
2920			}
2921			if (--dl <= 0) {
2922				break;
2923			}
2924
2925			t = a[1];
2926			r[1] = (t-c)&BN_MASK2;
2927			if (t != 0) {
2928				c = 0;
2929			}
2930			if (--dl <= 0) {
2931				break;
2932			}
2933
2934			t = a[2];
2935			r[2] = (t-c)&BN_MASK2;
2936			if (t != 0) {
2937				c = 0;
2938			}
2939			if (--dl <= 0) {
2940				break;
2941			}
2942
2943			t = a[3];
2944			r[3] = (t-c)&BN_MASK2;
2945			if (t != 0) {
2946				c = 0;
2947			}
2948			if (--dl <= 0) {
2949				break;
2950			}
2951
2952			save_dl = dl;
2953			a += 4;
2954			r += 4;
2955		}
2956		if (dl > 0) {
2957#ifdef BN_COUNT
2958			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
2959#endif
2960			if (save_dl > dl) {
2961				switch (save_dl - dl) {
2962				case 1:
2963					r[1] = a[1];
2964					if (--dl <= 0) {
2965						break;
2966					}
2967
2968				case 2:
2969					r[2] = a[2];
2970					if (--dl <= 0) {
2971						break;
2972					}
2973
2974				case 3:
2975					r[3] = a[3];
2976					if (--dl <= 0) {
2977						break;
2978					}
2979				}
2980				a += 4;
2981				r += 4;
2982			}
2983		}
2984		if (dl > 0) {
2985#ifdef BN_COUNT
2986			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
2987#endif
2988			for ( ; ; ) {
2989				r[0] = a[0];
2990				if (--dl <= 0) {
2991					break;
2992				}
2993				r[1] = a[1];
2994				if (--dl <= 0) {
2995					break;
2996				}
2997				r[2] = a[2];
2998				if (--dl <= 0) {
2999					break;
3000				}
3001				r[3] = a[3];
3002				if (--dl <= 0) {
3003					break;
3004				}
3005
3006				a += 4;
3007				r += 4;
3008			}
3009		}
3010	}
3011	return (c);
3012}
3013
3014
3015#endif
3016
3017BN_ULONG bn_add_part_words(BN_ULONG *r,
3018    const BN_ULONG *a, const BN_ULONG *b,
3019    int cl, int dl)
3020{
3021	BN_ULONG c, l, t;
3022
3023	assert(cl >= 0);
3024	c = bn_add_words(r, a, b, cl);
3025
3026	if (dl == 0) {
3027		return (c);
3028	}
3029
3030	r += cl;
3031	a += cl;
3032	b += cl;
3033
3034	if (dl < 0) {
3035		int save_dl = dl;
3036#ifdef BN_COUNT
3037		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
3038#endif
3039		while (c) {
3040			l = (c+b[0])&BN_MASK2;
3041			c = (l < c);
3042			r[0] = l;
3043			if (++dl >= 0) {
3044				break;
3045			}
3046
3047			l = (c+b[1])&BN_MASK2;
3048			c = (l < c);
3049			r[1] = l;
3050			if (++dl >= 0) {
3051				break;
3052			}
3053
3054			l = (c+b[2])&BN_MASK2;
3055			c = (l < c);
3056			r[2] = l;
3057			if (++dl >= 0) {
3058				break;
3059			}
3060
3061			l = (c+b[3])&BN_MASK2;
3062			c = (l < c);
3063			r[3] = l;
3064			if (++dl >= 0) {
3065				break;
3066			}
3067
3068			save_dl = dl;
3069			b += 4;
3070			r += 4;
3071		}
3072		if (dl < 0) {
3073#ifdef BN_COUNT
3074			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
3075#endif
3076			if (save_dl < dl) {
3077				switch (dl - save_dl) {
3078				case 1:
3079					r[1] = b[1];
3080					if (++dl >= 0) {
3081						break;
3082					}
3083
3084				case 2:
3085					r[2] = b[2];
3086					if (++dl >= 0) {
3087						break;
3088					}
3089
3090				case 3:
3091					r[3] = b[3];
3092					if (++dl >= 0) {
3093						break;
3094					}
3095				}
3096				b += 4;
3097				r += 4;
3098			}
3099		}
3100		if (dl < 0) {
3101#ifdef BN_COUNT
3102			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
3103#endif
3104			for ( ; ; ) {
3105				r[0] = b[0];
3106				if (++dl >= 0) {
3107					break;
3108				}
3109				r[1] = b[1];
3110				if (++dl >= 0) {
3111					break;
3112				}
3113				r[2] = b[2];
3114				if (++dl >= 0) {
3115					break;
3116				}
3117				r[3] = b[3];
3118				if (++dl >= 0) {
3119					break;
3120				}
3121
3122				b += 4;
3123				r += 4;
3124			}
3125		}
3126	}else          {
3127		int save_dl = dl;
3128#ifdef BN_COUNT
3129		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
3130#endif
3131		while (c) {
3132			t = (a[0]+c)&BN_MASK2;
3133			c = (t < c);
3134			r[0] = t;
3135			if (--dl <= 0) {
3136				break;
3137			}
3138
3139			t = (a[1]+c)&BN_MASK2;
3140			c = (t < c);
3141			r[1] = t;
3142			if (--dl <= 0) {
3143				break;
3144			}
3145
3146			t = (a[2]+c)&BN_MASK2;
3147			c = (t < c);
3148			r[2] = t;
3149			if (--dl <= 0) {
3150				break;
3151			}
3152
3153			t = (a[3]+c)&BN_MASK2;
3154			c = (t < c);
3155			r[3] = t;
3156			if (--dl <= 0) {
3157				break;
3158			}
3159
3160			save_dl = dl;
3161			a += 4;
3162			r += 4;
3163		}
3164#ifdef BN_COUNT
3165		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
3166#endif
3167		if (dl > 0) {
3168			if (save_dl > dl) {
3169				switch (save_dl - dl) {
3170				case 1:
3171					r[1] = a[1];
3172					if (--dl <= 0) {
3173						break;
3174					}
3175
3176				case 2:
3177					r[2] = a[2];
3178					if (--dl <= 0) {
3179						break;
3180					}
3181
3182				case 3:
3183					r[3] = a[3];
3184					if (--dl <= 0) {
3185						break;
3186					}
3187				}
3188				a += 4;
3189				r += 4;
3190			}
3191		}
3192		if (dl > 0) {
3193#ifdef BN_COUNT
3194			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
3195#endif
3196			for ( ; ; ) {
3197				r[0] = a[0];
3198				if (--dl <= 0) {
3199					break;
3200				}
3201				r[1] = a[1];
3202				if (--dl <= 0) {
3203					break;
3204				}
3205				r[2] = a[2];
3206				if (--dl <= 0) {
3207					break;
3208				}
3209				r[3] = a[3];
3210				if (--dl <= 0) {
3211					break;
3212				}
3213
3214				a += 4;
3215				r += 4;
3216			}
3217		}
3218	}
3219	return (c);
3220}
3221
3222
3223#ifdef BN_RECURSION
3224
3225/* Karatsuba recursive multiplication algorithm
3226 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
3227
3228/* r is 2*n2 words in size,
3229 * a and b are both n2 words in size.
3230 * n2 must be a power of 2.
3231 * We multiply and return the result.
3232 * t must be 2*n2 words in size
3233 * We calculate
3234 * a[0]*b[0]
3235 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
3236 * a[1]*b[1]
3237 */
3238/* dnX may not be positive, but n2/2+dnX has to be */
3239void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
3240    int dna, int dnb, BN_ULONG *t)
3241{
3242	int n = n2/2, c1, c2;
3243	int tna = n+dna, tnb = n+dnb;
3244	unsigned int neg, zero;
3245	BN_ULONG ln, lo, *p;
3246
3247# ifdef BN_COUNT
3248	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n", n2, dna, n2, dnb);
3249# endif
3250# ifdef BN_MUL_COMBA
3251#  if 0
3252	if (n2 == 4) {
3253		bn_mul_comba4(r, a, b);
3254		return;
3255	}
3256#  endif
3257
3258	/* Only call bn_mul_comba 8 if n2 == 8 and the
3259	 * two arrays are complete [steve]
3260	 */
3261	if ((n2 == 8) && (dna == 0) && (dnb == 0)) {
3262		bn_mul_comba8(r, a, b);
3263		return;
3264	}
3265# endif /* BN_MUL_COMBA */
3266	/* Else do normal multiply */
3267	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
3268		bn_mul_normal(r, a, n2+dna, b, n2+dnb);
3269		if ((dna + dnb) < 0) {
3270			memset(&r[2*n2 + dna + dnb], 0,
3271			    sizeof(BN_ULONG) * -(dna + dnb));
3272		}
3273		return;
3274	}
3275	/* r=(a[0]-a[1])*(b[1]-b[0]) */
3276	c1 = bn_cmp_part_words(a, &(a[n]), tna, n-tna);
3277	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb-n);
3278	zero = neg = 0;
3279	switch (c1*3+c2) {
3280	case -4:
3281		bn_sub_part_words(t, &(a[n]), a, tna, tna-n);           /* - */
3282		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n-tnb);     /* - */
3283		break;
3284
3285	case -3:
3286		zero = 1;
3287		break;
3288
3289	case -2:
3290		bn_sub_part_words(t, &(a[n]), a, tna, tna-n);           /* - */
3291		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb-n);     /* + */
3292		neg = 1;
3293		break;
3294
3295	case -1:
3296	case 0:
3297	case 1:
3298		zero = 1;
3299		break;
3300
3301	case 2:
3302		bn_sub_part_words(t, a, &(a[n]), tna, n-tna);           /* + */
3303		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n-tnb);     /* - */
3304		neg = 1;
3305		break;
3306
3307	case 3:
3308		zero = 1;
3309		break;
3310
3311	case 4:
3312		bn_sub_part_words(t, a, &(a[n]), tna, n-tna);
3313		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb-n);
3314		break;
3315	}
3316
3317# ifdef BN_MUL_COMBA
3318	if ((n == 4) && (dna == 0) && (dnb == 0)) {
3319		/* XXX: bn_mul_comba4 could take
3320		 * extra args to do this well */
3321		if (!zero) {
3322			bn_mul_comba4(&(t[n2]), t, &(t[n]));
3323		} else{
3324			memset(&(t[n2]), 0, 8*sizeof(BN_ULONG));
3325		}
3326
3327		bn_mul_comba4(r, a, b);
3328		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
3329	}else if ((n == 8) && (dna == 0) && (dnb == 0))                {
3330		/* XXX: bn_mul_comba8 could
3331		 * take extra args to do this
3332		 * well */
3333		if (!zero) {
3334			bn_mul_comba8(&(t[n2]), t, &(t[n]));
3335		} else{
3336			memset(&(t[n2]), 0, 16*sizeof(BN_ULONG));
3337		}
3338
3339		bn_mul_comba8(r, a, b);
3340		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
3341	}else
3342# endif /* BN_MUL_COMBA */
3343	{
3344		p = &(t[n2*2]);
3345		if (!zero) {
3346			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
3347		} else{
3348			memset(&(t[n2]), 0, n2*sizeof(BN_ULONG));
3349		}
3350		bn_mul_recursive(r, a, b, n, 0, 0, p);
3351		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
3352	}
3353
3354	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
3355	 * r[10] holds (a[0]*b[0])
3356	 * r[32] holds (b[1]*b[1])
3357	 */
3358
3359	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
3360
3361	if (neg) { /* if t[32] is negative */
3362		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
3363	}else          {
3364		/* Might have a carry */
3365		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
3366	}
3367
3368	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
3369	 * r[10] holds (a[0]*b[0])
3370	 * r[32] holds (b[1]*b[1])
3371	 * c1 holds the carry bits
3372	 */
3373	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
3374	if (c1) {
3375		p = &(r[n+n2]);
3376		lo = *p;
3377		ln = (lo+c1)&BN_MASK2;
3378		*p = ln;
3379
3380		/* The overflow will stop before we over write
3381		 * words we should not overwrite */
3382		if (ln < (BN_ULONG)c1) {
3383			do {
3384				p++;
3385				lo = *p;
3386				ln = (lo+1)&BN_MASK2;
3387				*p = ln;
3388			} while (ln == 0);
3389		}
3390	}
3391}
3392
3393
3394/* n+tn is the word length
3395 * t needs to be n*4 is size, as does r */
3396/* tnX may not be negative but less than n */
3397void
3398bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
3399    int tna, int tnb, BN_ULONG *t)
3400{
3401	int i, j, n2 = n*2;
3402	int c1, c2, neg;
3403	BN_ULONG ln, lo, *p;
3404
3405# ifdef BN_COUNT
3406	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
3407	    n, tna, n, tnb);
3408# endif
3409	if (n < 8) {
3410		bn_mul_normal(r, a, n+tna, b, n+tnb);
3411		return;
3412	}
3413
3414	/* r=(a[0]-a[1])*(b[1]-b[0]) */
3415	c1 = bn_cmp_part_words(a, &(a[n]), tna, n-tna);
3416	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb-n);
3417	neg = 0;
3418	switch (c1*3+c2) {
3419	case -4:
3420		bn_sub_part_words(t, &(a[n]), a, tna, tna-n);           /* - */
3421		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n-tnb);     /* - */
3422		break;
3423
3424	case -3:
3425	/* break; */
3426	case -2:
3427		bn_sub_part_words(t, &(a[n]), a, tna, tna-n);           /* - */
3428		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb-n);     /* + */
3429		neg = 1;
3430		break;
3431
3432	case -1:
3433	case 0:
3434	case 1:
3435	/* break; */
3436	case 2:
3437		bn_sub_part_words(t, a, &(a[n]), tna, n-tna);           /* + */
3438		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n-tnb);     /* - */
3439		neg = 1;
3440		break;
3441
3442	case 3:
3443	/* break; */
3444	case 4:
3445		bn_sub_part_words(t, a, &(a[n]), tna, n-tna);
3446		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb-n);
3447		break;
3448	}
3449
3450	/* The zero case isn't yet implemented here. The speedup
3451	 * would probably be negligible. */
3452# if 0
3453	if (n == 4) {
3454		bn_mul_comba4(&(t[n2]), t, &(t[n]));
3455		bn_mul_comba4(r, a, b);
3456		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
3457		memset(&(r[n2+tn*2]), 0, sizeof(BN_ULONG)*(n2-tn*2));
3458	}else
3459# endif
3460	if (n == 8) {
3461		bn_mul_comba8(&(t[n2]), t, &(t[n]));
3462		bn_mul_comba8(r, a, b);
3463		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
3464		memset(&(r[n2+tna+tnb]), 0, sizeof(BN_ULONG)*(n2-tna-tnb));
3465	}else          {
3466		p = &(t[n2*2]);
3467		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
3468		bn_mul_recursive(r, a, b, n, 0, 0, p);
3469		i = n/2;
3470
3471		/* If there is only a bottom half to the number,
3472		 * just do it */
3473		if (tna > tnb) {
3474			j = tna - i;
3475		} else{
3476			j = tnb - i;
3477		}
3478		if (j == 0) {
3479			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
3480			    i, tna-i, tnb-i, p);
3481			memset(&(r[n2+i*2]), 0, sizeof(BN_ULONG)*(n2-i*2));
3482		}else if (j > 0)          { /* eg, n == 16, i == 8 and tn == 11 */
3483			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
3484			    i, tna-i, tnb-i, p);
3485			memset(&(r[n2+tna+tnb]), 0,
3486			    sizeof(BN_ULONG)*(n2-tna-tnb));
3487		}else                  { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
3488			memset(&(r[n2]), 0, sizeof(BN_ULONG)*n2);
3489			if ((tna < BN_MUL_RECURSIVE_SIZE_NORMAL) &&
3490			    (tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)) {
3491				bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
3492			}else          {
3493				for ( ; ; ) {
3494					i /= 2;
3495
3496					/* these simplified conditions work
3497					 * exclusively because difference
3498					 * between tna and tnb is 1 or 0 */
3499					if ((i < tna) || (i < tnb)) {
3500						bn_mul_part_recursive(&(r[n2]),
3501						    &(a[n]), &(b[n]),
3502						    i, tna-i, tnb-i, p);
3503						break;
3504					}else if ((i == tna) || (i == tnb))              {
3505						bn_mul_recursive(&(r[n2]),
3506						    &(a[n]), &(b[n]),
3507						    i, tna-i, tnb-i, p);
3508						break;
3509					}
3510				}
3511			}
3512		}
3513	}
3514
3515	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
3516	 * r[10] holds (a[0]*b[0])
3517	 * r[32] holds (b[1]*b[1])
3518	 */
3519
3520	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
3521
3522	if (neg) { /* if t[32] is negative */
3523		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
3524	}else          {
3525		/* Might have a carry */
3526		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
3527	}
3528
3529	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
3530	 * r[10] holds (a[0]*b[0])
3531	 * r[32] holds (b[1]*b[1])
3532	 * c1 holds the carry bits
3533	 */
3534	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
3535	if (c1) {
3536		p = &(r[n+n2]);
3537		lo = *p;
3538		ln = (lo+c1)&BN_MASK2;
3539		*p = ln;
3540
3541		/* The overflow will stop before we over write
3542		 * words we should not overwrite */
3543		if (ln < (BN_ULONG)c1) {
3544			do {
3545				p++;
3546				lo = *p;
3547				ln = (lo+1)&BN_MASK2;
3548				*p = ln;
3549			} while (ln == 0);
3550		}
3551	}
3552}
3553
3554
3555/* a and b must be the same size, which is n2.
3556 * r needs to be n2 words and t needs to be n2*2
3557 */
3558void
3559bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
3560    BN_ULONG *t)
3561{
3562	int n = n2/2;
3563
3564# ifdef BN_COUNT
3565	fprintf(stderr, " bn_mul_low_recursive %d * %d\n", n2, n2);
3566# endif
3567
3568	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
3569	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
3570		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
3571		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
3572		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
3573		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
3574	}else          {
3575		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
3576		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
3577		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
3578		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
3579	}
3580}
3581
3582
3583/* a and b must be the same size, which is n2.
3584 * r needs to be n2 words and t needs to be n2*2
3585 * l is the low words of the output.
3586 * t needs to be n2*3
3587 */
3588void
3589bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
3590    BN_ULONG *t)
3591{
3592	int i, n;
3593	int c1, c2;
3594	int neg, oneg, zero;
3595	BN_ULONG ll, lc, *lp, *mp;
3596
3597# ifdef BN_COUNT
3598	fprintf(stderr, " bn_mul_high %d * %d\n", n2, n2);
3599# endif
3600	n = n2/2;
3601
3602	/* Calculate (al-ah)*(bh-bl) */
3603	neg = zero = 0;
3604	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
3605	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
3606	switch (c1*3+c2) {
3607	case -4:
3608		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
3609		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
3610		break;
3611
3612	case -3:
3613		zero = 1;
3614		break;
3615
3616	case -2:
3617		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
3618		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
3619		neg = 1;
3620		break;
3621
3622	case -1:
3623	case 0:
3624	case 1:
3625		zero = 1;
3626		break;
3627
3628	case 2:
3629		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
3630		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
3631		neg = 1;
3632		break;
3633
3634	case 3:
3635		zero = 1;
3636		break;
3637
3638	case 4:
3639		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
3640		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
3641		break;
3642	}
3643
3644	oneg = neg;
3645	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
3646	/* r[10] = (a[1]*b[1]) */
3647# ifdef BN_MUL_COMBA
3648	if (n == 8) {
3649		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
3650		bn_mul_comba8(r, &(a[n]), &(b[n]));
3651	}else
3652# endif
3653	{
3654		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
3655		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
3656	}
3657
3658	/* s0 == low(al*bl)
3659	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
3660	 * We know s0 and s1 so the only unknown is high(al*bl)
3661	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
3662	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
3663	 */
3664	if (l != NULL) {
3665		lp = &(t[n2+n]);
3666		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
3667	}else          {
3668		c1 = 0;
3669		lp = &(r[0]);
3670	}
3671
3672	if (neg) {
3673		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
3674	} else{
3675		bn_add_words(&(t[n2]), lp, &(t[0]), n);
3676		neg = 0;
3677	}
3678
3679	if (l != NULL) {
3680		bn_sub_words(&(t[n2+n]), &(l[n]), &(t[n2]), n);
3681	}else          {
3682		lp = &(t[n2+n]);
3683		mp = &(t[n2]);
3684		for (i = 0; i < n; i++) {
3685			lp[i] = ((~mp[i])+1)&BN_MASK2;
3686		}
3687	}
3688
3689	/* s[0] = low(al*bl)
3690	 * t[3] = high(al*bl)
3691	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
3692	 * r[10] = (a[1]*b[1])
3693	 */
3694
3695	/* R[10] = al*bl
3696	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
3697	 * R[32] = ah*bh
3698	 */
3699
3700	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
3701	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
3702	 * R[3]=r[1]+(carry/borrow)
3703	 */
3704	if (l != NULL) {
3705		lp = &(t[n2]);
3706		c1 = (int)(bn_add_words(lp, &(t[n2+n]), &(l[0]), n));
3707	}else          {
3708		lp = &(t[n2+n]);
3709		c1 = 0;
3710	}
3711	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
3712	if (oneg) {
3713		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
3714	} else{
3715		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
3716	}
3717
3718	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2+n]), n));
3719	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
3720	if (oneg) {
3721		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
3722	} else{
3723		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
3724	}
3725
3726	if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */
3727		i = 0;
3728		if (c1 > 0) {
3729			lc = c1;
3730			do {
3731				ll = (r[i]+lc)&BN_MASK2;
3732				r[i++] = ll;
3733				lc = (lc > ll);
3734			} while (lc);
3735		}else          {
3736			lc = -c1;
3737			do {
3738				ll = r[i];
3739				r[i++] = (ll-lc)&BN_MASK2;
3740				lc = (lc > ll);
3741			} while (lc);
3742		}
3743	}
3744	if (c2 != 0) { /* Add starting at r[1] */
3745		i = n;
3746		if (c2 > 0) {
3747			lc = c2;
3748			do {
3749				ll = (r[i]+lc)&BN_MASK2;
3750				r[i++] = ll;
3751				lc = (lc > ll);
3752			} while (lc);
3753		}else          {
3754			lc = -c2;
3755			do {
3756				ll = r[i];
3757				r[i++] = (ll-lc)&BN_MASK2;
3758				lc = (lc > ll);
3759			} while (lc);
3760		}
3761	}
3762}
3763
3764
3765#endif /* BN_RECURSION */
3766
3767int
3768BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
3769{
3770	int ret = 0;
3771	int top, al, bl;
3772	BIGNUM *rr;
3773
3774#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
3775	int i;
3776#endif
3777#ifdef BN_RECURSION
3778	BIGNUM *t = NULL;
3779	int j = 0, k;
3780#endif
3781
3782#ifdef BN_COUNT
3783	fprintf(stderr, "BN_mul %d * %d\n", a->top, b->top);
3784#endif
3785
3786	bn_check_top(a);
3787	bn_check_top(b);
3788	bn_check_top(r);
3789
3790	al = a->top;
3791	bl = b->top;
3792
3793	if ((al == 0) || (bl == 0)) {
3794		BN_zero(r);
3795		return (1);
3796	}
3797	top = al + bl;
3798
3799	BN_CTX_start(ctx);
3800	if ((r == a) || (r == b)) {
3801		if ((rr = BN_CTX_get(ctx)) == NULL) {
3802			goto err;
3803		}
3804	} else{
3805		rr = r;
3806	}
3807	rr->neg = a->neg^b->neg;
3808
3809#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
3810	i = al - bl;
3811#endif
3812#ifdef BN_MUL_COMBA
3813	if (i == 0) {
3814# if 0
3815		if (al == 4) {
3816			if (bn_wexpand(rr, 8) == NULL) {
3817				goto err;
3818			}
3819			rr->top = 8;
3820			bn_mul_comba4(rr->d, a->d, b->d);
3821			goto end;
3822		}
3823# endif
3824		if (al == 8) {
3825			if (bn_wexpand(rr, 16) == NULL) {
3826				goto err;
3827			}
3828			rr->top = 16;
3829			bn_mul_comba8(rr->d, a->d, b->d);
3830			goto end;
3831		}
3832	}
3833#endif  /* BN_MUL_COMBA */
3834#ifdef BN_RECURSION
3835	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
3836		if ((i >= -1) && (i <= 1)) {
3837			/* Find out the power of two lower or equal
3838			 * to the longest of the two numbers */
3839			if (i >= 0) {
3840				j = BN_num_bits_word((BN_ULONG)al);
3841			}
3842			if (i == -1) {
3843				j = BN_num_bits_word((BN_ULONG)bl);
3844			}
3845			j = 1 << (j-1);
3846			assert(j <= al || j <= bl);
3847			k = j + j;
3848			t = BN_CTX_get(ctx);
3849			if (t == NULL) {
3850				goto err;
3851			}
3852			if ((al > j) || (bl > j)) {
3853				if (bn_wexpand(t, k*4) == NULL) {
3854					goto err;
3855				}
3856				if (bn_wexpand(rr, k*4) == NULL) {
3857					goto err;
3858				}
3859				bn_mul_part_recursive(rr->d, a->d, b->d,
3860				    j, al-j, bl-j, t->d);
3861			} else {
3862				/* al <= j || bl <= j */
3863				if (bn_wexpand(t, k * 2) == NULL) {
3864					goto err;
3865				}
3866				if (bn_wexpand(rr, k * 2) == NULL) {
3867					goto err;
3868				}
3869				bn_mul_recursive(rr->d, a->d, b->d,
3870				    j, al - j, bl - j, t->d);
3871			}
3872			rr->top = top;
3873			goto end;
3874		}
3875#if 0
3876		if ((i == 1) && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
3877			BIGNUM *tmp_bn = (BIGNUM *)b;
3878			if (bn_wexpand(tmp_bn, al) == NULL) {
3879				goto err;
3880			}
3881			tmp_bn->d[bl] = 0;
3882			bl++;
3883			i--;
3884		}else if ((i == -1) && !BN_get_flags(a, BN_FLG_STATIC_DATA))             {
3885			BIGNUM *tmp_bn = (BIGNUM *)a;
3886			if (bn_wexpand(tmp_bn, bl) == NULL) {
3887				goto err;
3888			}
3889			tmp_bn->d[al] = 0;
3890			al++;
3891			i++;
3892		}
3893		if (i == 0) {
3894			/* symmetric and > 4 */
3895			/* 16 or larger */
3896			j = BN_num_bits_word((BN_ULONG)al);
3897			j = 1<<(j-1);
3898			k = j+j;
3899			t = BN_CTX_get(ctx);
3900			if (al == j) { /* exact multiple */
3901				if (bn_wexpand(t, k*2) == NULL) {
3902					goto err;
3903				}
3904				if (bn_wexpand(rr, k*2) == NULL) {
3905					goto err;
3906				}
3907				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
3908			}else          {
3909				if (bn_wexpand(t, k*4) == NULL) {
3910					goto err;
3911				}
3912				if (bn_wexpand(rr, k*4) == NULL) {
3913					goto err;
3914				}
3915				bn_mul_part_recursive(rr->d, a->d, b->d, al-j, j, t->d);
3916			}
3917			rr->top = top;
3918			goto end;
3919		}
3920#endif
3921	}
3922#endif  /* BN_RECURSION */
3923	if (bn_wexpand(rr, top) == NULL) {
3924		goto err;
3925	}
3926	rr->top = top;
3927	bn_mul_normal(rr->d, a->d, al, b->d, bl);
3928
3929#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
3930end:
3931#endif
3932	bn_correct_top(rr);
3933	if (r != rr) {
3934		BN_copy(r, rr);
3935	}
3936	ret = 1;
3937err:
3938	bn_check_top(r);
3939	BN_CTX_end(ctx);
3940	return (ret);
3941}
3942
3943
3944void
3945bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
3946{
3947	BN_ULONG *rr;
3948
3949#ifdef BN_COUNT
3950	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
3951#endif
3952
3953	if (na < nb) {
3954		int itmp;
3955		BN_ULONG *ltmp;
3956
3957		itmp = na;
3958		na = nb;
3959		nb = itmp;
3960		ltmp = a;
3961		a = b;
3962		b = ltmp;
3963	}
3964	rr = &(r[na]);
3965	if (nb <= 0) {
3966		(void)bn_mul_words(r, a, na, 0);
3967		return;
3968	} else{
3969		rr[0] = bn_mul_words(r, a, na, b[0]);
3970	}
3971
3972	for ( ; ; ) {
3973		if (--nb <= 0) {
3974			return;
3975		}
3976		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
3977		if (--nb <= 0) {
3978			return;
3979		}
3980		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
3981		if (--nb <= 0) {
3982			return;
3983		}
3984		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
3985		if (--nb <= 0) {
3986			return;
3987		}
3988		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
3989		rr += 4;
3990		r += 4;
3991		b += 4;
3992	}
3993}
3994
3995
3996#ifdef RECURSION
3997void
3998bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
3999{
4000#ifdef BN_COUNT
4001	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
4002#endif
4003	bn_mul_words(r, a, n, b[0]);
4004
4005	for ( ; ; ) {
4006		if (--n <= 0) {
4007			return;
4008		}
4009		bn_mul_add_words(&(r[1]), a, n, b[1]);
4010		if (--n <= 0) {
4011			return;
4012		}
4013		bn_mul_add_words(&(r[2]), a, n, b[2]);
4014		if (--n <= 0) {
4015			return;
4016		}
4017		bn_mul_add_words(&(r[3]), a, n, b[3]);
4018		if (--n <= 0) {
4019			return;
4020		}
4021		bn_mul_add_words(&(r[4]), a, n, b[4]);
4022		r += 4;
4023		b += 4;
4024	}
4025}
4026
4027
4028#endif /* RECURSION */
4029
4030/*
4031 * mod
4032 */
4033int
4034BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
4035{
4036	return (BN_div(NULL, rem, m, d, ctx));
4037}
4038
4039
4040int
4041BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
4042{
4043	/* like BN_mod, but returns non-negative remainder
4044	 * (i.e.,  0 <= r < |d|  always holds) */
4045
4046	if (!(BN_mod(r, m, d, ctx))) {
4047		return (0);
4048	}
4049	if (!r->neg) {
4050		return (1);
4051	}
4052	/* now   -|d| < r < 0,  so we have to set  r := r + |d| */
4053	return ((d->neg ? BN_sub : BN_add)(r, r, d));
4054}
4055
4056
4057/* slow but works */
4058int
4059BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
4060    BN_CTX *ctx)
4061{
4062	BIGNUM *t;
4063	int ret = 0;
4064
4065	bn_check_top(a);
4066	bn_check_top(b);
4067	bn_check_top(m);
4068
4069	BN_CTX_start(ctx);
4070	if ((t = BN_CTX_get(ctx)) == NULL) {
4071		goto err;
4072	}
4073	if (a == b) {
4074		if (!BN_sqr(t, a, ctx)) {
4075			goto err;
4076		}
4077	} else {
4078		if (!BN_mul(t, a, b, ctx)) {
4079			goto err;
4080		}
4081	}
4082	if (!BN_nnmod(r, t, m, ctx)) {
4083		goto err;
4084	}
4085	bn_check_top(r);
4086	ret = 1;
4087err:
4088	BN_CTX_end(ctx);
4089	return (ret);
4090}
4091
4092
4093#define EXP_TABLE_SIZE    32
4094
4095/* The old fallback, simple version :-) */
4096int
4097BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
4098    const BIGNUM *m, BN_CTX *ctx)
4099{
4100	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
4101	int start = 1;
4102	BIGNUM *d;
4103	/* Table of variables obtained from 'ctx' */
4104	BIGNUM *val[EXP_TABLE_SIZE];
4105
4106	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
4107		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
4108		/* BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); */
4109		return (-1);
4110	}
4111
4112	bits = BN_num_bits(p);
4113
4114	if (bits == 0) {
4115		ret = BN_one(r);
4116		return (ret);
4117	}
4118
4119	BN_CTX_start(ctx);
4120	d = BN_CTX_get(ctx);
4121	val[0] = BN_CTX_get(ctx);
4122	if (!d || !val[0]) {
4123		goto err;
4124	}
4125
4126	if (!BN_nnmod(val[0], a, m, ctx)) {
4127		goto err;                                       /* 1 */
4128	}
4129	if (BN_is_zero(val[0])) {
4130		BN_zero(r);
4131		ret = 1;
4132		goto err;
4133	}
4134
4135	window = BN_window_bits_for_exponent_size(bits);
4136	if (window > 1) {
4137		if (!BN_mod_mul(d, val[0], val[0], m, ctx)) {
4138			goto err;                               /* 2 */
4139		}
4140		j = 1<<(window-1);
4141		for (i = 1; i < j; i++) {
4142			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
4143			    !BN_mod_mul(val[i], val[i-1], d, m, ctx)) {
4144				goto err;
4145			}
4146		}
4147	}
4148
4149	start = 1;              /* This is used to avoid multiplication etc
4150	                         * when there is only the value '1' in the
4151	                         * buffer. */
4152	wvalue = 0;             /* The 'value' of the window */
4153	wstart = bits - 1;      /* The top bit of the window */
4154	wend = 0;               /* The bottom bit of the window */
4155
4156	if (!BN_one(r)) {
4157		goto err;
4158	}
4159
4160	for ( ; ; ) {
4161		if (BN_is_bit_set(p, wstart) == 0) {
4162			if (!start) {
4163				if (!BN_mod_mul(r, r, r, m, ctx)) {
4164					goto err;
4165				}
4166			}
4167			if (wstart == 0) {
4168				break;
4169			}
4170			wstart--;
4171			continue;
4172		}
4173
4174		/* We now have wstart on a 'set' bit, we now need to work out
4175		 * how bit a window to do.  To do this we need to scan
4176		 * forward until the last set bit before the end of the
4177		 * window */
4178		j = wstart;
4179		wvalue = 1;
4180		wend = 0;
4181		for (i = 1; i < window; i++) {
4182			if (wstart - i < 0) {
4183				break;
4184			}
4185			if (BN_is_bit_set(p, wstart - i)) {
4186				wvalue <<= (i - wend);
4187				wvalue |= 1;
4188				wend = i;
4189			}
4190		}
4191
4192		/* wend is the size of the current window */
4193		j = wend + 1;
4194		/* add the 'bytes above' */
4195		if (!start) {
4196			for (i = 0; i < j; i++) {
4197				if (!BN_mod_mul(r, r, r, m, ctx)) {
4198					goto err;
4199				}
4200			}
4201		}
4202
4203		/* wvalue will be an odd number < 2^window */
4204		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) {
4205			goto err;
4206		}
4207
4208		/* move the 'window' down further */
4209		wstart -= wend + 1;
4210		wvalue = 0;
4211		start = 0;
4212		if (wstart < 0) {
4213			break;
4214		}
4215	}
4216	ret = 1;
4217err:
4218	BN_CTX_end(ctx);
4219	bn_check_top(r);
4220
4221	return (ret);
4222}
4223
4224
4225int
4226BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
4227    BN_CTX *ctx)
4228{
4229	int ret;
4230
4231	bn_check_top(a);
4232	bn_check_top(p);
4233	bn_check_top(m);
4234
4235	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
4236	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
4237	 * exponentiation for the odd part), using appropriate exponent
4238	 * reductions, and combine the results using the CRT.
4239	 *
4240	 * For now, we use Montgomery only if the modulus is odd; otherwise,
4241	 * exponentiation using the reciprocal-based quick remaindering
4242	 * algorithm is used.
4243	 *
4244	 * (Timing obtained with expspeed.c [computations  a^p mod m
4245	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
4246	 * 4096, 8192 bits], compared to the running time of the
4247	 * standard algorithm:
4248	 *
4249	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
4250	 *                     55 .. 77 %  [UltraSparc processor, but
4251	 *                                  debug-solaris-sparcv8-gcc conf.]
4252	 *
4253	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
4254	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
4255	 *
4256	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
4257	 * at 2048 and more bits, but at 512 and 1024 bits, it was
4258	 * slower even than the standard algorithm!
4259	 *
4260	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
4261	 * should be obtained when the new Montgomery reduction code
4262	 * has been integrated into OpenSSL.)
4263	 */
4264
4265#define MONT_MUL_MOD
4266#define MONT_EXP_WORD
4267
4268/*
4269 * #define RECP_MUL_MOD
4270 */
4271
4272#ifdef MONT_MUL_MOD
4273
4274	/* I have finally been able to take out this pre-condition of
4275	 * the top bit being set.  It was caused by an error in BN_div
4276	 * with negatives.  There was also another problem when for a^b%m
4277	 * a >= m.  eay 07-May-97 */
4278/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
4279
4280	if (BN_is_odd(m)) {
4281#  ifdef MONT_EXP_WORD
4282		if ((a->top == 1) && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
4283			BN_ULONG A = a->d[0];
4284			ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
4285		} else
4286#  endif
4287		ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
4288	} else
4289#endif
4290#ifdef RECP_MUL_MOD
4291	{
4292		ret = BN_mod_exp_recp(r, a, p, m, ctx);
4293	}
4294#else
4295	{
4296		ret = BN_mod_exp_simple(r, a, p, m, ctx);
4297	}
4298#endif
4299
4300	bn_check_top(r);
4301	return (ret);
4302}
4303
4304
4305#ifdef RECP_MUL_MOD
4306int
4307BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
4308    const BIGNUM *m, BN_CTX *ctx)
4309{
4310	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
4311	int start = 1;
4312	BIGNUM *aa;
4313	/* Table of variables obtained from 'ctx' */
4314	BIGNUM *val[EXP_TABLE_SIZE];
4315	BN_RECP_CTX recp;
4316
4317	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
4318		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
4319		/* BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); */
4320		return (-1);
4321	}
4322
4323	bits = BN_num_bits(p);
4324
4325	if (bits == 0) {
4326		ret = BN_one(r);
4327		return (ret);
4328	}
4329
4330	BN_CTX_start(ctx);
4331	aa = BN_CTX_get(ctx);
4332	val[0] = BN_CTX_get(ctx);
4333	if (!aa || !val[0]) {
4334		goto err;
4335	}
4336
4337	BN_RECP_CTX_init(&recp);
4338	if (m->neg) {
4339		/* ignore sign of 'm' */
4340		if (!BN_copy(aa, m)) {
4341			goto err;
4342		}
4343		aa->neg = 0;
4344		if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
4345			goto err;
4346		}
4347	} else {
4348		if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
4349			goto err;
4350		}
4351	}
4352
4353	if (!BN_nnmod(val[0], a, m, ctx)) {
4354		goto err;                                       /* 1 */
4355	}
4356	if (BN_is_zero(val[0])) {
4357		BN_zero(r);
4358		ret = 1;
4359		goto err;
4360	}
4361
4362	window = BN_window_bits_for_exponent_size(bits);
4363	if (window > 1) {
4364		if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
4365			goto err;                               /* 2 */
4366		}
4367		j = 1 << (window - 1);
4368		for (i = 1; i < j; i++) {
4369			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
4370			    !BN_mod_mul_reciprocal(val[i], val[i-1],
4371			    aa, &recp, ctx)) {
4372				goto err;
4373			}
4374		}
4375	}
4376
4377	start = 1;              /* This is used to avoid multiplication etc
4378	                         * when there is only the value '1' in the
4379	                         * buffer. */
4380	wvalue = 0;             /* The 'value' of the window */
4381	wstart = bits - 1;      /* The top bit of the window */
4382	wend = 0;               /* The bottom bit of the window */
4383
4384	if (!BN_one(r)) {
4385		goto err;
4386	}
4387
4388	for ( ; ; ) {
4389		if (BN_is_bit_set(p, wstart) == 0) {
4390			if (!start) {
4391				if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
4392					goto err;
4393				}
4394			}
4395			if (wstart == 0) {
4396				break;
4397			}
4398			wstart--;
4399			continue;
4400		}
4401
4402		/* We now have wstart on a 'set' bit, we now need to work out
4403		 * how bit a window to do.  To do this we need to scan
4404		 * forward until the last set bit before the end of the
4405		 * window */
4406		j = wstart;
4407		wvalue = 1;
4408		wend = 0;
4409		for (i = 1; i < window; i++) {
4410			if (wstart - i < 0) {
4411				break;
4412			}
4413			if (BN_is_bit_set(p, wstart-i)) {
4414				wvalue <<= (i - wend);
4415				wvalue |= 1;
4416				wend = i;
4417			}
4418		}
4419
4420		/* wend is the size of the current window */
4421		j = wend + 1;
4422		/* add the 'bytes above' */
4423		if (!start) {
4424			for (i = 0; i < j; i++) {
4425				if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
4426					goto err;
4427				}
4428			}
4429		}
4430
4431		/* wvalue will be an odd number < 2^window */
4432		if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
4433			goto err;
4434		}
4435
4436		/* move the 'window' down further */
4437		wstart -= wend + 1;
4438		wvalue = 0;
4439		start = 0;
4440		if (wstart < 0) {
4441			break;
4442		}
4443	}
4444	ret = 1;
4445err:
4446	BN_CTX_end(ctx);
4447	BN_RECP_CTX_free(&recp);
4448	bn_check_top(r);
4449	return (ret);
4450}
4451
4452
4453#endif /* RECP_MUL_MOD */
4454
4455int
4456BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
4457    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
4458{
4459	BN_MONT_CTX *mont = NULL;
4460	int b, bits, ret = 0;
4461	int r_is_one;
4462	BN_ULONG w, next_w;
4463	BIGNUM *d, *r, *t;
4464	BIGNUM *swap_tmp;
4465
4466#define BN_MOD_MUL_WORD(r, w, m)		\
4467	(BN_mul_word(r, (w)) &&			\
4468	(        /* BN_ucmp(r, (m)) < 0 ? 1 :*/	\
4469		(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
4470
4471	/* BN_MOD_MUL_WORD is only used with 'w' large,
4472	 * so the BN_ucmp test is probably more overhead
4473	 * than always using BN_mod (which uses BN_copy if
4474	 * a similar test returns true). */
4475
4476	/* We can use BN_mod and do not need BN_nnmod because our
4477	 * accumulator is never negative (the result of BN_mod does
4478	 * not depend on the sign of the modulus).
4479	 */
4480#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
4481	(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
4482
4483	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
4484		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
4485		/* BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); */
4486		return (-1);
4487	}
4488
4489	bn_check_top(p);
4490	bn_check_top(m);
4491
4492	if (!BN_is_odd(m)) {
4493		/* BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); */
4494		return (0);
4495	}
4496	if (m->top == 1) {
4497		a %= m->d[0]; /* make sure that 'a' is reduced */
4498	}
4499	bits = BN_num_bits(p);
4500	if (bits == 0) {
4501		ret = BN_one(rr);
4502		return (ret);
4503	}
4504	if (a == 0) {
4505		BN_zero(rr);
4506		ret = 1;
4507		return (ret);
4508	}
4509
4510	BN_CTX_start(ctx);
4511	d = BN_CTX_get(ctx);
4512	r = BN_CTX_get(ctx);
4513	t = BN_CTX_get(ctx);
4514	if ((d == NULL) || (r == NULL) || (t == NULL)) {
4515		goto err;
4516	}
4517
4518	if (in_mont != NULL) {
4519		mont = in_mont;
4520	} else{
4521		if ((mont = BN_MONT_CTX_new()) == NULL) {
4522			goto err;
4523		}
4524		if (!BN_MONT_CTX_set(mont, m, ctx)) {
4525			goto err;
4526		}
4527	}
4528
4529	r_is_one = 1; /* except for Montgomery factor */
4530
4531	/* bits-1 >= 0 */
4532
4533	/* The result is accumulated in the product r*w. */
4534	w = a; /* bit 'bits-1' of 'p' is always set */
4535	for (b = bits-2; b >= 0; b--) {
4536		/* First, square r*w. */
4537		next_w = w*w;
4538		if ((next_w/w) != w) {
4539			/* overflow */
4540			if (r_is_one) {
4541				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
4542					goto err;
4543				}
4544				r_is_one = 0;
4545			} else {
4546				if (!BN_MOD_MUL_WORD(r, w, m)) {
4547					goto err;
4548				}
4549			}
4550			next_w = 1;
4551		}
4552		w = next_w;
4553		if (!r_is_one) {
4554			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
4555				goto err;
4556			}
4557		}
4558
4559		/* Second, multiply r*w by 'a' if exponent bit is set. */
4560		if (BN_is_bit_set(p, b)) {
4561			next_w = w*a;
4562			if ((next_w / a) != w) {
4563				/* overflow */
4564				if (r_is_one) {
4565					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
4566						goto err;
4567					}
4568					r_is_one = 0;
4569				} else {
4570					if (!BN_MOD_MUL_WORD(r, w, m)) {
4571						goto err;
4572					}
4573				}
4574				next_w = a;
4575			}
4576			w = next_w;
4577		}
4578	}
4579
4580	/* Finally, set r:=r*w. */
4581	if (w != 1) {
4582		if (r_is_one) {
4583			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
4584				goto err;
4585			}
4586			r_is_one = 0;
4587		} else {
4588			if (!BN_MOD_MUL_WORD(r, w, m)) {
4589				goto err;
4590			}
4591		}
4592	}
4593
4594	if (r_is_one) {
4595		/* can happen only if a == 1*/
4596		if (!BN_one(rr)) {
4597			goto err;
4598		}
4599	} else {
4600		if (!BN_from_montgomery(rr, r, mont, ctx)) {
4601			goto err;
4602		}
4603	}
4604	ret = 1;
4605err:
4606	if ((in_mont == NULL) && (mont != NULL)) {
4607		BN_MONT_CTX_free(mont);
4608	}
4609	BN_CTX_end(ctx);
4610	bn_check_top(rr);
4611	return (ret);
4612}
4613
4614
4615int
4616BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
4617    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
4618{
4619	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
4620	int start = 1;
4621	BIGNUM *d, *r;
4622	const BIGNUM *aa;
4623	/* Table of variables obtained from 'ctx' */
4624	BIGNUM *val[EXP_TABLE_SIZE];
4625	BN_MONT_CTX *mont = NULL;
4626
4627	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
4628		return (BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont));
4629	}
4630
4631	bn_check_top(a);
4632	bn_check_top(p);
4633	bn_check_top(m);
4634
4635	if (!BN_is_odd(m)) {
4636		/* BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); */
4637		return (0);
4638	}
4639	bits = BN_num_bits(p);
4640	if (bits == 0) {
4641		ret = BN_one(rr);
4642		return (ret);
4643	}
4644
4645	BN_CTX_start(ctx);
4646	d = BN_CTX_get(ctx);
4647	r = BN_CTX_get(ctx);
4648	val[0] = BN_CTX_get(ctx);
4649	if (!d || !r || !val[0]) {
4650		goto err;
4651	}
4652
4653	/* If this is not done, things will break in the montgomery
4654	 * part */
4655
4656	if (in_mont != NULL) {
4657		mont = in_mont;
4658	} else{
4659		if ((mont = BN_MONT_CTX_new()) == NULL) {
4660			goto err;
4661		}
4662		if (!BN_MONT_CTX_set(mont, m, ctx)) {
4663			goto err;
4664		}
4665	}
4666
4667	if (a->neg || (BN_ucmp(a, m) >= 0)) {
4668		if (!BN_nnmod(val[0], a, m, ctx)) {
4669			goto err;
4670		}
4671		aa = val[0];
4672	} else{
4673		aa = a;
4674	}
4675	if (BN_is_zero(aa)) {
4676		BN_zero(rr);
4677		ret = 1;
4678		goto err;
4679	}
4680	if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
4681		goto err;                                    /* 1 */
4682	}
4683	window = BN_window_bits_for_exponent_size(bits);
4684	if (window > 1) {
4685		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
4686			goto err;                                               /* 2 */
4687		}
4688		j = 1<<(window-1);
4689		for (i = 1; i < j; i++) {
4690			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
4691			    !BN_mod_mul_montgomery(val[i], val[i-1],
4692			    d, mont, ctx)) {
4693				goto err;
4694			}
4695		}
4696	}
4697
4698	start = 1;              /* This is used to avoid multiplication etc
4699	                         * when there is only the value '1' in the
4700	                         * buffer. */
4701	wvalue = 0;             /* The 'value' of the window */
4702	wstart = bits-1;        /* The top bit of the window */
4703	wend = 0;               /* The bottom bit of the window */
4704
4705	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
4706		goto err;
4707	}
4708	for ( ; ; ) {
4709		if (BN_is_bit_set(p, wstart) == 0) {
4710			if (!start) {
4711				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
4712					goto err;
4713				}
4714			}
4715			if (wstart == 0) {
4716				break;
4717			}
4718			wstart--;
4719			continue;
4720		}
4721
4722		/* We now have wstart on a 'set' bit, we now need to work out
4723		 * how bit a window to do.  To do this we need to scan
4724		 * forward until the last set bit before the end of the
4725		 * window */
4726		j = wstart;
4727		wvalue = 1;
4728		wend = 0;
4729		for (i = 1; i < window; i++) {
4730			if (wstart-i < 0) {
4731				break;
4732			}
4733			if (BN_is_bit_set(p, wstart-i)) {
4734				wvalue <<= (i-wend);
4735				wvalue |= 1;
4736				wend = i;
4737			}
4738		}
4739
4740		/* wend is the size of the current window */
4741		j = wend + 1;
4742		/* add the 'bytes above' */
4743		if (!start) {
4744			for (i = 0; i < j; i++) {
4745				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
4746					goto err;
4747				}
4748			}
4749		}
4750
4751		/* wvalue will be an odd number < 2^window */
4752		if (!BN_mod_mul_montgomery(r, r, val[wvalue>>1], mont, ctx)) {
4753			goto err;
4754		}
4755
4756		/* move the 'window' down further */
4757		wstart -= wend + 1;
4758		wvalue = 0;
4759		start = 0;
4760		if (wstart < 0) {
4761			break;
4762		}
4763	}
4764	if (!BN_from_montgomery(rr, r, mont, ctx)) {
4765		goto err;
4766	}
4767	ret = 1;
4768err:
4769	if ((in_mont == NULL) && (mont != NULL)) {
4770		BN_MONT_CTX_free(mont);
4771	}
4772	BN_CTX_end(ctx);
4773	bn_check_top(rr);
4774	return (ret);
4775}
4776
4777
4778int
4779BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
4780    const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
4781    BN_CTX *ctx, BN_MONT_CTX *in_mont)
4782{
4783	int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
4784	int r_is_one = 1;
4785	BIGNUM *d, *r;
4786	const BIGNUM *a_mod_m;
4787	/* Tables of variables obtained from 'ctx' */
4788	BIGNUM *val1[EXP_TABLE_SIZE], *val2[EXP_TABLE_SIZE];
4789	BN_MONT_CTX *mont = NULL;
4790
4791	bn_check_top(a1);
4792	bn_check_top(p1);
4793	bn_check_top(a2);
4794	bn_check_top(p2);
4795	bn_check_top(m);
4796
4797	if (!(m->d[0] & 1)) {
4798		/* BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); */
4799		return (0);
4800	}
4801	bits1 = BN_num_bits(p1);
4802	bits2 = BN_num_bits(p2);
4803	if ((bits1 == 0) && (bits2 == 0)) {
4804		ret = BN_one(rr);
4805		return (ret);
4806	}
4807
4808	bits = (bits1 > bits2) ? bits1 : bits2;
4809
4810	BN_CTX_start(ctx);
4811	d = BN_CTX_get(ctx);
4812	r = BN_CTX_get(ctx);
4813	val1[0] = BN_CTX_get(ctx);
4814	val2[0] = BN_CTX_get(ctx);
4815	if (!d || !r || !val1[0] || !val2[0]) {
4816		goto err;
4817	}
4818
4819	if (in_mont != NULL) {
4820		mont = in_mont;
4821	} else{
4822		if ((mont = BN_MONT_CTX_new()) == NULL) {
4823			goto err;
4824		}
4825		if (!BN_MONT_CTX_set(mont, m, ctx)) {
4826			goto err;
4827		}
4828	}
4829
4830	window1 = BN_window_bits_for_exponent_size(bits1);
4831	window2 = BN_window_bits_for_exponent_size(bits2);
4832
4833	/*
4834	 * Build table for a1:   val1[i] := a1^(2*i + 1) mod m  for i = 0 .. 2^(window1-1)
4835	 */
4836	if (a1->neg || (BN_ucmp(a1, m) >= 0)) {
4837		if (!BN_mod(val1[0], a1, m, ctx)) {
4838			goto err;
4839		}
4840		a_mod_m = val1[0];
4841	} else{
4842		a_mod_m = a1;
4843	}
4844	if (BN_is_zero(a_mod_m)) {
4845		BN_zero(rr);
4846		ret = 1;
4847		goto err;
4848	}
4849
4850	if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
4851		goto err;
4852	}
4853	if (window1 > 1) {
4854		if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
4855			goto err;
4856		}
4857
4858		j = 1<<(window1-1);
4859		for (i = 1; i < j; i++) {
4860			if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
4861			    !BN_mod_mul_montgomery(val1[i], val1[i-1],
4862			    d, mont, ctx)) {
4863				goto err;
4864			}
4865		}
4866	}
4867
4868
4869	/*
4870	 * Build table for a2:   val2[i] := a2^(2*i + 1) mod m  for i = 0 .. 2^(window2-1)
4871	 */
4872	if (a2->neg || (BN_ucmp(a2, m) >= 0)) {
4873		if (!BN_mod(val2[0], a2, m, ctx)) {
4874			goto err;
4875		}
4876		a_mod_m = val2[0];
4877	} else{
4878		a_mod_m = a2;
4879	}
4880	if (BN_is_zero(a_mod_m)) {
4881		BN_zero(rr);
4882		ret = 1;
4883		goto err;
4884	}
4885	if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
4886		goto err;
4887	}
4888	if (window2 > 1) {
4889		if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
4890			goto err;
4891		}
4892
4893		j = 1<<(window2-1);
4894		for (i = 1; i < j; i++) {
4895			if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
4896			    !BN_mod_mul_montgomery(val2[i], val2[i-1],
4897			    d, mont, ctx)) {
4898				goto err;
4899			}
4900		}
4901	}
4902
4903
4904	/* Now compute the power product, using independent windows. */
4905	r_is_one = 1;
4906	wvalue1 = 0;    /* The 'value' of the first window */
4907	wvalue2 = 0;    /* The 'value' of the second window */
4908	wpos1 = 0;      /* If wvalue1 > 0, the bottom bit of the first window */
4909	wpos2 = 0;      /* If wvalue2 > 0, the bottom bit of the second window */
4910
4911	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
4912		goto err;
4913	}
4914	for (b = bits-1; b >= 0; b--) {
4915		if (!r_is_one) {
4916			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
4917				goto err;
4918			}
4919		}
4920
4921		if (!wvalue1) {
4922			if (BN_is_bit_set(p1, b)) {
4923				/* consider bits b-window1+1 .. b for this window */
4924				i = b-window1+1;
4925				while (!BN_is_bit_set(p1, i)) { /* works for i<0 */
4926					i++;
4927				}
4928				wpos1 = i;
4929				wvalue1 = 1;
4930				for (i = b-1; i >= wpos1; i--) {
4931					wvalue1 <<= 1;
4932					if (BN_is_bit_set(p1, i)) {
4933						wvalue1++;
4934					}
4935				}
4936			}
4937		}
4938
4939		if (!wvalue2) {
4940			if (BN_is_bit_set(p2, b)) {
4941				/* consider bits b-window2+1 .. b for this window */
4942				i = b-window2+1;
4943				while (!BN_is_bit_set(p2, i)) {
4944					i++;
4945				}
4946				wpos2 = i;
4947				wvalue2 = 1;
4948				for (i = b-1; i >= wpos2; i--) {
4949					wvalue2 <<= 1;
4950					if (BN_is_bit_set(p2, i)) {
4951						wvalue2++;
4952					}
4953				}
4954			}
4955		}
4956
4957		if (wvalue1 && (b == wpos1)) {
4958			/* wvalue1 is odd and < 2^window1 */
4959			if (!BN_mod_mul_montgomery(r, r, val1[wvalue1>>1], mont, ctx)) {
4960				goto err;
4961			}
4962			wvalue1 = 0;
4963			r_is_one = 0;
4964		}
4965
4966		if (wvalue2 && (b == wpos2)) {
4967			/* wvalue2 is odd and < 2^window2 */
4968			if (!BN_mod_mul_montgomery(r, r, val2[wvalue2>>1], mont, ctx)) {
4969				goto err;
4970			}
4971			wvalue2 = 0;
4972			r_is_one = 0;
4973		}
4974	}
4975	if (!BN_from_montgomery(rr, r, mont, ctx)) {
4976		goto err;
4977	}
4978	ret = 1;
4979err:
4980	if ((in_mont == NULL) && (mont != NULL)) {
4981		BN_MONT_CTX_free(mont);
4982	}
4983	BN_CTX_end(ctx);
4984	bn_check_top(rr);
4985	return (ret);
4986}
4987
4988
4989/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
4990 * so that accessing any of these table values shows the same access pattern as far
4991 * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
4992 * from/to that table. */
4993
4994/* BN_mod_exp_mont_conttime is based on the assumption that the
4995 * L1 data cache line width of the target processor is at least
4996 * the following value.
4997 */
4998#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH	(64)
4999#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK	(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
5000
5001#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
5002#  define BN_window_bits_for_ctime_exponent_size(b) \
5003	((b) > 937 ? 6 :			    \
5004	(b) > 306 ? 5 :				    \
5005	(b) > 89 ? 4 :				    \
5006	(b) > 22 ? 3 : 1)
5007
5008#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (6)
5009
5010#else
5011
5012#  define BN_window_bits_for_ctime_exponent_size(b) \
5013	((b) > 306 ? 5 :			    \
5014	(b) > 89 ? 4 :				    \
5015	(b) > 22 ? 3 : 1)
5016
5017#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (5)
5018#endif /* MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 */
5019
5020static int
5021MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
5022{
5023	size_t i, j;
5024
5025	if (bn_wexpand(b, top) == NULL) {
5026		return (0);
5027	}
5028	while (b->top < top) {
5029		b->d[b->top++] = 0;
5030	}
5031
5032	for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
5033		buf[j] = ((unsigned char *)b->d)[i];
5034	}
5035
5036	bn_correct_top(b);
5037	return (1);
5038}
5039
5040
5041static int
5042MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
5043{
5044	size_t i, j;
5045
5046	if (bn_wexpand(b, top) == NULL) {
5047		return (0);
5048	}
5049
5050	for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
5051		((unsigned char *)b->d)[i] = buf[j];
5052	}
5053
5054	b->top = top;
5055	bn_correct_top(b);
5056	return (1);
5057}
5058
5059
5060/* Given a pointer value, compute the next address that is a cache line multiple. */
5061#define MOD_EXP_CTIME_ALIGN(x_)	\
5062	((unsigned char *)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
5063
5064/* This variant of BN_mod_exp_mont() uses fixed windows and the special
5065 * precomputation memory layout to limit data-dependency to a minimum
5066 * to protect secret exponents (cf. the hyper-threading timing attacks
5067 * pointed out by Colin Percival,
5068 * http://www.daemonology.net/hyperthreading-considered-harmful/)
5069 */
5070int
5071BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
5072    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
5073{
5074	int i, bits, ret = 0, idx, window, wvalue;
5075	int top;
5076	BIGNUM *r;
5077	const BIGNUM *aa;
5078	BN_MONT_CTX *mont = NULL;
5079
5080	int numPowers;
5081	unsigned char *powerbufFree = NULL;
5082	int powerbufLen = 0;
5083	unsigned char *powerbuf = NULL;
5084	BIGNUM *computeTemp = NULL, *am = NULL;
5085
5086	bn_check_top(a);
5087	bn_check_top(p);
5088	bn_check_top(m);
5089
5090	top = m->top;
5091
5092	if (!(m->d[0] & 1)) {
5093		/* BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS); */
5094		return (0);
5095	}
5096	bits = BN_num_bits(p);
5097	if (bits == 0) {
5098		ret = BN_one(rr);
5099		return (ret);
5100	}
5101
5102	/* Initialize BIGNUM context and allocate intermediate result */
5103	BN_CTX_start(ctx);
5104	r = BN_CTX_get(ctx);
5105	if (r == NULL) {
5106		goto err;
5107	}
5108
5109	/* Allocate a montgomery context if it was not supplied by the caller.
5110	 * If this is not done, things will break in the montgomery part.
5111	 */
5112	if (in_mont != NULL) {
5113		mont = in_mont;
5114	} else{
5115		if ((mont = BN_MONT_CTX_new()) == NULL) {
5116			goto err;
5117		}
5118		if (!BN_MONT_CTX_set(mont, m, ctx)) {
5119			goto err;
5120		}
5121	}
5122
5123	/* Get the window size to use with size of p. */
5124	window = BN_window_bits_for_ctime_exponent_size(bits);
5125
5126	/* Allocate a buffer large enough to hold all of the pre-computed
5127	 * powers of a.
5128	 */
5129	numPowers = 1 << window;
5130	powerbufLen = sizeof(m->d[0])*top*numPowers;
5131	if ((powerbufFree = (unsigned char *)malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
5132		goto err;
5133	}
5134
5135	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
5136	memset(powerbuf, 0, powerbufLen);
5137
5138	/* Initialize the intermediate result. Do this early to save double conversion,
5139	 * once each for a^0 and intermediate result.
5140	 */
5141	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
5142		goto err;
5143	}
5144	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) {
5145		goto err;
5146	}
5147
5148	/* Initialize computeTemp as a^1 with montgomery precalcs */
5149	computeTemp = BN_CTX_get(ctx);
5150	am = BN_CTX_get(ctx);
5151	if ((computeTemp == NULL) || (am == NULL)) {
5152		goto err;
5153	}
5154
5155	if (a->neg || (BN_ucmp(a, m) >= 0)) {
5156		if (!BN_mod(am, a, m, ctx)) {
5157			goto err;
5158		}
5159		aa = am;
5160	} else{
5161		aa = a;
5162	}
5163	if (!BN_to_montgomery(am, aa, mont, ctx)) {
5164		goto err;
5165	}
5166	if (!BN_copy(computeTemp, am)) {
5167		goto err;
5168	}
5169	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) {
5170		goto err;
5171	}
5172
5173	/* If the window size is greater than 1, then calculate
5174	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
5175	 * (even powers could instead be computed as (a^(i/2))^2
5176	 * to use the slight performance advantage of sqr over mul).
5177	 */
5178	if (window > 1) {
5179		for (i = 2; i < numPowers; i++) {
5180			/* Calculate a^i = a^(i-1) * a */
5181			if (!BN_mod_mul_montgomery(computeTemp, am, computeTemp, mont, ctx)) {
5182				goto err;
5183			}
5184			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) {
5185				goto err;
5186			}
5187		}
5188	}
5189
5190	/* Adjust the number of bits up to a multiple of the window size.
5191	 * If the exponent length is not a multiple of the window size, then
5192	 * this pads the most significant bits with zeros to normalize the
5193	 * scanning loop to there's no special cases.
5194	 *
5195	 * * NOTE: Making the window size a power of two less than the native
5196	 * * word size ensures that the padded bits won't go past the last
5197	 * * word in the internal BIGNUM structure. Going past the end will
5198	 * * still produce the correct result, but causes a different branch
5199	 * * to be taken in the BN_is_bit_set function.
5200	 */
5201	bits = ((bits + window - 1) / window) * window;
5202	idx = bits - 1; /* The top bit of the window */
5203
5204	/* Scan the exponent one window at a time starting from the most
5205	 * significant bits.
5206	 */
5207	while (idx >= 0) {
5208		wvalue = 0; /* The 'value' of the window */
5209
5210		/* Scan the window, squaring the result as we go */
5211		for (i = 0; i < window; i++, idx--) {
5212			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
5213				goto err;
5214			}
5215			wvalue = (wvalue<<1)+BN_is_bit_set(p, idx);
5216		}
5217
5218		/* Fetch the appropriate pre-computed value from the pre-buf */
5219		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) {
5220			goto err;
5221		}
5222
5223		/* Multiply the result into the intermediate result */
5224		if (!BN_mod_mul_montgomery(r, r, computeTemp, mont, ctx)) {
5225			goto err;
5226		}
5227	}
5228
5229	/* Convert the final result from montgomery to standard format */
5230	if (!BN_from_montgomery(rr, r, mont, ctx)) {
5231		goto err;
5232	}
5233	ret = 1;
5234err:
5235	if ((in_mont == NULL) && (mont != NULL)) {
5236		BN_MONT_CTX_free(mont);
5237	}
5238	if (powerbuf != NULL) {
5239		memset(powerbuf, 0, powerbufLen);
5240		free(powerbufFree);
5241	}
5242	if (am != NULL) {
5243		BN_clear(am);
5244	}
5245	if (computeTemp != NULL) {
5246		BN_clear(computeTemp);
5247	}
5248	BN_CTX_end(ctx);
5249	return (ret);
5250}
5251
5252
5253/*
5254 *  Sqr
5255 */
5256
5257static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
5258
5259#ifdef BN_RECURSION
5260static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
5261
5262#endif
5263
5264/* r must not be a */
5265/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
5266int
5267BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
5268{
5269	int max, al;
5270	int ret = 0;
5271	BIGNUM *tmp, *rr;
5272
5273#ifdef BN_COUNT
5274	fprintf(stderr, "BN_sqr %d * %d\n", a->top, a->top);
5275#endif
5276	bn_check_top(a);
5277
5278	al = a->top;
5279	if (al <= 0) {
5280		r->top = 0;
5281		return (1);
5282	}
5283
5284	BN_CTX_start(ctx);
5285	rr = (a != r) ? r : BN_CTX_get(ctx);
5286	tmp = BN_CTX_get(ctx);
5287	if (!rr || !tmp) {
5288		goto err;
5289	}
5290
5291	max = 2 * al; /* Non-zero (from above) */
5292	if (bn_wexpand(rr, max) == NULL) {
5293		goto err;
5294	}
5295
5296	if (al == 4) {
5297#ifndef BN_SQR_COMBA
5298		BN_ULONG t[8];
5299		bn_sqr_normal(rr->d, a->d, 4, t);
5300#else
5301		bn_sqr_comba4(rr->d, a->d);
5302#endif
5303	} else if (al == 8) {
5304#ifndef BN_SQR_COMBA
5305		BN_ULONG t[16];
5306		bn_sqr_normal(rr->d, a->d, 8, t);
5307#else
5308		bn_sqr_comba8(rr->d, a->d);
5309#endif
5310	} else {
5311#if defined(BN_RECURSION)
5312		if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
5313			BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
5314			bn_sqr_normal(rr->d, a->d, al, t);
5315		} else {
5316			int j, k;
5317
5318			j = BN_num_bits_word((BN_ULONG)al);
5319			j = 1 << (j - 1);
5320			k = j + j;
5321			if (al == j) {
5322				if (bn_wexpand(tmp, k*2) == NULL) {
5323					goto err;
5324				}
5325				bn_sqr_recursive(rr->d, a->d, al, tmp->d);
5326			} else {
5327				if (bn_wexpand(tmp, max) == NULL) {
5328					goto err;
5329				}
5330				bn_sqr_normal(rr->d, a->d, al, tmp->d);
5331			}
5332		}
5333#else
5334		if (bn_wexpand(tmp, max) == NULL) {
5335			goto err;
5336		}
5337		bn_sqr_normal(rr->d, a->d, al, tmp->d);
5338#endif
5339	}
5340
5341	rr->neg = 0;
5342
5343	/* If the most-significant half of the top word of 'a' is zero, then
5344	 * the square of 'a' will max-1 words. */
5345	if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) {
5346		rr->top = max - 1;
5347	} else{
5348		rr->top = max;
5349	}
5350	if (rr != r) {
5351		BN_copy(r, rr);
5352	}
5353	ret = 1;
5354err:
5355	bn_check_top(rr);
5356	bn_check_top(tmp);
5357	BN_CTX_end(ctx);
5358	return (ret);
5359}
5360
5361
5362/* tmp must have 2*n words */
5363static void
5364bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
5365{
5366	int i, j, max;
5367	const BN_ULONG *ap;
5368	BN_ULONG *rp;
5369
5370	max = n * 2;
5371	ap = a;
5372	rp = r;
5373	rp[0] = rp[max - 1] = 0;
5374	rp++;
5375	j = n;
5376
5377	if (--j > 0) {
5378		ap++;
5379		rp[j] = bn_mul_words(rp, ap, j, ap[-1]);
5380		rp += 2;
5381	}
5382
5383	for (i = n-2; i > 0; i--) {
5384		j--;
5385		ap++;
5386		rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]);
5387		rp += 2;
5388	}
5389
5390	bn_add_words(r, r, r, max);
5391
5392	/* There will not be a carry */
5393
5394	bn_sqr_words(tmp, a, n);
5395
5396	bn_add_words(r, r, tmp, max);
5397}
5398
5399
5400#ifdef BN_RECURSION
5401
5402/* r is 2*n words in size,
5403 * a and b are both n words in size.    (There's not actually a 'b' here ...)
5404 * n must be a power of 2.
5405 * We multiply and return the result.
5406 * t must be 2*n words in size
5407 * We calculate
5408 * a[0]*b[0]
5409 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
5410 * a[1]*b[1]
5411 */
5412static void
5413bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
5414{
5415	int n = n2 / 2;
5416	int zero, c1;
5417	BN_ULONG ln, lo, *p;
5418
5419#ifdef BN_COUNT
5420	fprintf(stderr, " bn_sqr_recursive %d * %d\n", n2, n2);
5421#endif
5422	if (n2 == 4) {
5423#ifndef BN_SQR_COMBA
5424		bn_sqr_normal(r, a, 4, t);
5425#else
5426		bn_sqr_comba4(r, a);
5427#endif
5428		return;
5429	} else if (n2 == 8) {
5430#ifndef BN_SQR_COMBA
5431		bn_sqr_normal(r, a, 8, t);
5432#else
5433		bn_sqr_comba8(r, a);
5434#endif
5435		return;
5436	}
5437	if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
5438		bn_sqr_normal(r, a, n2, t);
5439		return;
5440	}
5441	/* r=(a[0]-a[1])*(a[1]-a[0]) */
5442	c1 = bn_cmp_words(a, &(a[n]), n);
5443	zero = 0;
5444	if (c1 > 0) {
5445		bn_sub_words(t, a, &(a[n]), n);
5446	} else if (c1 < 0) {
5447		bn_sub_words(t, &(a[n]), a, n);
5448	} else{
5449		zero = 1;
5450	}
5451
5452	/* The result will always be negative unless it is zero */
5453	p = &(t[n2 * 2]);
5454
5455	if (!zero) {
5456		bn_sqr_recursive(&(t[n2]), t, n, p);
5457	} else{
5458		memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
5459	}
5460	bn_sqr_recursive(r, a, n, p);
5461	bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
5462
5463	/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
5464	 * r[10] holds (a[0]*b[0])
5465	 * r[32] holds (b[1]*b[1])
5466	 */
5467
5468	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
5469
5470	/* t[32] is negative */
5471	c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
5472
5473	/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
5474	 * r[10] holds (a[0]*a[0])
5475	 * r[32] holds (a[1]*a[1])
5476	 * c1 holds the carry bits
5477	 */
5478	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
5479	if (c1) {
5480		p = &(r[n+n2]);
5481		lo = *p;
5482		ln = (lo+c1)&BN_MASK2;
5483		*p = ln;
5484
5485		/* The overflow will stop before we over write
5486		 * words we should not overwrite */
5487		if (ln < (BN_ULONG)c1) {
5488			do {
5489				p++;
5490				lo = *p;
5491				ln = (lo+1)&BN_MASK2;
5492				*p = ln;
5493			} while (ln == 0);
5494		}
5495	}
5496}
5497
5498
5499#endif /* BN_RECURSION */
5500
5501/*
5502 * rand
5503 */
5504static int
5505bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
5506{
5507	unsigned char *buf = NULL;
5508	int ret = 0, bit, bytes, mask;
5509	time_t tim;
5510
5511	if (bits == 0) {
5512		BN_zero(rnd);
5513		return (1);
5514	}
5515
5516	bytes = (bits + 7) / 8;
5517	bit = (bits - 1) % 8;
5518	mask = 0xff << (bit + 1);
5519
5520	buf = (unsigned char *)malloc(bytes);
5521	if (buf == NULL) {
5522		/* BNerr(BN_F_BNRAND,ERR_R_MALLOC_FAILURE); */
5523		fprintf(stderr, "malloc() failed\n");
5524		goto err;
5525	}
5526
5527	/* make a random number and set the top and bottom bits */
5528	time(&tim);
5529	RAND_add(&tim, sizeof(tim), 0.0);
5530
5531	if (pseudorand) {
5532		if (RAND_pseudo_bytes(buf, bytes) == -1) {
5533			fprintf(stderr, "RAND_pseudo_bytes() failed\n");
5534			goto err;
5535		}
5536	} else {
5537		if (RAND_bytes(buf, bytes) <= 0) {
5538			fprintf(stderr, "RAND_bytes() failed\n");
5539			goto err;
5540		}
5541	}
5542
5543#if 1
5544	if (pseudorand == 2) {
5545		/* generate patterns that are more likely to trigger BN
5546		 * library bugs */
5547		int i;
5548		unsigned char c;
5549
5550		for (i = 0; i < bytes; i++) {
5551			RAND_pseudo_bytes(&c, 1);
5552			if ((c >= 128) && (i > 0)) {
5553				buf[i] = buf[i-1];
5554			} else if (c < 42) {
5555				buf[i] = 0;
5556			} else if (c < 84) {
5557				buf[i] = 255;
5558			}
5559		}
5560	}
5561#endif
5562
5563	if (top != -1) {
5564		if (top) {
5565			if (bit == 0) {
5566				buf[0] = 1;
5567				buf[1] |= 0x80;
5568			} else {
5569				buf[0] |= (3<<(bit-1));
5570			}
5571		} else {
5572			buf[0] |= (1<<bit);
5573		}
5574	}
5575	buf[0] &= ~mask;
5576	if (bottom) { /* set bottom bit if requested */
5577		buf[bytes-1] |= 1;
5578	}
5579	if (!BN_bin2bn(buf, bytes, rnd)) {
5580		goto err;
5581	}
5582	ret = 1;
5583err:
5584	if (buf != NULL) {
5585		memset(buf, 0, bytes);
5586		free(buf);
5587	}
5588	bn_check_top(rnd);
5589
5590	return (ret);
5591}
5592
5593
5594int
5595BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
5596{
5597	return (bnrand(0, rnd, bits, top, bottom));
5598}
5599
5600
5601int
5602BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
5603{
5604	return (bnrand(1, rnd, bits, top, bottom));
5605}
5606
5607
5608#if 1
5609int
5610BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
5611{
5612	return (bnrand(2, rnd, bits, top, bottom));
5613}
5614
5615
5616#endif
5617
5618/* random number r:  0 <= r < range */
5619static int
5620bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
5621{
5622	int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
5623	int n;
5624	int count = 100;
5625
5626	if (range->neg || BN_is_zero(range)) {
5627		/* BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE); */
5628		return (0);
5629	}
5630
5631	n = BN_num_bits(range); /* n > 0 */
5632
5633	/* BN_is_bit_set(range, n - 1) always holds */
5634
5635	if (n == 1) {
5636		BN_zero(r);
5637	} else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) {
5638		/* range = 100..._2,
5639		 * so  3*range (= 11..._2)  is exactly one bit longer than  range */
5640		do {
5641			if (!bn_rand(r, n + 1, -1, 0)) {
5642				return (0);
5643			}
5644
5645			/* If  r < 3*range,  use  r := r MOD range
5646			 * (which is either  r, r - range,  or  r - 2*range).
5647			 * Otherwise, iterate once more.
5648			 * Since  3*range = 11..._2, each iteration succeeds with
5649			 * probability >= .75. */
5650			if (BN_cmp(r, range) >= 0) {
5651				if (!BN_sub(r, r, range)) {
5652					return (0);
5653				}
5654				if (BN_cmp(r, range) >= 0) {
5655					if (!BN_sub(r, r, range)) {
5656						return (0);
5657					}
5658				}
5659			}
5660
5661			if (!--count) {
5662				/* BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); */
5663				return (0);
5664			}
5665		} while (BN_cmp(r, range) >= 0);
5666	} else {
5667		do {
5668			/* range = 11..._2  or  range = 101..._2 */
5669			if (!bn_rand(r, n, -1, 0)) {
5670				return (0);
5671			}
5672
5673			if (!--count) {
5674				/* BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); */
5675				return (0);
5676			}
5677		} while (BN_cmp(r, range) >= 0);
5678	}
5679
5680	bn_check_top(r);
5681
5682	return (1);
5683}
5684
5685
5686int
5687BN_rand_range(BIGNUM *r, const BIGNUM *range)
5688{
5689	return (bn_rand_range(0, r, range));
5690}
5691
5692
5693int
5694BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
5695{
5696	return (bn_rand_range(1, r, range));
5697}
5698
5699
5700/*
5701 *  primes
5702 *
5703 */
5704
5705#ifndef EIGHT_BIT
5706#define NUMPRIMES	2048
5707typedef unsigned short		prime_t;
5708#else
5709#define NUMPRIMES	54
5710typedef unsigned char		prime_t;
5711#endif
5712static const prime_t primes[NUMPRIMES] =
5713{
5714	2,	   3,	  5,	 7,    11,    13,    17,    19,
5715	23,	  29,	 31,	37,    41,    43,    47,    53,
5716	59,	  61,	 67,	71,    73,    79,    83,    89,
5717	97,	 101,	103,   107,   109,   113,   127,   131,
5718	137,	 139,	149,   151,   157,   163,   167,   173,
5719	179,	 181,	191,   193,   197,   199,   211,   223,
5720	227,	 229,	233,   239,   241,   251,
5721#ifndef EIGHT_BIT
5722	257,	 263,
5723	269,	 271,	277,   281,   283,   293,   307,   311,
5724	313,	 317,	331,   337,   347,   349,   353,   359,
5725	367,	 373,	379,   383,   389,   397,   401,   409,
5726	419,	 421,	431,   433,   439,   443,   449,   457,
5727	461,	 463,	467,   479,   487,   491,   499,   503,
5728	509,	 521,	523,   541,   547,   557,   563,   569,
5729	571,	 577,	587,   593,   599,   601,   607,   613,
5730	617,	 619,	631,   641,   643,   647,   653,   659,
5731	661,	 673,	677,   683,   691,   701,   709,   719,
5732	727,	 733,	739,   743,   751,   757,   761,   769,
5733	773,	 787,	797,   809,   811,   821,   823,   827,
5734	829,	 839,	853,   857,   859,   863,   877,   881,
5735	883,	 887,	907,   911,   919,   929,   937,   941,
5736	947,	 953,	967,   971,   977,   983,   991,   997,
5737	1009,	1013,  1019,  1021,  1031,  1033,  1039,  1049,
5738	1051,	1061,  1063,  1069,  1087,  1091,  1093,  1097,
5739	1103,	1109,  1117,  1123,  1129,  1151,  1153,  1163,
5740	1171,	1181,  1187,  1193,  1201,  1213,  1217,  1223,
5741	1229,	1231,  1237,  1249,  1259,  1277,  1279,  1283,
5742	1289,	1291,  1297,  1301,  1303,  1307,  1319,  1321,
5743	1327,	1361,  1367,  1373,  1381,  1399,  1409,  1423,
5744	1427,	1429,  1433,  1439,  1447,  1451,  1453,  1459,
5745	1471,	1481,  1483,  1487,  1489,  1493,  1499,  1511,
5746	1523,	1531,  1543,  1549,  1553,  1559,  1567,  1571,
5747	1579,	1583,  1597,  1601,  1607,  1609,  1613,  1619,
5748	1621,	1627,  1637,  1657,  1663,  1667,  1669,  1693,
5749	1697,	1699,  1709,  1721,  1723,  1733,  1741,  1747,
5750	1753,	1759,  1777,  1783,  1787,  1789,  1801,  1811,
5751	1823,	1831,  1847,  1861,  1867,  1871,  1873,  1877,
5752	1879,	1889,  1901,  1907,  1913,  1931,  1933,  1949,
5753	1951,	1973,  1979,  1987,  1993,  1997,  1999,  2003,
5754	2011,	2017,  2027,  2029,  2039,  2053,  2063,  2069,
5755	2081,	2083,  2087,  2089,  2099,  2111,  2113,  2129,
5756	2131,	2137,  2141,  2143,  2153,  2161,  2179,  2203,
5757	2207,	2213,  2221,  2237,  2239,  2243,  2251,  2267,
5758	2269,	2273,  2281,  2287,  2293,  2297,  2309,  2311,
5759	2333,	2339,  2341,  2347,  2351,  2357,  2371,  2377,
5760	2381,	2383,  2389,  2393,  2399,  2411,  2417,  2423,
5761	2437,	2441,  2447,  2459,  2467,  2473,  2477,  2503,
5762	2521,	2531,  2539,  2543,  2549,  2551,  2557,  2579,
5763	2591,	2593,  2609,  2617,  2621,  2633,  2647,  2657,
5764	2659,	2663,  2671,  2677,  2683,  2687,  2689,  2693,
5765	2699,	2707,  2711,  2713,  2719,  2729,  2731,  2741,
5766	2749,	2753,  2767,  2777,  2789,  2791,  2797,  2801,
5767	2803,	2819,  2833,  2837,  2843,  2851,  2857,  2861,
5768	2879,	2887,  2897,  2903,  2909,  2917,  2927,  2939,
5769	2953,	2957,  2963,  2969,  2971,  2999,  3001,  3011,
5770	3019,	3023,  3037,  3041,  3049,  3061,  3067,  3079,
5771	3083,	3089,  3109,  3119,  3121,  3137,  3163,  3167,
5772	3169,	3181,  3187,  3191,  3203,  3209,  3217,  3221,
5773	3229,	3251,  3253,  3257,  3259,  3271,  3299,  3301,
5774	3307,	3313,  3319,  3323,  3329,  3331,  3343,  3347,
5775	3359,	3361,  3371,  3373,  3389,  3391,  3407,  3413,
5776	3433,	3449,  3457,  3461,  3463,  3467,  3469,  3491,
5777	3499,	3511,  3517,  3527,  3529,  3533,  3539,  3541,
5778	3547,	3557,  3559,  3571,  3581,  3583,  3593,  3607,
5779	3613,	3617,  3623,  3631,  3637,  3643,  3659,  3671,
5780	3673,	3677,  3691,  3697,  3701,  3709,  3719,  3727,
5781	3733,	3739,  3761,  3767,  3769,  3779,  3793,  3797,
5782	3803,	3821,  3823,  3833,  3847,  3851,  3853,  3863,
5783	3877,	3881,  3889,  3907,  3911,  3917,  3919,  3923,
5784	3929,	3931,  3943,  3947,  3967,  3989,  4001,  4003,
5785	4007,	4013,  4019,  4021,  4027,  4049,  4051,  4057,
5786	4073,	4079,  4091,  4093,  4099,  4111,  4127,  4129,
5787	4133,	4139,  4153,  4157,  4159,  4177,  4201,  4211,
5788	4217,	4219,  4229,  4231,  4241,  4243,  4253,  4259,
5789	4261,	4271,  4273,  4283,  4289,  4297,  4327,  4337,
5790	4339,	4349,  4357,  4363,  4373,  4391,  4397,  4409,
5791	4421,	4423,  4441,  4447,  4451,  4457,  4463,  4481,
5792	4483,	4493,  4507,  4513,  4517,  4519,  4523,  4547,
5793	4549,	4561,  4567,  4583,  4591,  4597,  4603,  4621,
5794	4637,	4639,  4643,  4649,  4651,  4657,  4663,  4673,
5795	4679,	4691,  4703,  4721,  4723,  4729,  4733,  4751,
5796	4759,	4783,  4787,  4789,  4793,  4799,  4801,  4813,
5797	4817,	4831,  4861,  4871,  4877,  4889,  4903,  4909,
5798	4919,	4931,  4933,  4937,  4943,  4951,  4957,  4967,
5799	4969,	4973,  4987,  4993,  4999,  5003,  5009,  5011,
5800	5021,	5023,  5039,  5051,  5059,  5077,  5081,  5087,
5801	5099,	5101,  5107,  5113,  5119,  5147,  5153,  5167,
5802	5171,	5179,  5189,  5197,  5209,  5227,  5231,  5233,
5803	5237,	5261,  5273,  5279,  5281,  5297,  5303,  5309,
5804	5323,	5333,  5347,  5351,  5381,  5387,  5393,  5399,
5805	5407,	5413,  5417,  5419,  5431,  5437,  5441,  5443,
5806	5449,	5471,  5477,  5479,  5483,  5501,  5503,  5507,
5807	5519,	5521,  5527,  5531,  5557,  5563,  5569,  5573,
5808	5581,	5591,  5623,  5639,  5641,  5647,  5651,  5653,
5809	5657,	5659,  5669,  5683,  5689,  5693,  5701,  5711,
5810	5717,	5737,  5741,  5743,  5749,  5779,  5783,  5791,
5811	5801,	5807,  5813,  5821,  5827,  5839,  5843,  5849,
5812	5851,	5857,  5861,  5867,  5869,  5879,  5881,  5897,
5813	5903,	5923,  5927,  5939,  5953,  5981,  5987,  6007,
5814	6011,	6029,  6037,  6043,  6047,  6053,  6067,  6073,
5815	6079,	6089,  6091,  6101,  6113,  6121,  6131,  6133,
5816	6143,	6151,  6163,  6173,  6197,  6199,  6203,  6211,
5817	6217,	6221,  6229,  6247,  6257,  6263,  6269,  6271,
5818	6277,	6287,  6299,  6301,  6311,  6317,  6323,  6329,
5819	6337,	6343,  6353,  6359,  6361,  6367,  6373,  6379,
5820	6389,	6397,  6421,  6427,  6449,  6451,  6469,  6473,
5821	6481,	6491,  6521,  6529,  6547,  6551,  6553,  6563,
5822	6569,	6571,  6577,  6581,  6599,  6607,  6619,  6637,
5823	6653,	6659,  6661,  6673,  6679,  6689,  6691,  6701,
5824	6703,	6709,  6719,  6733,  6737,  6761,  6763,  6779,
5825	6781,	6791,  6793,  6803,  6823,  6827,  6829,  6833,
5826	6841,	6857,  6863,  6869,  6871,  6883,  6899,  6907,
5827	6911,	6917,  6947,  6949,  6959,  6961,  6967,  6971,
5828	6977,	6983,  6991,  6997,  7001,  7013,  7019,  7027,
5829	7039,	7043,  7057,  7069,  7079,  7103,  7109,  7121,
5830	7127,	7129,  7151,  7159,  7177,  7187,  7193,  7207,
5831	7211,	7213,  7219,  7229,  7237,  7243,  7247,  7253,
5832	7283,	7297,  7307,  7309,  7321,  7331,  7333,  7349,
5833	7351,	7369,  7393,  7411,  7417,  7433,  7451,  7457,
5834	7459,	7477,  7481,  7487,  7489,  7499,  7507,  7517,
5835	7523,	7529,  7537,  7541,  7547,  7549,  7559,  7561,
5836	7573,	7577,  7583,  7589,  7591,  7603,  7607,  7621,
5837	7639,	7643,  7649,  7669,  7673,  7681,  7687,  7691,
5838	7699,	7703,  7717,  7723,  7727,  7741,  7753,  7757,
5839	7759,	7789,  7793,  7817,  7823,  7829,  7841,  7853,
5840	7867,	7873,  7877,  7879,  7883,  7901,  7907,  7919,
5841	7927,	7933,  7937,  7949,  7951,  7963,  7993,  8009,
5842	8011,	8017,  8039,  8053,  8059,  8069,  8081,  8087,
5843	8089,	8093,  8101,  8111,  8117,  8123,  8147,  8161,
5844	8167,	8171,  8179,  8191,  8209,  8219,  8221,  8231,
5845	8233,	8237,  8243,  8263,  8269,  8273,  8287,  8291,
5846	8293,	8297,  8311,  8317,  8329,  8353,  8363,  8369,
5847	8377,	8387,  8389,  8419,  8423,  8429,  8431,  8443,
5848	8447,	8461,  8467,  8501,  8513,  8521,  8527,  8537,
5849	8539,	8543,  8563,  8573,  8581,  8597,  8599,  8609,
5850	8623,	8627,  8629,  8641,  8647,  8663,  8669,  8677,
5851	8681,	8689,  8693,  8699,  8707,  8713,  8719,  8731,
5852	8737,	8741,  8747,  8753,  8761,  8779,  8783,  8803,
5853	8807,	8819,  8821,  8831,  8837,  8839,  8849,  8861,
5854	8863,	8867,  8887,  8893,  8923,  8929,  8933,  8941,
5855	8951,	8963,  8969,  8971,  8999,  9001,  9007,  9011,
5856	9013,	9029,  9041,  9043,  9049,  9059,  9067,  9091,
5857	9103,	9109,  9127,  9133,  9137,  9151,  9157,  9161,
5858	9173,	9181,  9187,  9199,  9203,  9209,  9221,  9227,
5859	9239,	9241,  9257,  9277,  9281,  9283,  9293,  9311,
5860	9319,	9323,  9337,  9341,  9343,  9349,  9371,  9377,
5861	9391,	9397,  9403,  9413,  9419,  9421,  9431,  9433,
5862	9437,	9439,  9461,  9463,  9467,  9473,  9479,  9491,
5863	9497,	9511,  9521,  9533,  9539,  9547,  9551,  9587,
5864	9601,	9613,  9619,  9623,  9629,  9631,  9643,  9649,
5865	9661,	9677,  9679,  9689,  9697,  9719,  9721,  9733,
5866	9739,	9743,  9749,  9767,  9769,  9781,  9787,  9791,
5867	9803,	9811,  9817,  9829,  9833,  9839,  9851,  9857,
5868	9859,	9871,  9883,  9887,  9901,  9907,  9923,  9929,
5869	9931,	9941,  9949,  9967,  9973, 10007, 10009, 10037,
5870	10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
5871	10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
5872	10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
5873	10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
5874	10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
5875	10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
5876	10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
5877	10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
5878	10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
5879	10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
5880	10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
5881	10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
5882	10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
5883	11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
5884	11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
5885	11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
5886	11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
5887	11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
5888	11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
5889	11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
5890	11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
5891	11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
5892	11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
5893	11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
5894	11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
5895	11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
5896	12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
5897	12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
5898	12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
5899	12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
5900	12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
5901	12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
5902	12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
5903	12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
5904	12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
5905	12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
5906	12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
5907	12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
5908	12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
5909	12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
5910	13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
5911	13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
5912	13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
5913	13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
5914	13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
5915	13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
5916	13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
5917	13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
5918	13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
5919	13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
5920	13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
5921	13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
5922	13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
5923	14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
5924	14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
5925	14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
5926	14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
5927	14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
5928	14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
5929	14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
5930	14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
5931	14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
5932	14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
5933	14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
5934	14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
5935	14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
5936	15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
5937	15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
5938	15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
5939	15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
5940	15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
5941	15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
5942	15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
5943	15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
5944	15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
5945	15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
5946	15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
5947	15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
5948	15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
5949	16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
5950	16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
5951	16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
5952	16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
5953	16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
5954	16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
5955	16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
5956	16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
5957	16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
5958	16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
5959	16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
5960	16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
5961	17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
5962	17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
5963	17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
5964	17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
5965	17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
5966	17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
5967	17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
5968	17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
5969	17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
5970	17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
5971	17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
5972#endif
5973};
5974
5975static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
5976    const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
5977static int probable_prime(BIGNUM *rnd, int bits);
5978static int probable_prime_dh(BIGNUM *rnd, int bits,
5979    const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
5980static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
5981    const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
5982
5983int
5984BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
5985    const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
5986{
5987	BIGNUM *t;
5988	int found = 0;
5989	int i, j, c1 = 0;
5990	BN_CTX *ctx;
5991	int checks = BN_prime_checks_for_size(bits);
5992
5993	ctx = BN_CTX_new();
5994	if (ctx == NULL) {
5995		goto err;
5996	}
5997	BN_CTX_start(ctx);
5998	t = BN_CTX_get(ctx);
5999	if (!t) {
6000		goto err;
6001	}
6002loop:
6003	/* make a random number and set the top and bottom bits */
6004	if (add == NULL) {
6005		if (!probable_prime(ret, bits)) {
6006			goto err;
6007		}
6008	} else {
6009		if (safe) {
6010			if (!probable_prime_dh_safe(ret, bits, add, rem, ctx)) {
6011				goto err;
6012			}
6013		} else {
6014			if (!probable_prime_dh(ret, bits, add, rem, ctx)) {
6015				goto err;
6016			}
6017		}
6018	}
6019	/* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */
6020	if (!BN_GENCB_call(cb, 0, c1++)) {
6021		/* aborted */
6022		goto err;
6023	}
6024
6025	if (!safe) {
6026		i = BN_is_prime_fasttest_ex(ret, checks, ctx, 0, cb);
6027		if (i == -1) {
6028			goto err;
6029		}
6030		if (i == 0) {
6031			goto loop;
6032		}
6033	} else {
6034		/* for "safe prime" generation,
6035		 * check that (p-1)/2 is prime.
6036		 * Since a prime is odd, We just
6037		 * need to divide by 2 */
6038		if (!BN_rshift1(t, ret)) {
6039			goto err;
6040		}
6041
6042		for (i = 0; i < checks; i++) {
6043			j = BN_is_prime_fasttest_ex(ret, 1, ctx, 0, cb);
6044			if (j == -1) {
6045				goto err;
6046			}
6047			if (j == 0) {
6048				goto loop;
6049			}
6050
6051			j = BN_is_prime_fasttest_ex(t, 1, ctx, 0, cb);
6052			if (j == -1) {
6053				goto err;
6054			}
6055			if (j == 0) {
6056				goto loop;
6057			}
6058
6059			if (!BN_GENCB_call(cb, 2, c1 - 1)) {
6060				goto err;
6061			}
6062			/* We have a safe prime test pass */
6063		}
6064	}
6065	/* we have a prime :-) */
6066	found = 1;
6067err:
6068	if (ctx != NULL) {
6069		BN_CTX_end(ctx);
6070		BN_CTX_free(ctx);
6071	}
6072	bn_check_top(ret);
6073
6074	return (found);
6075}
6076
6077
6078int
6079BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb)
6080{
6081	return (BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb));
6082}
6083
6084
6085int
6086BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
6087    int do_trial_division, BN_GENCB *cb)
6088{
6089	int i, j, ret = -1;
6090	int k;
6091	BN_CTX *ctx = NULL;
6092	BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
6093	BN_MONT_CTX *mont = NULL;
6094	const BIGNUM *A = NULL;
6095
6096	if (BN_cmp(a, BN_value_one()) <= 0) {
6097		return (0);
6098	}
6099
6100	if (checks == BN_prime_checks) {
6101		checks = BN_prime_checks_for_size(BN_num_bits(a));
6102	}
6103
6104	/* first look for small factors */
6105	if (!BN_is_odd(a)) {
6106		/* a is even => a is prime if and only if a == 2 */
6107		return (BN_is_word(a, 2));
6108	}
6109	if (do_trial_division) {
6110		for (i = 1; i < NUMPRIMES; i++) {
6111			if (BN_mod_word(a, primes[i]) == 0) {
6112				return (0);
6113			}
6114		}
6115		if (!BN_GENCB_call(cb, 1, -1)) {
6116			goto err;
6117		}
6118	}
6119
6120	if (ctx_passed != NULL) {
6121		ctx = ctx_passed;
6122	} else if ((ctx = BN_CTX_new()) == NULL) {
6123		goto err;
6124	}
6125	BN_CTX_start(ctx);
6126
6127	/* A := abs(a) */
6128	if (a->neg) {
6129		BIGNUM *t;
6130		if ((t = BN_CTX_get(ctx)) == NULL) {
6131			goto err;
6132		}
6133		BN_copy(t, a);
6134		t->neg = 0;
6135		A = t;
6136	} else{
6137		A = a;
6138	}
6139	A1 = BN_CTX_get(ctx);
6140	A1_odd = BN_CTX_get(ctx);
6141	check = BN_CTX_get(ctx);
6142	if (check == NULL) {
6143		goto err;
6144	}
6145
6146	/* compute A1 := A - 1 */
6147	if (!BN_copy(A1, A)) {
6148		goto err;
6149	}
6150	if (!BN_sub_word(A1, 1)) {
6151		goto err;
6152	}
6153	if (BN_is_zero(A1)) {
6154		ret = 0;
6155		goto err;
6156	}
6157
6158	/* write  A1  as  A1_odd * 2^k */
6159	k = 1;
6160	while (!BN_is_bit_set(A1, k)) {
6161		k++;
6162	}
6163	if (!BN_rshift(A1_odd, A1, k)) {
6164		goto err;
6165	}
6166
6167	/* Montgomery setup for computations mod A */
6168	mont = BN_MONT_CTX_new();
6169	if (mont == NULL) {
6170		goto err;
6171	}
6172	if (!BN_MONT_CTX_set(mont, A, ctx)) {
6173		goto err;
6174	}
6175
6176	for (i = 0; i < checks; i++) {
6177		if (!BN_pseudo_rand_range(check, A1)) {
6178			goto err;
6179		}
6180		if (!BN_add_word(check, 1)) {
6181			goto err;
6182		}
6183		/* now 1 <= check < A */
6184
6185		j = witness(check, A, A1, A1_odd, k, ctx, mont);
6186		if (j == -1) {
6187			goto err;
6188		}
6189		if (j) {
6190			ret = 0;
6191			goto err;
6192		}
6193		if (!BN_GENCB_call(cb, 1, i)) {
6194			goto err;
6195		}
6196	}
6197	ret = 1;
6198err:
6199	if (ctx != NULL) {
6200		BN_CTX_end(ctx);
6201		if (ctx_passed == NULL) {
6202			BN_CTX_free(ctx);
6203		}
6204	}
6205	if (mont != NULL) {
6206		BN_MONT_CTX_free(mont);
6207	}
6208
6209	return (ret);
6210}
6211
6212
6213static int
6214witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
6215    const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
6216{
6217#if 0
6218	if (!BN_mod_exp(w, w, a1_odd, a, ctx)) { /* w := w^a1_odd mod a */
6219		return (-1);
6220	}
6221#else
6222	if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) { /* w := w^a1_odd mod a */
6223		return (-1);
6224	}
6225#endif
6226	if (BN_is_one(w)) {
6227		return (0); /* probably prime */
6228	}
6229	if (BN_cmp(w, a1) == 0) {
6230		return (0); /* w == -1 (mod a),  'a' is probably prime */
6231	}
6232	while (--k) {
6233		if (!BN_mod_mul(w, w, w, a, ctx)) { /* w := w^2 mod a */
6234			return (-1);
6235		}
6236		if (BN_is_one(w)) {
6237			return (1); /* 'a' is composite, otherwise a previous 'w' would
6238		                     * have been == -1 (mod 'a') */
6239		}
6240		if (BN_cmp(w, a1) == 0) {
6241			return (0); /* w == -1 (mod a), 'a' is probably prime */
6242		}
6243	}
6244
6245	/* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
6246	 * and it is neither -1 nor +1 -- so 'a' cannot be prime */
6247	bn_check_top(w);
6248	return (1);
6249}
6250
6251
6252static int
6253probable_prime(BIGNUM *rnd, int bits)
6254{
6255	int i;
6256	prime_t mods[NUMPRIMES];
6257	BN_ULONG delta, maxdelta;
6258
6259again:
6260	if (!BN_rand(rnd, bits, 1, 1)) {
6261		return (0);
6262	}
6263	/* we now have a random number 'rand' to test. */
6264	for (i = 1; i < NUMPRIMES; i++) {
6265		mods[i] = (prime_t)BN_mod_word(rnd, (BN_ULONG)primes[i]);
6266	}
6267	maxdelta = BN_MASK2 - primes[NUMPRIMES-1];
6268	delta = 0;
6269loop: for (i = 1; i < NUMPRIMES; i++) {
6270		/* check that rnd is not a prime and also
6271		 * that gcd(rnd-1,primes) == 1 (except for 2) */
6272		if (((mods[i]+delta)%primes[i]) <= 1) {
6273			delta += 2;
6274			if (delta > maxdelta) {
6275				goto again;
6276			}
6277			goto loop;
6278		}
6279	}
6280	if (!BN_add_word(rnd, delta)) {
6281		return (0);
6282	}
6283	bn_check_top(rnd);
6284	return (1);
6285}
6286
6287
6288static int probable_prime_dh(BIGNUM *rnd, int bits,
6289    const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
6290{
6291	int i, ret = 0;
6292	BIGNUM *t1;
6293
6294	BN_CTX_start(ctx);
6295	if ((t1 = BN_CTX_get(ctx)) == NULL) {
6296		goto err;
6297	}
6298
6299	if (!BN_rand(rnd, bits, 0, 1)) {
6300		goto err;
6301	}
6302
6303	/* we need ((rnd-rem) % add) == 0 */
6304
6305	if (!BN_mod(t1, rnd, add, ctx)) {
6306		goto err;
6307	}
6308	if (!BN_sub(rnd, rnd, t1)) {
6309		goto err;
6310	}
6311	if (rem == NULL) {
6312		if (!BN_add_word(rnd, 1))           {
6313			goto err;
6314		}
6315	}else                                                             {
6316		if (!BN_add(rnd, rnd, rem))                                                            {
6317			goto err;
6318		}
6319	}
6320
6321	/* we now have a random number 'rand' to test. */
6322
6323loop: for (i = 1; i < NUMPRIMES; i++) {
6324		/* check that rnd is a prime */
6325		if (BN_mod_word(rnd, (BN_ULONG)primes[i]) <= 1) {
6326			if (!BN_add(rnd, rnd, add)) {
6327				goto err;
6328			}
6329			goto loop;
6330		}
6331	}
6332	ret = 1;
6333err:
6334	BN_CTX_end(ctx);
6335	bn_check_top(rnd);
6336	return (ret);
6337}
6338
6339
6340static int
6341probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
6342    const BIGNUM *rem, BN_CTX *ctx)
6343{
6344	int i, ret = 0;
6345	BIGNUM *t1, *qadd, *q;
6346
6347	bits--;
6348	BN_CTX_start(ctx);
6349	t1 = BN_CTX_get(ctx);
6350	q = BN_CTX_get(ctx);
6351	qadd = BN_CTX_get(ctx);
6352	if (qadd == NULL) {
6353		goto err;
6354	}
6355
6356	if (!BN_rshift1(qadd, padd)) {
6357		goto err;
6358	}
6359
6360	if (!BN_rand(q, bits, 0, 1)) {
6361		goto err;
6362	}
6363
6364	/* we need ((rnd-rem) % add) == 0 */
6365	if (!BN_mod(t1, q, qadd, ctx)) {
6366		goto err;
6367	}
6368	if (!BN_sub(q, q, t1)) {
6369		goto err;
6370	}
6371	if (rem == NULL) {
6372		if (!BN_add_word(q, 1)) {
6373			goto err;
6374		}
6375	} else {
6376		if (!BN_rshift1(t1, rem)) {
6377			goto err;
6378		}
6379		if (!BN_add(q, q, t1)) {
6380			goto err;
6381		}
6382	}
6383
6384	/* we now have a random number 'rand' to test. */
6385	if (!BN_lshift1(p, q)) {
6386		goto err;
6387	}
6388	if (!BN_add_word(p, 1)) {
6389		goto err;
6390	}
6391
6392loop: for (i = 1; i < NUMPRIMES; i++) {
6393		/* check that p and q are prime */
6394
6395		/* check that for p and q
6396		 * gcd(p-1,primes) == 1 (except for 2) */
6397		if ((BN_mod_word(p, (BN_ULONG)primes[i]) == 0) ||
6398		    (BN_mod_word(q, (BN_ULONG)primes[i]) == 0)) {
6399			if (!BN_add(p, p, padd)) {
6400				goto err;
6401			}
6402			if (!BN_add(q, q, qadd)) {
6403				goto err;
6404			}
6405			goto loop;
6406		}
6407	}
6408	ret = 1;
6409err:
6410	BN_CTX_end(ctx);
6411	bn_check_top(p);
6412
6413	return (ret);
6414}
6415
6416
6417/*
6418 *  GCD
6419 */
6420static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
6421
6422int
6423BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
6424{
6425	BIGNUM *a, *b, *t;
6426	int ret = 0;
6427
6428	bn_check_top(in_a);
6429	bn_check_top(in_b);
6430
6431	BN_CTX_start(ctx);
6432	a = BN_CTX_get(ctx);
6433	b = BN_CTX_get(ctx);
6434	if ((a == NULL) || (b == NULL)) {
6435		goto err;
6436	}
6437
6438	if (BN_copy(a, in_a) == NULL) {
6439		goto err;
6440	}
6441	if (BN_copy(b, in_b) == NULL) {
6442		goto err;
6443	}
6444	a->neg = 0;
6445	b->neg = 0;
6446
6447	if (BN_cmp(a, b) < 0) {
6448		t = a;
6449		a = b;
6450		b = t;
6451	}
6452	t = euclid(a, b);
6453	if (t == NULL) {
6454		goto err;
6455	}
6456
6457	if (BN_copy(r, t) == NULL) {
6458		goto err;
6459	}
6460	ret = 1;
6461err:
6462	BN_CTX_end(ctx);
6463	bn_check_top(r);
6464	return (ret);
6465}
6466
6467
6468static BIGNUM *
6469euclid(BIGNUM *a, BIGNUM *b)
6470{
6471	BIGNUM *t;
6472	int shifts = 0;
6473
6474	bn_check_top(a);
6475	bn_check_top(b);
6476
6477	/* 0 <= b <= a */
6478	while (!BN_is_zero(b)) {
6479		/* 0 < b <= a */
6480
6481		if (BN_is_odd(a)) {
6482			if (BN_is_odd(b)) {
6483				if (!BN_sub(a, a, b)) {
6484					goto err;
6485				}
6486				if (!BN_rshift1(a, a)) {
6487					goto err;
6488				}
6489				if (BN_cmp(a, b) < 0) {
6490					t = a;
6491					a = b;
6492					b = t;
6493				}
6494			} else {
6495				/* a odd - b even */
6496				if (!BN_rshift1(b, b)) {
6497					goto err;
6498				}
6499				if (BN_cmp(a, b) < 0) {
6500					t = a;
6501					a = b;
6502					b = t;
6503				}
6504			}
6505		} else {
6506			/* a is even */
6507			if (BN_is_odd(b)) {
6508				if (!BN_rshift1(a, a)) {
6509					goto err;
6510				}
6511				if (BN_cmp(a, b) < 0) {
6512					t = a;
6513					a = b;
6514					b = t;
6515				}
6516			} else {
6517				/* a even - b even */
6518				if (!BN_rshift1(a, a)) {
6519					goto err;
6520				}
6521				if (!BN_rshift1(b, b)) {
6522					goto err;
6523				}
6524				shifts++;
6525			}
6526		}
6527		/* 0 <= b <= a */
6528	}
6529
6530	if (shifts) {
6531		if (!BN_lshift(a, a, shifts)) {
6532			goto err;
6533		}
6534	}
6535	bn_check_top(a);
6536
6537	return (a);
6538
6539err:
6540	return (NULL);
6541}
6542
6543
6544/* solves ax == 1 (mod n) */
6545static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
6546    const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
6547
6548BIGNUM *
6549BN_mod_inverse(BIGNUM *in, const BIGNUM *a, const BIGNUM *n,
6550    BN_CTX *ctx)
6551{
6552	BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
6553	BIGNUM *ret = NULL;
6554	int sign;
6555
6556	if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) {
6557		return (BN_mod_inverse_no_branch(in, a, n, ctx));
6558	}
6559
6560	bn_check_top(a);
6561	bn_check_top(n);
6562
6563	BN_CTX_start(ctx);
6564	A = BN_CTX_get(ctx);
6565	B = BN_CTX_get(ctx);
6566	X = BN_CTX_get(ctx);
6567	D = BN_CTX_get(ctx);
6568	M = BN_CTX_get(ctx);
6569	Y = BN_CTX_get(ctx);
6570	T = BN_CTX_get(ctx);
6571	if (T == NULL) {
6572		goto err;
6573	}
6574
6575	if (in == NULL) {
6576		R = BN_new();
6577	} else{
6578		R = in;
6579	}
6580	if (R == NULL) {
6581		goto err;
6582	}
6583
6584	BN_one(X);
6585	BN_zero(Y);
6586	if (BN_copy(B, a) == NULL) {
6587		goto err;
6588	}
6589	if (BN_copy(A, n) == NULL) {
6590		goto err;
6591	}
6592	A->neg = 0;
6593	if (B->neg || (BN_ucmp(B, A) >= 0)) {
6594		if (!BN_nnmod(B, B, A, ctx)) {
6595			goto err;
6596		}
6597	}
6598	sign = -1;
6599
6600	/* From  B = a mod |n|,  A = |n|  it follows that
6601	 *
6602	 *      0 <= B < A,
6603	 *     -sign*X*a  ==  B   (mod |n|),
6604	 *      sign*Y*a  ==  A   (mod |n|).
6605	 */
6606
6607	if (BN_is_odd(n) && (BN_num_bits(n) <= ((BN_BITS <= 32) ? 450 : 2048))) {
6608		/* Binary inversion algorithm; requires odd modulus.
6609		 * This is faster than the general algorithm if the modulus
6610		 * is sufficiently small (about 400 .. 500 bits on 32-bit
6611		 * sytems, but much more on 64-bit systems) */
6612		int shift;
6613
6614		while (!BN_is_zero(B)) {
6615			/*
6616			 *      0 < B < |n|,
6617			 *      0 < A <= |n|,
6618			 * (1) -sign*X*a  ==  B   (mod |n|),
6619			 * (2)  sign*Y*a  ==  A   (mod |n|)
6620			 */
6621
6622			/* Now divide  B  by the maximum possible power of two in the integers,
6623			 * and divide  X  by the same value mod |n|.
6624			 * When we're done, (1) still holds. */
6625			shift = 0;
6626			while (!BN_is_bit_set(B, shift)) {
6627				/* note that 0 < B */
6628				shift++;
6629
6630				if (BN_is_odd(X)) {
6631					if (!BN_uadd(X, X, n)) {
6632						goto err;
6633					}
6634				}
6635				/* now X is even, so we can easily divide it by two */
6636				if (!BN_rshift1(X, X)) {
6637					goto err;
6638				}
6639			}
6640			if (shift > 0) {
6641				if (!BN_rshift(B, B, shift)) {
6642					goto err;
6643				}
6644			}
6645
6646
6647			/* Same for  A  and  Y.  Afterwards, (2) still holds. */
6648			shift = 0;
6649			while (!BN_is_bit_set(A, shift)) {
6650				/* note that 0 < A */
6651				shift++;
6652
6653				if (BN_is_odd(Y)) {
6654					if (!BN_uadd(Y, Y, n)) {
6655						goto err;
6656					}
6657				}
6658				/* now Y is even */
6659				if (!BN_rshift1(Y, Y)) {
6660					goto err;
6661				}
6662			}
6663			if (shift > 0) {
6664				if (!BN_rshift(A, A, shift)) {
6665					goto err;
6666				}
6667			}
6668
6669
6670			/* We still have (1) and (2).
6671			 * Both  A  and  B  are odd.
6672			 * The following computations ensure that
6673			 *
6674			 *     0 <= B < |n|,
6675			 *      0 < A < |n|,
6676			 * (1) -sign*X*a  ==  B   (mod |n|),
6677			 * (2)  sign*Y*a  ==  A   (mod |n|),
6678			 *
6679			 * and that either  A  or  B  is even in the next iteration.
6680			 */
6681			if (BN_ucmp(B, A) >= 0) {
6682				/* -sign*(X + Y)*a == B - A  (mod |n|) */
6683				if (!BN_uadd(X, X, Y)) {
6684					goto err;
6685				}
6686
6687				/* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
6688				 * actually makes the algorithm slower */
6689				if (!BN_usub(B, B, A)) {
6690					goto err;
6691				}
6692			} else {
6693				/*  sign*(X + Y)*a == A - B  (mod |n|) */
6694				if (!BN_uadd(Y, Y, X)) {
6695					goto err;
6696				}
6697				/* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
6698				if (!BN_usub(A, A, B)) {
6699					goto err;
6700				}
6701			}
6702		}
6703	} else {
6704		/* general inversion algorithm */
6705
6706		while (!BN_is_zero(B)) {
6707			BIGNUM *tmp;
6708
6709			/*
6710			 *      0 < B < A,
6711			 * (*) -sign*X*a  ==  B   (mod |n|),
6712			 *      sign*Y*a  ==  A   (mod |n|)
6713			 */
6714
6715			/* (D, M) := (A/B, A%B) ... */
6716			if (BN_num_bits(A) == BN_num_bits(B)) {
6717				if (!BN_one(D)) {
6718					goto err;
6719				}
6720				if (!BN_sub(M, A, B)) {
6721					goto err;
6722				}
6723			} else if (BN_num_bits(A) == BN_num_bits(B) + 1) {
6724				/* A/B is 1, 2, or 3 */
6725				if (!BN_lshift1(T, B)) {
6726					goto err;
6727				}
6728				if (BN_ucmp(A, T) < 0) {
6729					/* A < 2*B, so D=1 */
6730					if (!BN_one(D)) {
6731						goto err;
6732					}
6733					if (!BN_sub(M, A, B)) {
6734						goto err;
6735					}
6736				} else {
6737					/* A >= 2*B, so D=2 or D=3 */
6738					if (!BN_sub(M, A, T)) {
6739						goto err;
6740					}
6741					if (!BN_add(D, T, B)) {
6742						goto err;             /* use D (:= 3*B) as temp */
6743					}
6744					if (BN_ucmp(A, D) < 0) {
6745						/* A < 3*B, so D=2 */
6746						if (!BN_set_word(D, 2)) {
6747							goto err;
6748						}
6749						/* M (= A - 2*B) already has the correct value */
6750					} else {
6751						/* only D=3 remains */
6752						if (!BN_set_word(D, 3)) {
6753							goto err;
6754						}
6755						/* currently  M = A - 2*B,  but we need  M = A - 3*B */
6756						if (!BN_sub(M, M, B)) {
6757							goto err;
6758						}
6759					}
6760				}
6761			} else {
6762				if (!BN_div(D, M, A, B, ctx)) {
6763					goto err;
6764				}
6765			}
6766
6767			/* Now
6768			 *      A = D*B + M;
6769			 * thus we have
6770			 * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
6771			 */
6772
6773			tmp = A; /* keep the BIGNUM object, the value does not matter */
6774
6775			/* (A, B) := (B, A mod B) ... */
6776			A = B;
6777			B = M;
6778			/* ... so we have  0 <= B < A  again */
6779
6780			/* Since the former  M  is now  B  and the former  B  is now  A,
6781			 * (**) translates into
6782			 *       sign*Y*a  ==  D*A + B    (mod |n|),
6783			 * i.e.
6784			 *       sign*Y*a - D*A  ==  B    (mod |n|).
6785			 * Similarly, (*) translates into
6786			 *      -sign*X*a  ==  A          (mod |n|).
6787			 *
6788			 * Thus,
6789			 *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
6790			 * i.e.
6791			 *        sign*(Y + D*X)*a  ==  B  (mod |n|).
6792			 *
6793			 * So if we set  (X, Y, sign) := (Y + D*X, X, -sign),  we arrive back at
6794			 *      -sign*X*a  ==  B   (mod |n|),
6795			 *       sign*Y*a  ==  A   (mod |n|).
6796			 * Note that  X  and  Y  stay non-negative all the time.
6797			 */
6798
6799			/* most of the time D is very small, so we can optimize tmp := D*X+Y */
6800			if (BN_is_one(D)) {
6801				if (!BN_add(tmp, X, Y)) {
6802					goto err;
6803				}
6804			} else {
6805				if (BN_is_word(D, 2)) {
6806					if (!BN_lshift1(tmp, X)) {
6807						goto err;
6808					}
6809				} else if (BN_is_word(D, 4)) {
6810					if (!BN_lshift(tmp, X, 2)) {
6811						goto err;
6812					}
6813				} else if (D->top == 1) {
6814					if (!BN_copy(tmp, X)) {
6815						goto err;
6816					}
6817					if (!BN_mul_word(tmp, D->d[0])) {
6818						goto err;
6819					}
6820				} else {
6821					if (!BN_mul(tmp, D, X, ctx)) {
6822						goto err;
6823					}
6824				}
6825				if (!BN_add(tmp, tmp, Y)) {
6826					goto err;
6827				}
6828			}
6829
6830			M = Y; /* keep the BIGNUM object, the value does not matter */
6831			Y = X;
6832			X = tmp;
6833			sign = -sign;
6834		}
6835	}
6836
6837	/*
6838	 * The while loop (Euclid's algorithm) ends when
6839	 *      A == gcd(a,n);
6840	 * we have
6841	 *       sign*Y*a  ==  A  (mod |n|),
6842	 * where  Y  is non-negative.
6843	 */
6844
6845	if (sign < 0) {
6846		if (!BN_sub(Y, n, Y)) {
6847			goto err;
6848		}
6849	}
6850	/* Now  Y*a  ==  A  (mod |n|).  */
6851
6852
6853	if (BN_is_one(A)) {
6854		/* Y*a == 1  (mod |n|) */
6855		if (!Y->neg && (BN_ucmp(Y, n) < 0)) {
6856			if (!BN_copy(R, Y)) {
6857				goto err;
6858			}
6859		} else {
6860			if (!BN_nnmod(R, Y, n, ctx)) {
6861				goto err;
6862			}
6863		}
6864	} else {
6865		/* BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE); */
6866		goto err;
6867	}
6868	ret = R;
6869err:
6870	if ((ret == NULL) && (in == NULL)) {
6871		BN_free(R);
6872	}
6873	BN_CTX_end(ctx);
6874	bn_check_top(ret);
6875	return (ret);
6876}
6877
6878
6879/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse.
6880 * It does not contain branches that may leak sensitive information.
6881 */
6882static BIGNUM *
6883BN_mod_inverse_no_branch(BIGNUM *in,
6884    const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
6885{
6886	BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
6887	BIGNUM local_A, local_B;
6888	BIGNUM *pA, *pB;
6889	BIGNUM *ret = NULL;
6890	int sign;
6891
6892	bn_check_top(a);
6893	bn_check_top(n);
6894
6895	BN_CTX_start(ctx);
6896	A = BN_CTX_get(ctx);
6897	B = BN_CTX_get(ctx);
6898	X = BN_CTX_get(ctx);
6899	D = BN_CTX_get(ctx);
6900	M = BN_CTX_get(ctx);
6901	Y = BN_CTX_get(ctx);
6902	T = BN_CTX_get(ctx);
6903	if (T == NULL) {
6904		goto err;
6905	}
6906
6907	if (in == NULL) {
6908		R = BN_new();
6909	} else{
6910		R = in;
6911	}
6912	if (R == NULL) {
6913		goto err;
6914	}
6915
6916	BN_one(X);
6917	BN_zero(Y);
6918	if (BN_copy(B, a) == NULL) {
6919		goto err;
6920	}
6921	if (BN_copy(A, n) == NULL) {
6922		goto err;
6923	}
6924	A->neg = 0;
6925
6926	if (B->neg || (BN_ucmp(B, A) >= 0)) {
6927		/* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
6928		 * BN_div_no_branch will be called eventually.
6929		 */
6930		pB = &local_B;
6931		BN_with_flags(pB, B, BN_FLG_CONSTTIME);
6932		if (!BN_nnmod(B, pB, A, ctx)) {
6933			goto err;
6934		}
6935	}
6936	sign = -1;
6937
6938	/* From  B = a mod |n|,  A = |n|  it follows that
6939	 *
6940	 *      0 <= B < A,
6941	 *     -sign*X*a  ==  B   (mod |n|),
6942	 *      sign*Y*a  ==  A   (mod |n|).
6943	 */
6944
6945	while (!BN_is_zero(B)) {
6946		BIGNUM *tmp;
6947
6948		/*
6949		 *      0 < B < A,
6950		 * (*) -sign*X*a  ==  B   (mod |n|),
6951		 *      sign*Y*a  ==  A   (mod |n|)
6952		 */
6953
6954		/* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
6955		 * BN_div_no_branch will be called eventually.
6956		 */
6957		pA = &local_A;
6958		BN_with_flags(pA, A, BN_FLG_CONSTTIME);
6959
6960		/* (D, M) := (A/B, A%B) ... */
6961		if (!BN_div(D, M, pA, B, ctx)) {
6962			goto err;
6963		}
6964
6965		/* Now
6966		 *      A = D*B + M;
6967		 * thus we have
6968		 * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
6969		 */
6970
6971		tmp = A; /* keep the BIGNUM object, the value does not matter */
6972
6973		/* (A, B) := (B, A mod B) ... */
6974		A = B;
6975		B = M;
6976		/* ... so we have  0 <= B < A  again */
6977
6978		/* Since the former  M  is now  B  and the former  B  is now  A,
6979		 * (**) translates into
6980		 *       sign*Y*a  ==  D*A + B    (mod |n|),
6981		 * i.e.
6982		 *       sign*Y*a - D*A  ==  B    (mod |n|).
6983		 * Similarly, (*) translates into
6984		 *      -sign*X*a  ==  A          (mod |n|).
6985		 *
6986		 * Thus,
6987		 *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
6988		 * i.e.
6989		 *        sign*(Y + D*X)*a  ==  B  (mod |n|).
6990		 *
6991		 * So if we set  (X, Y, sign) := (Y + D*X, X, -sign),  we arrive back at
6992		 *      -sign*X*a  ==  B   (mod |n|),
6993		 *       sign*Y*a  ==  A   (mod |n|).
6994		 * Note that  X  and  Y  stay non-negative all the time.
6995		 */
6996
6997		if (!BN_mul(tmp, D, X, ctx)) {
6998			goto err;
6999		}
7000		if (!BN_add(tmp, tmp, Y)) {
7001			goto err;
7002		}
7003
7004		M = Y; /* keep the BIGNUM object, the value does not matter */
7005		Y = X;
7006		X = tmp;
7007		sign = -sign;
7008	}
7009
7010	/*
7011	 * The while loop (Euclid's algorithm) ends when
7012	 *      A == gcd(a,n);
7013	 * we have
7014	 *       sign*Y*a  ==  A  (mod |n|),
7015	 * where  Y  is non-negative.
7016	 */
7017
7018	if (sign < 0) {
7019		if (!BN_sub(Y, n, Y)) {
7020			goto err;
7021		}
7022	}
7023	/* Now  Y*a  ==  A  (mod |n|).  */
7024
7025	if (BN_is_one(A)) {
7026		/* Y*a == 1  (mod |n|) */
7027		if (!Y->neg && (BN_ucmp(Y, n) < 0)) {
7028			if (!BN_copy(R, Y)) {
7029				goto err;
7030			}
7031		} else {
7032			if (!BN_nnmod(R, Y, n, ctx)) {
7033				goto err;
7034			}
7035		}
7036	} else {
7037		/* BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH,BN_R_NO_INVERSE); */
7038		goto err;
7039	}
7040	ret = R;
7041err:
7042	if ((ret == NULL) && (in == NULL)) {
7043		BN_free(R);
7044	}
7045	BN_CTX_end(ctx);
7046	bn_check_top(ret);
7047	return (ret);
7048}
7049
7050
7051/*
7052 * mont
7053 */
7054
7055/*
7056 * Details about Montgomery multiplication algorithms can be found at
7057 * http://security.ece.orst.edu/publications.html, e.g.
7058 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
7059 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
7060 */
7061
7062#define MONT_WORD    /* use the faster word-based algorithm */
7063
7064#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2 <= 32)
7065
7066/* This condition means we have a specific non-default build:
7067 * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any
7068 * BN_BITS2<=32 platform; an explicit "enable-montasm" is required.
7069 * I.e., if we are here, the user intentionally deviates from the
7070 * normal stable build to get better Montgomery performance from
7071 * the 0.9.9-dev backport.
7072 *
7073 * In this case only, we also enable BN_from_montgomery_word()
7074 * (another non-stable feature from 0.9.9-dev).
7075 */
7076#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
7077#endif
7078
7079#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
7080static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
7081
7082#endif
7083
7084int
7085BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
7086    BN_MONT_CTX *mont, BN_CTX *ctx)
7087{
7088	BIGNUM *tmp;
7089	int ret = 0;
7090
7091#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
7092	int num = mont->N.top;
7093
7094	if ((num > 1) && (a->top == num) && (b->top == num)) {
7095		if (bn_wexpand(r, num) == NULL) {
7096			return (0);
7097		}
7098		if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, &mont->n0, num)) {
7099			r->neg = a->neg^b->neg;
7100			r->top = num;
7101			bn_correct_top(r);
7102			return (1);
7103		}
7104	}
7105#endif
7106
7107	BN_CTX_start(ctx);
7108	tmp = BN_CTX_get(ctx);
7109	if (tmp == NULL) {
7110		goto err;
7111	}
7112
7113	bn_check_top(tmp);
7114	if (a == b) {
7115		if (!BN_sqr(tmp, a, ctx)) {
7116			goto err;
7117		}
7118	} else {
7119		if (!BN_mul(tmp, a, b, ctx)) {
7120			goto err;
7121		}
7122	}
7123	/* reduce from aRR to aR */
7124#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
7125	if (!BN_from_montgomery_word(r, tmp, mont)) {
7126		goto err;
7127	}
7128#else
7129	if (!BN_from_montgomery(r, tmp, mont, ctx)) {
7130		goto err;
7131	}
7132#endif
7133	bn_check_top(r);
7134	ret = 1;
7135err:
7136	BN_CTX_end(ctx);
7137	return (ret);
7138}
7139
7140
7141#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
7142static int
7143BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
7144{
7145	BIGNUM *n;
7146	BN_ULONG *ap, *np, *rp, n0, v, *nrp;
7147	int al, nl, max, i, x, ri;
7148
7149	n = &(mont->N);
7150
7151	/* mont->ri is the size of mont->N in bits (rounded up
7152	 * to the word size) */
7153	al = ri = mont->ri / BN_BITS2;
7154
7155	nl = n->top;
7156	if ((al == 0) || (nl == 0)) {
7157		ret->top = 0;
7158		return (1);
7159	}
7160
7161	max = (nl+al+1); /* allow for overflow (no?) XXX */
7162	if (bn_wexpand(r, max) == NULL) {
7163		return (0);
7164	}
7165
7166	r->neg ^= n->neg;
7167	np = n->d;
7168	rp = r->d;
7169	nrp = &(r->d[nl]);
7170
7171	/* clear the top words of T */
7172	for (i = r->top; i < max; i++) { /* memset? XXX */
7173		r->d[i] = 0;
7174	}
7175
7176	r->top = max;
7177	n0 = mont->n0;
7178
7179#ifdef BN_COUNT
7180	fprintf(stderr, "word BN_from_montgomery_word %d * %d\n", nl, nl);
7181#endif
7182	for (i = 0; i < nl; i++) {
7183#ifdef __TANDEM
7184		{
7185			long long t1;
7186			long long t2;
7187			long long t3;
7188			t1 = rp[0] * (n0 & 0177777);
7189			t2 = 037777600000l;
7190			t2 = n0 & t2;
7191			t3 = rp[0] & 0177777;
7192			t2 = (t3 * t2) & BN_MASK2;
7193			t1 = t1 + t2;
7194			v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1);
7195		}
7196#else
7197		v = bn_mul_add_words(rp, np, nl, (rp[0]*n0) & BN_MASK2);
7198#endif
7199		nrp++;
7200		rp++;
7201		if (((nrp[-1] += v)&BN_MASK2) >= v) {
7202			continue;
7203		} else{
7204			if (((++nrp[0])&BN_MASK2) != 0) {
7205				continue;
7206			}
7207			if (((++nrp[1])&BN_MASK2) != 0) {
7208				continue;
7209			}
7210			for (x = 2; (((++nrp[x])&BN_MASK2) == 0); x++) {
7211			}
7212		}
7213	}
7214	bn_correct_top(r);
7215
7216	/* mont->ri will be a multiple of the word size and below code
7217	 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
7218	if (r->top <= ri) {
7219		ret->top = 0;
7220		return (1);
7221	}
7222	al = r->top-ri;
7223
7224	if (bn_wexpand(ret, ri) == NULL) {
7225		return (0);
7226	}
7227	x = 0 - (((al-ri) >> (sizeof(al) * 8 - 1)) & 1);
7228	ret->top = x = (ri & ~x) | (al & x);    /* min(ri,al) */
7229	ret->neg = r->neg;
7230
7231	rp = ret->d;
7232	ap = &(r->d[ri]);
7233
7234	{
7235		size_t m1, m2;
7236
7237		v = bn_sub_words(rp, ap, np, ri);
7238
7239		/* this ----------------^^ works even in al<ri case
7240		 * thanks to zealous zeroing of top of the vector in the
7241		 * beginning. */
7242
7243		/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
7244
7245		/* in other words if subtraction result is real, then
7246		 * trick unconditional memcpy below to perform in-place
7247		 * "refresh" instead of actual copy. */
7248		m1 = 0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
7249		m2 = 0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
7250		m1 |= m2;                                       /* (al!=ri) */
7251		m1 |= (0-(size_t)v);                            /* (al!=ri || v) */
7252		m1 &= ~m2;                                      /* (al!=ri || v) && !al>ri */
7253		nrp = (BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
7254	}
7255
7256	/* 'i<ri' is chosen to eliminate dependency on input data, even
7257	 * though it results in redundant copy in al<ri case. */
7258	for (i = 0, ri -= 4; i < ri; i += 4) {
7259		BN_ULONG t1, t2, t3, t4;
7260
7261		t1 = nrp[i+0];
7262		t2 = nrp[i+1];
7263		t3 = nrp[i+2];
7264		ap[i+0] = 0;
7265		t4 = nrp[i+3];
7266		ap[i+1] = 0;
7267		rp[i+0] = t1;
7268		ap[i+2] = 0;
7269		rp[i+1] = t2;
7270		ap[i+3] = 0;
7271		rp[i+2] = t3;
7272		rp[i+3] = t4;
7273	}
7274	for (ri += 4; i < ri; i++) {
7275		rp[i] = nrp[i], ap[i] = 0;
7276	}
7277	bn_correct_top(r);
7278	bn_correct_top(ret);
7279	bn_check_top(ret);
7280
7281	return (1);
7282}
7283
7284
7285int
7286BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
7287    BN_CTX *ctx)
7288{
7289	int retn = 0;
7290	BIGNUM *t;
7291
7292	BN_CTX_start(ctx);
7293	if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
7294		retn = BN_from_montgomery_word(ret, t, mont);
7295	}
7296	BN_CTX_end(ctx);
7297	return (retn);
7298}
7299
7300
7301#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
7302
7303int
7304BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
7305    BN_CTX *ctx)
7306{
7307	int retn = 0;
7308
7309#ifdef MONT_WORD
7310	BIGNUM *n, *r;
7311	BN_ULONG *ap, *np, *rp, n0, v, *nrp;
7312	int al, nl, max, i, x, ri;
7313
7314	BN_CTX_start(ctx);
7315	if ((r = BN_CTX_get(ctx)) == NULL) {
7316		goto err;
7317	}
7318
7319	if (!BN_copy(r, a)) {
7320		goto err;
7321	}
7322	n = &(mont->N);
7323
7324	ap = a->d;
7325
7326	/* mont->ri is the size of mont->N in bits (rounded up
7327	 * to the word size) */
7328	al = ri = mont->ri/BN_BITS2;
7329
7330	nl = n->top;
7331	if ((al == 0) || (nl == 0)) {
7332		r->top = 0;
7333		return (1);
7334	}
7335
7336	max = (nl+al+1); /* allow for overflow (no?) XXX */
7337	if (bn_wexpand(r, max) == NULL) {
7338		goto err;
7339	}
7340
7341	r->neg = a->neg^n->neg;
7342	np = n->d;
7343	rp = r->d;
7344	nrp = &(r->d[nl]);
7345
7346	/* clear the top words of T */
7347#if 1
7348	for (i = r->top; i < max; i++) { /* memset? XXX */
7349		r->d[i] = 0;
7350	}
7351#else
7352	memset(&(r->d[r->top]), 0, (max-r->top)*sizeof(BN_ULONG));
7353#endif
7354
7355	r->top = max;
7356	n0 = mont->n0;
7357
7358#ifdef BN_COUNT
7359	fprintf(stderr, "word BN_from_montgomery %d * %d\n", nl, nl);
7360#endif
7361	for (i = 0; i < nl; i++) {
7362#ifdef __TANDEM
7363		{
7364			long long t1;
7365			long long t2;
7366			long long t3;
7367			t1 = rp[0] * (n0 & 0177777);
7368			t2 = 037777600000l;
7369			t2 = n0 & t2;
7370			t3 = rp[0] & 0177777;
7371			t2 = (t3 * t2) & BN_MASK2;
7372			t1 = t1 + t2;
7373			v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1);
7374		}
7375#else
7376		v = bn_mul_add_words(rp, np, nl, (rp[0]*n0)&BN_MASK2);
7377#endif
7378		nrp++;
7379		rp++;
7380		if (((nrp[-1] += v)&BN_MASK2) >= v) {
7381			continue;
7382		} else{
7383			if (((++nrp[0])&BN_MASK2) != 0) {
7384				continue;
7385			}
7386			if (((++nrp[1])&BN_MASK2) != 0) {
7387				continue;
7388			}
7389			for (x = 2; (((++nrp[x])&BN_MASK2) == 0); x++) {
7390			}
7391		}
7392	}
7393	bn_correct_top(r);
7394
7395	/* mont->ri will be a multiple of the word size and below code
7396	 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
7397	if (r->top <= ri) {
7398		ret->top = 0;
7399		retn = 1;
7400		goto err;
7401	}
7402	al = r->top-ri;
7403
7404# define BRANCH_FREE    1
7405# if BRANCH_FREE
7406	if (bn_wexpand(ret, ri) == NULL) {
7407		goto err;
7408	}
7409	x = 0-(((al-ri)>>(sizeof(al)*8-1))&1);
7410	ret->top = x = (ri&~x)|(al&x);      /* min(ri,al) */
7411	ret->neg = r->neg;
7412
7413	rp = ret->d;
7414	ap = &(r->d[ri]);
7415
7416	{
7417		size_t m1, m2;
7418
7419		v = bn_sub_words(rp, ap, np, ri);
7420
7421		/* this ----------------^^ works even in al<ri case
7422		 * thanks to zealous zeroing of top of the vector in the
7423		 * beginning. */
7424
7425		/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
7426
7427		/* in other words if subtraction result is real, then
7428		 * trick unconditional memcpy below to perform in-place
7429		 * "refresh" instead of actual copy. */
7430		m1 = 0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
7431		m2 = 0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
7432		m1 |= m2;                                       /* (al!=ri) */
7433		m1 |= (0-(size_t)v);                            /* (al!=ri || v) */
7434		m1 &= ~m2;                                      /* (al!=ri || v) && !al>ri */
7435		nrp = (BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
7436	}
7437
7438	/* 'i<ri' is chosen to eliminate dependency on input data, even
7439	 * though it results in redundant copy in al<ri case. */
7440	for (i = 0, ri -= 4; i < ri; i += 4) {
7441		BN_ULONG t1, t2, t3, t4;
7442
7443		t1 = nrp[i+0];
7444		t2 = nrp[i+1];
7445		t3 = nrp[i+2];
7446		ap[i+0] = 0;
7447		t4 = nrp[i+3];
7448		ap[i+1] = 0;
7449		rp[i+0] = t1;
7450		ap[i+2] = 0;
7451		rp[i+1] = t2;
7452		ap[i+3] = 0;
7453		rp[i+2] = t3;
7454		rp[i+3] = t4;
7455	}
7456	for (ri += 4; i < ri; i++) {
7457		rp[i] = nrp[i], ap[i] = 0;
7458	}
7459	bn_correct_top(r);
7460	bn_correct_top(ret);
7461# else
7462	if (bn_wexpand(ret, al) == NULL) {
7463		goto err;
7464	}
7465	ret->top = al;
7466	ret->neg = r->neg;
7467
7468	rp = ret->d;
7469	ap = &(r->d[ri]);
7470	al -= 4;
7471	for (i = 0; i < al; i += 4) {
7472		BN_ULONG t1, t2, t3, t4;
7473
7474		t1 = ap[i+0];
7475		t2 = ap[i+1];
7476		t3 = ap[i+2];
7477		t4 = ap[i+3];
7478		rp[i+0] = t1;
7479		rp[i+1] = t2;
7480		rp[i+2] = t3;
7481		rp[i+3] = t4;
7482	}
7483	al += 4;
7484	for ( ; i < al; i++) {
7485		rp[i] = ap[i];
7486	}
7487# endif
7488#else   /* !MONT_WORD */
7489	BIGNUM *t1, *t2;
7490
7491	BN_CTX_start(ctx);
7492	t1 = BN_CTX_get(ctx);
7493	t2 = BN_CTX_get(ctx);
7494	if ((t1 == NULL) || (t2 == NULL)) {
7495		goto err;
7496	}
7497
7498	if (!BN_copy(t1, a)) {
7499		goto err;
7500	}
7501	BN_mask_bits(t1, mont->ri);
7502
7503	if (!BN_mul(t2, t1, &mont->Ni, ctx)) {
7504		goto err;
7505	}
7506	BN_mask_bits(t2, mont->ri);
7507
7508	if (!BN_mul(t1, t2, &mont->N, ctx)) {
7509		goto err;
7510	}
7511	if (!BN_add(t2, a, t1)) {
7512		goto err;
7513	}
7514	if (!BN_rshift(ret, t2, mont->ri)) {
7515		goto err;
7516	}
7517#endif  /* MONT_WORD */
7518
7519#if !defined(BRANCH_FREE) || BRANCH_FREE == 0
7520	if (BN_ucmp(ret, &(mont->N)) >= 0) {
7521		if (!BN_usub(ret, ret, &(mont->N))) {
7522			goto err;
7523		}
7524	}
7525#endif
7526	retn = 1;
7527	bn_check_top(ret);
7528err:
7529	BN_CTX_end(ctx);
7530	return (retn);
7531}
7532
7533
7534#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
7535
7536BN_MONT_CTX *
7537BN_MONT_CTX_new(void)
7538{
7539	BN_MONT_CTX *ret;
7540
7541	if ((ret = (BN_MONT_CTX *)malloc(sizeof(BN_MONT_CTX))) == NULL) {
7542		return (NULL);
7543	}
7544
7545	BN_MONT_CTX_init(ret);
7546	ret->flags = BN_FLG_MALLOCED;
7547	return (ret);
7548}
7549
7550
7551void
7552BN_MONT_CTX_init(BN_MONT_CTX *ctx)
7553{
7554	ctx->ri = 0;
7555	BN_init(&(ctx->RR));
7556	BN_init(&(ctx->N));
7557	BN_init(&(ctx->Ni));
7558#if 0   /* for OpenSSL 0.9.9 mont->n0 */
7559	ctx->n0[0] = ctx->n0[1] = 0;
7560#else
7561	ctx->n0 = 0;
7562#endif
7563	ctx->flags = 0;
7564}
7565
7566
7567void
7568BN_MONT_CTX_free(BN_MONT_CTX *mont)
7569{
7570	if (mont == NULL) {
7571		return;
7572	}
7573
7574	BN_free(&(mont->RR));
7575	BN_free(&(mont->N));
7576	BN_free(&(mont->Ni));
7577	if (mont->flags & BN_FLG_MALLOCED) {
7578		free(mont);
7579	}
7580}
7581
7582
7583int
7584BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
7585{
7586	int ret = 0;
7587	BIGNUM *Ri, *R;
7588
7589	BN_CTX_start(ctx);
7590	if ((Ri = BN_CTX_get(ctx)) == NULL) {
7591		goto err;
7592	}
7593	R = &(mont->RR);                                /* grab RR as a temp */
7594	if (!BN_copy(&(mont->N), mod)) {
7595		goto err;                               /* Set N */
7596	}
7597	mont->N.neg = 0;
7598
7599#ifdef MONT_WORD
7600	{
7601		BIGNUM tmod;
7602		BN_ULONG buf[2];
7603
7604		mont->ri = (BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
7605		BN_zero(R);
7606		if (!(BN_set_bit(R, BN_BITS2))) {
7607			goto err;       /* R */
7608		}
7609		buf[0] = mod->d[0];     /* tmod = N mod word size */
7610		buf[1] = 0;
7611
7612		BN_init(&tmod);
7613		tmod.d = buf;
7614		tmod.top = buf[0] != 0 ? 1 : 0;
7615		tmod.dmax = 2;
7616		tmod.neg = 0;
7617
7618		/* Ri = R^-1 mod N*/
7619		if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) {
7620			goto err;
7621		}
7622		if (!BN_lshift(Ri, Ri, BN_BITS2)) {
7623			goto err;                         /* R*Ri */
7624		}
7625		if (!BN_is_zero(Ri)) {
7626			if (!BN_sub_word(Ri, 1)) {
7627				goto err;
7628			}
7629		} else {
7630			/* if N mod word size == 1 */
7631			if (!BN_set_word(Ri, BN_MASK2)) {
7632				goto err;                         /* Ri-- (mod word size) */
7633			}
7634		}
7635		if (!BN_div(Ri, NULL, Ri, &tmod, ctx)) {
7636			goto err;
7637		}
7638
7639		/* Ni = (R*Ri-1)/N,
7640		 * keep only least significant word: */
7641		mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
7642	}
7643#else   /* !MONT_WORD */
7644	{ /* bignum version */
7645		mont->ri = BN_num_bits(&mont->N);
7646		BN_zero(R);
7647		if (!BN_set_bit(R, mont->ri)) {
7648			goto err;                       /* R = 2^ri */
7649		}
7650		/* Ri = R^-1 mod N*/
7651		if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL) {
7652			goto err;
7653		}
7654		if (!BN_lshift(Ri, Ri, mont->ri)) {
7655			goto err;                         /* R*Ri */
7656		}
7657		if (!BN_sub_word(Ri, 1)) {
7658			goto err;
7659		}
7660		/* Ni = (R*Ri-1) / N */
7661		if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx)) {
7662			goto err;
7663		}
7664	}
7665#endif
7666
7667	/* setup RR for conversions */
7668	BN_zero(&(mont->RR));
7669	if (!BN_set_bit(&(mont->RR), mont->ri*2)) {
7670		goto err;
7671	}
7672	if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx)) {
7673		goto err;
7674	}
7675
7676	ret = 1;
7677err:
7678	BN_CTX_end(ctx);
7679	return (ret);
7680}
7681
7682
7683BN_MONT_CTX *
7684BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
7685{
7686	if (to == from) {
7687		return (to);
7688	}
7689
7690	if (!BN_copy(&(to->RR), &(from->RR))) {
7691		return (NULL);
7692	}
7693	if (!BN_copy(&(to->N), &(from->N))) {
7694		return (NULL);
7695	}
7696	if (!BN_copy(&(to->Ni), &(from->Ni))) {
7697		return (NULL);
7698	}
7699	to->ri = from->ri;
7700	to->n0 = from->n0;
7701	return (to);
7702}
7703
7704
7705#if 0
7706BN_MONT_CTX *
7707BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
7708    const BIGNUM *mod, BN_CTX *ctx)
7709{
7710	int got_write_lock = 0;
7711	BN_MONT_CTX *ret;
7712
7713	CRYPTO_r_lock(lock);
7714	if (!*pmont) {
7715		CRYPTO_r_unlock(lock);
7716		CRYPTO_w_lock(lock);
7717		got_write_lock = 1;
7718
7719		if (!*pmont) {
7720			ret = BN_MONT_CTX_new();
7721			if (ret && !BN_MONT_CTX_set(ret, mod, ctx)) {
7722				BN_MONT_CTX_free(ret);
7723			} else{
7724				*pmont = ret;
7725			}
7726		}
7727	}
7728
7729	ret = *pmont;
7730
7731	if (got_write_lock) {
7732		CRYPTO_w_unlock(lock);
7733	} else{
7734		CRYPTO_r_unlock(lock);
7735	}
7736
7737	return (ret);
7738}
7739#endif
7740