1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project.  All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 *    notice, this list of conditions and the following disclaimer in
70 *    the documentation and/or other materials provided with the
71 *    distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 *    software must display the following acknowledgment:
75 *    "This product includes software developed by the OpenSSL Project
76 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 *    endorse or promote products derived from this software without
80 *    prior written permission. For written permission, please contact
81 *    openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 *    nor may "OpenSSL" appear in their names without prior written
85 *    permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 *    acknowledgment:
89 *    "This product includes software developed by the OpenSSL Project
90 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com).  This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112
113#include <stdio.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#ifdef ATALLA
117# include <alloca.h>
118# include <atasi.h>
119# include <assert.h>
120# include <dlfcn.h>
121#endif
122
123
124#define TABLE_SIZE	32
125
126/* slow but works */
127int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
128	{
129	BIGNUM *t;
130	int r=0;
131
132	bn_check_top(a);
133	bn_check_top(b);
134	bn_check_top(m);
135
136	BN_CTX_start(ctx);
137	if ((t = BN_CTX_get(ctx)) == NULL) goto err;
138	if (a == b)
139		{ if (!BN_sqr(t,a,ctx)) goto err; }
140	else
141		{ if (!BN_mul(t,a,b,ctx)) goto err; }
142	if (!BN_mod(ret,t,m,ctx)) goto err;
143	r=1;
144err:
145	BN_CTX_end(ctx);
146	return(r);
147	}
148
149
150/* this one works - simple but works */
151int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx)
152	{
153	int i,bits,ret=0;
154	BIGNUM *v,*rr;
155
156	BN_CTX_start(ctx);
157	if ((r == a) || (r == p))
158		rr = BN_CTX_get(ctx);
159	else
160		rr = r;
161	if ((v = BN_CTX_get(ctx)) == NULL) goto err;
162
163	if (BN_copy(v,a) == NULL) goto err;
164	bits=BN_num_bits(p);
165
166	if (BN_is_odd(p))
167		{ if (BN_copy(rr,a) == NULL) goto err; }
168	else	{ if (!BN_one(rr)) goto err; }
169
170	for (i=1; i<bits; i++)
171		{
172		if (!BN_sqr(v,v,ctx)) goto err;
173		if (BN_is_bit_set(p,i))
174			{
175			if (!BN_mul(rr,rr,v,ctx)) goto err;
176			}
177		}
178	ret=1;
179err:
180	if (r != rr) BN_copy(r,rr);
181	BN_CTX_end(ctx);
182	return(ret);
183	}
184
185
186#ifdef ATALLA
187
188/*
189 * This routine will dynamically check for the existance of an Atalla AXL-200
190 * SSL accelerator module.  If one is found, the variable
191 * asi_accelerator_present is set to 1 and the function pointers
192 * ptr_ASI_xxxxxx above will be initialized to corresponding ASI API calls.
193 */
194typedef int tfnASI_GetPerformanceStatistics(int reset_flag,
195					    unsigned int *ret_buf);
196typedef int tfnASI_GetHardwareConfig(long card_num, unsigned int *ret_buf);
197typedef int tfnASI_RSAPrivateKeyOpFn(RSAPrivateKey * rsaKey,
198				     unsigned char *output,
199				     unsigned char *input,
200				     unsigned int modulus_len);
201
202static tfnASI_GetHardwareConfig *ptr_ASI_GetHardwareConfig;
203static tfnASI_RSAPrivateKeyOpFn *ptr_ASI_RSAPrivateKeyOpFn;
204static tfnASI_GetPerformanceStatistics *ptr_ASI_GetPerformanceStatistics;
205static int asi_accelerator_present;
206static int tried_atalla;
207
208void atalla_initialize_accelerator_handle(void)
209	{
210	void *dl_handle;
211	int status;
212	unsigned int config_buf[1024];
213	static int tested;
214
215	if(tested)
216		return;
217
218	tested=1;
219
220	bzero((void *)config_buf, 1024);
221
222	/*
223	 * Check to see if the library is present on the system
224	 */
225	dl_handle = dlopen("atasi.so", RTLD_NOW);
226	if (dl_handle == (void *) NULL)
227		{
228/*		printf("atasi.so library is not present on the system\n");
229		printf("No HW acceleration available\n");*/
230		return;
231	        }
232
233	/*
234	 * The library is present.  Now we'll check to insure that the
235	 * LDM is up and running. First we'll get the address of the
236	 * function in the atasi library that we need to see if the
237	 * LDM is operating.
238	 */
239
240	ptr_ASI_GetHardwareConfig =
241	  (tfnASI_GetHardwareConfig *)dlsym(dl_handle,"ASI_GetHardwareConfig");
242
243	if (ptr_ASI_GetHardwareConfig)
244		{
245		/*
246		 * We found the call, now we'll get our config
247		 * status.  If we get a non 0 result, the LDM is not
248		 * running and we cannot use the Atalla ASI *
249		 * library.
250		 */
251		status = (*ptr_ASI_GetHardwareConfig)(0L, config_buf);
252		if (status != 0)
253			{
254			printf("atasi.so library is present but not initialized\n");
255			printf("No HW acceleration available\n");
256			return;
257			}
258	        }
259	else
260		{
261/*		printf("We found the library, but not the function. Very Strange!\n");*/
262		return ;
263	      	}
264
265	/*
266	 * It looks like we have acceleration capabilities.  Load up the
267	 * pointers to our ASI API calls.
268	 */
269	ptr_ASI_RSAPrivateKeyOpFn=
270	  (tfnASI_RSAPrivateKeyOpFn *)dlsym(dl_handle, "ASI_RSAPrivateKeyOpFn");
271	if (ptr_ASI_RSAPrivateKeyOpFn == NULL)
272		{
273/*		printf("We found the library, but no RSA function. Very Strange!\n");*/
274		return;
275	        }
276
277	ptr_ASI_GetPerformanceStatistics =
278	  (tfnASI_GetPerformanceStatistics *)dlsym(dl_handle, "ASI_GetPerformanceStatistics");
279	if (ptr_ASI_GetPerformanceStatistics == NULL)
280		{
281/*		printf("We found the library, but no stat function. Very Strange!\n");*/
282		return;
283	      }
284
285	/*
286	 * Indicate that acceleration is available
287	 */
288	asi_accelerator_present = 1;
289
290/*	printf("This system has acceleration!\n");*/
291
292	return;
293	}
294
295/* make sure this only gets called once when bn_mod_exp calls bn_mod_exp_mont */
296int BN_mod_exp_atalla(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m)
297	{
298	unsigned char *abin;
299	unsigned char *pbin;
300	unsigned char *mbin;
301	unsigned char *rbin;
302	int an,pn,mn,ret;
303	RSAPrivateKey keydata;
304
305	atalla_initialize_accelerator_handle();
306	if(!asi_accelerator_present)
307		return 0;
308
309
310/* We should be able to run without size testing */
311# define ASIZE	128
312	an=BN_num_bytes(a);
313	pn=BN_num_bytes(p);
314	mn=BN_num_bytes(m);
315
316	if(an <= ASIZE && pn <= ASIZE && mn <= ASIZE)
317	    {
318	    int size=mn;
319
320	    assert(an <= mn);
321	    abin=alloca(size);
322	    memset(abin,'\0',mn);
323	    BN_bn2bin(a,abin+size-an);
324
325	    pbin=alloca(pn);
326	    BN_bn2bin(p,pbin);
327
328	    mbin=alloca(size);
329	    memset(mbin,'\0',mn);
330	    BN_bn2bin(m,mbin+size-mn);
331
332	    rbin=alloca(size);
333
334	    memset(&keydata,'\0',sizeof keydata);
335	    keydata.privateExponent.data=pbin;
336	    keydata.privateExponent.len=pn;
337	    keydata.modulus.data=mbin;
338	    keydata.modulus.len=size;
339
340	    ret=(*ptr_ASI_RSAPrivateKeyOpFn)(&keydata,rbin,abin,keydata.modulus.len);
341/*fprintf(stderr,"!%s\n",BN_bn2hex(a));*/
342	    if(!ret)
343	        {
344		BN_bin2bn(rbin,keydata.modulus.len,r);
345/*fprintf(stderr,"?%s\n",BN_bn2hex(r));*/
346		return 1;
347	        }
348	    }
349	return 0;
350        }
351#endif /* def ATALLA */
352
353
354int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
355	       BN_CTX *ctx)
356	{
357	int ret;
358
359	bn_check_top(a);
360	bn_check_top(p);
361	bn_check_top(m);
362
363#ifdef ATALLA
364	if(BN_mod_exp_atalla(r,a,p,m))
365	    return 1;
366/* If it fails, try the other methods (but don't try atalla again) */
367	tried_atalla=1;
368#endif
369
370#ifdef MONT_MUL_MOD
371	/* I have finally been able to take out this pre-condition of
372	 * the top bit being set.  It was caused by an error in BN_div
373	 * with negatives.  There was also another problem when for a^b%m
374	 * a >= m.  eay 07-May-97 */
375/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
376
377	if (BN_is_odd(m))
378		{
379		if (a->top == 1)
380			{
381			BN_ULONG A = a->d[0];
382			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
383			}
384		else
385			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
386		}
387	else
388#endif
389#ifdef RECP_MUL_MOD
390		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
391#else
392		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
393#endif
394
395#ifdef ATALLA
396	tried_atalla=0;
397#endif
398
399	return(ret);
400	}
401
402
403int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
404		    const BIGNUM *m, BN_CTX *ctx)
405	{
406	int i,j,bits,ret=0,wstart,wend,window,wvalue;
407	int start=1,ts=0;
408	BIGNUM *aa;
409	BIGNUM val[TABLE_SIZE];
410	BN_RECP_CTX recp;
411
412	bits=BN_num_bits(p);
413
414	if (bits == 0)
415		{
416		BN_one(r);
417		return(1);
418		}
419
420	BN_CTX_start(ctx);
421	if ((aa = BN_CTX_get(ctx)) == NULL) goto err;
422
423	BN_RECP_CTX_init(&recp);
424	if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
425
426	BN_init(&(val[0]));
427	ts=1;
428
429	if (!BN_mod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
430
431	window = BN_window_bits_for_exponent_size(bits);
432	if (window > 1)
433		{
434		if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
435			goto err;				/* 2 */
436		j=1<<(window-1);
437		for (i=1; i<j; i++)
438			{
439			BN_init(&val[i]);
440			if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
441				goto err;
442			}
443		ts=i;
444		}
445
446	start=1;	/* This is used to avoid multiplication etc
447			 * when there is only the value '1' in the
448			 * buffer. */
449	wvalue=0;	/* The 'value' of the window */
450	wstart=bits-1;	/* The top bit of the window */
451	wend=0;		/* The bottom bit of the window */
452
453	if (!BN_one(r)) goto err;
454
455	for (;;)
456		{
457		if (BN_is_bit_set(p,wstart) == 0)
458			{
459			if (!start)
460				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
461				goto err;
462			if (wstart == 0) break;
463			wstart--;
464			continue;
465			}
466		/* We now have wstart on a 'set' bit, we now need to work out
467		 * how bit a window to do.  To do this we need to scan
468		 * forward until the last set bit before the end of the
469		 * window */
470		j=wstart;
471		wvalue=1;
472		wend=0;
473		for (i=1; i<window; i++)
474			{
475			if (wstart-i < 0) break;
476			if (BN_is_bit_set(p,wstart-i))
477				{
478				wvalue<<=(i-wend);
479				wvalue|=1;
480				wend=i;
481				}
482			}
483
484		/* wend is the size of the current window */
485		j=wend+1;
486		/* add the 'bytes above' */
487		if (!start)
488			for (i=0; i<j; i++)
489				{
490				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
491					goto err;
492				}
493
494		/* wvalue will be an odd number < 2^window */
495		if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
496			goto err;
497
498		/* move the 'window' down further */
499		wstart-=wend+1;
500		wvalue=0;
501		start=0;
502		if (wstart < 0) break;
503		}
504	ret=1;
505err:
506	BN_CTX_end(ctx);
507	for (i=0; i<ts; i++)
508		BN_clear_free(&(val[i]));
509	BN_RECP_CTX_free(&recp);
510	return(ret);
511	}
512
513
514int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
515		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
516	{
517	int i,j,bits,ret=0,wstart,wend,window,wvalue;
518	int start=1,ts=0;
519	BIGNUM *d,*r;
520	BIGNUM *aa;
521	BIGNUM val[TABLE_SIZE];
522	BN_MONT_CTX *mont=NULL;
523
524	bn_check_top(a);
525	bn_check_top(p);
526	bn_check_top(m);
527
528#ifdef ATALLA
529	if(!tried_atalla && BN_mod_exp_atalla(rr,a,p,m))
530	    return 1;
531/* If it fails, try the other methods */
532#endif
533
534	if (!(m->d[0] & 1))
535		{
536		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
537		return(0);
538		}
539	bits=BN_num_bits(p);
540	if (bits == 0)
541		{
542		BN_one(rr);
543		return(1);
544		}
545	BN_CTX_start(ctx);
546	d = BN_CTX_get(ctx);
547	r = BN_CTX_get(ctx);
548	if (d == NULL || r == NULL) goto err;
549
550	/* If this is not done, things will break in the montgomery
551	 * part */
552
553	if (in_mont != NULL)
554		mont=in_mont;
555	else
556		{
557		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
558		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
559		}
560
561	BN_init(&val[0]);
562	ts=1;
563	if (BN_ucmp(a,m) >= 0)
564		{
565		if (!BN_mod(&(val[0]),a,m,ctx))
566			goto err;
567		aa= &(val[0]);
568		}
569	else
570		aa=a;
571	if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
572
573	window = BN_window_bits_for_exponent_size(bits);
574	if (window > 1)
575		{
576		if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
577		j=1<<(window-1);
578		for (i=1; i<j; i++)
579			{
580			BN_init(&(val[i]));
581			if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
582				goto err;
583			}
584		ts=i;
585		}
586
587	start=1;	/* This is used to avoid multiplication etc
588			 * when there is only the value '1' in the
589			 * buffer. */
590	wvalue=0;	/* The 'value' of the window */
591	wstart=bits-1;	/* The top bit of the window */
592	wend=0;		/* The bottom bit of the window */
593
594	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
595	for (;;)
596		{
597		if (BN_is_bit_set(p,wstart) == 0)
598			{
599			if (!start)
600				{
601				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
602				goto err;
603				}
604			if (wstart == 0) break;
605			wstart--;
606			continue;
607			}
608		/* We now have wstart on a 'set' bit, we now need to work out
609		 * how bit a window to do.  To do this we need to scan
610		 * forward until the last set bit before the end of the
611		 * window */
612		j=wstart;
613		wvalue=1;
614		wend=0;
615		for (i=1; i<window; i++)
616			{
617			if (wstart-i < 0) break;
618			if (BN_is_bit_set(p,wstart-i))
619				{
620				wvalue<<=(i-wend);
621				wvalue|=1;
622				wend=i;
623				}
624			}
625
626		/* wend is the size of the current window */
627		j=wend+1;
628		/* add the 'bytes above' */
629		if (!start)
630			for (i=0; i<j; i++)
631				{
632				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
633					goto err;
634				}
635
636		/* wvalue will be an odd number < 2^window */
637		if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
638			goto err;
639
640		/* move the 'window' down further */
641		wstart-=wend+1;
642		wvalue=0;
643		start=0;
644		if (wstart < 0) break;
645		}
646	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
647	ret=1;
648err:
649	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
650	BN_CTX_end(ctx);
651	for (i=0; i<ts; i++)
652		BN_clear_free(&(val[i]));
653	return(ret);
654	}
655
656int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
657                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
658	{
659	BN_MONT_CTX *mont = NULL;
660	int b, bits, ret=0;
661	int r_is_one;
662	BN_ULONG w, next_w;
663	BIGNUM *d, *r, *t;
664	BIGNUM *swap_tmp;
665#define BN_MOD_MUL_WORD(r, w, m) \
666		(BN_mul_word(r, (w)) && \
667		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
668			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
669		/* BN_MOD_MUL_WORD is only used with 'w' large,
670		  * so the BN_ucmp test is probably more overhead
671		  * than always using BN_mod (which uses BN_copy if
672		  * a similar test returns true). */
673#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
674		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
675
676	bn_check_top(p);
677	bn_check_top(m);
678
679	if (!(m->d[0] & 1))
680		{
681		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
682		return(0);
683		}
684	bits = BN_num_bits(p);
685	if (bits == 0)
686		{
687		BN_one(rr);
688		return(1);
689		}
690	BN_CTX_start(ctx);
691	d = BN_CTX_get(ctx);
692	r = BN_CTX_get(ctx);
693	t = BN_CTX_get(ctx);
694	if (d == NULL || r == NULL || t == NULL) goto err;
695
696#ifdef ATALLA
697	if (!tried_atalla)
698		{
699		BN_set_word(t, a);
700		if (BN_mod_exp_atalla(rr, t, p, m))
701			{
702			BN_CTX_end(ctx);
703			return 1;
704			}
705		}
706/* If it fails, try the other methods */
707#endif
708
709	if (in_mont != NULL)
710		mont=in_mont;
711	else
712		{
713		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
714		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
715		}
716
717	r_is_one = 1; /* except for Montgomery factor */
718
719	/* bits-1 >= 0 */
720
721	/* The result is accumulated in the product r*w. */
722	w = a; /* bit 'bits-1' of 'p' is always set */
723	for (b = bits-2; b >= 0; b--)
724		{
725		/* First, square r*w. */
726		next_w = w*w;
727		if ((next_w/w) != w) /* overflow */
728			{
729			if (r_is_one)
730				{
731				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
732				r_is_one = 0;
733				}
734			else
735				{
736				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
737				}
738			next_w = 1;
739			}
740		w = next_w;
741		if (!r_is_one)
742			{
743			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
744			}
745
746		/* Second, multiply r*w by 'a' if exponent bit is set. */
747		if (BN_is_bit_set(p, b))
748			{
749			next_w = w*a;
750			if ((next_w/a) != w) /* overflow */
751				{
752				if (r_is_one)
753					{
754					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
755					r_is_one = 0;
756					}
757				else
758					{
759					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
760					}
761				next_w = a;
762				}
763			w = next_w;
764			}
765		}
766
767	/* Finally, set r:=r*w. */
768	if (w != 1)
769		{
770		if (r_is_one)
771			{
772			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
773			r_is_one = 0;
774			}
775		else
776			{
777			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
778			}
779		}
780
781	if (r_is_one) /* can happen only if a == 1*/
782		{
783		if (!BN_one(rr)) goto err;
784		}
785	else
786		{
787		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
788		}
789	ret = 1;
790err:
791	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
792	BN_CTX_end(ctx);
793	return(ret);
794	}
795
796
797/* The old fallback, simple version :-) */
798int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
799	     BN_CTX *ctx)
800	{
801	int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
802	int start=1;
803	BIGNUM *d;
804	BIGNUM val[TABLE_SIZE];
805
806	bits=BN_num_bits(p);
807
808	if (bits == 0)
809		{
810		BN_one(r);
811		return(1);
812		}
813
814	BN_CTX_start(ctx);
815	if ((d = BN_CTX_get(ctx)) == NULL) goto err;
816
817	BN_init(&(val[0]));
818	ts=1;
819	if (!BN_mod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
820
821	window = BN_window_bits_for_exponent_size(bits);
822	if (window > 1)
823		{
824		if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
825			goto err;				/* 2 */
826		j=1<<(window-1);
827		for (i=1; i<j; i++)
828			{
829			BN_init(&(val[i]));
830			if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
831				goto err;
832			}
833		ts=i;
834		}
835
836	start=1;	/* This is used to avoid multiplication etc
837			 * when there is only the value '1' in the
838			 * buffer. */
839	wvalue=0;	/* The 'value' of the window */
840	wstart=bits-1;	/* The top bit of the window */
841	wend=0;		/* The bottom bit of the window */
842
843	if (!BN_one(r)) goto err;
844
845	for (;;)
846		{
847		if (BN_is_bit_set(p,wstart) == 0)
848			{
849			if (!start)
850				if (!BN_mod_mul(r,r,r,m,ctx))
851				goto err;
852			if (wstart == 0) break;
853			wstart--;
854			continue;
855			}
856		/* We now have wstart on a 'set' bit, we now need to work out
857		 * how bit a window to do.  To do this we need to scan
858		 * forward until the last set bit before the end of the
859		 * window */
860		j=wstart;
861		wvalue=1;
862		wend=0;
863		for (i=1; i<window; i++)
864			{
865			if (wstart-i < 0) break;
866			if (BN_is_bit_set(p,wstart-i))
867				{
868				wvalue<<=(i-wend);
869				wvalue|=1;
870				wend=i;
871				}
872			}
873
874		/* wend is the size of the current window */
875		j=wend+1;
876		/* add the 'bytes above' */
877		if (!start)
878			for (i=0; i<j; i++)
879				{
880				if (!BN_mod_mul(r,r,r,m,ctx))
881					goto err;
882				}
883
884		/* wvalue will be an odd number < 2^window */
885		if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
886			goto err;
887
888		/* move the 'window' down further */
889		wstart-=wend+1;
890		wvalue=0;
891		start=0;
892		if (wstart < 0) break;
893		}
894	ret=1;
895err:
896	BN_CTX_end(ctx);
897	for (i=0; i<ts; i++)
898		BN_clear_free(&(val[i]));
899	return(ret);
900	}
901
902