bn_div.c revision 246772
1170613Sbms/* crypto/bn/bn_div.c */
2189592Sbms/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3170613Sbms * All rights reserved.
4170613Sbms *
5170613Sbms * This package is an SSL implementation written
6170613Sbms * by Eric Young (eay@cryptsoft.com).
7170613Sbms * The implementation was written so as to conform with Netscapes SSL.
8170613Sbms *
9170613Sbms * This library is free for commercial and non-commercial use as long as
10170613Sbms * the following conditions are aheared to.  The following conditions
11170613Sbms * apply to all code found in this distribution, be it the RC4, RSA,
12170613Sbms * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13170613Sbms * included with this distribution is covered by the same copyright terms
14170613Sbms * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15170613Sbms *
16170613Sbms * Copyright remains Eric Young's, and as such any Copyright notices in
17170613Sbms * the code are not to be removed.
18170613Sbms * If this package is used in a product, Eric Young should be given attribution
19170613Sbms * as the author of the parts of the library used.
20170613Sbms * This can be in the form of a textual message at program startup or
21170613Sbms * in documentation (online or textual) provided with the package.
22170613Sbms *
23170613Sbms * Redistribution and use in source and binary forms, with or without
24170613Sbms * modification, are permitted provided that the following conditions
25170613Sbms * are met:
26170613Sbms * 1. Redistributions of source code must retain the copyright
27170613Sbms *    notice, this list of conditions and the following disclaimer.
28170613Sbms * 2. Redistributions in binary form must reproduce the above copyright
29170613Sbms *    notice, this list of conditions and the following disclaimer in the
30170613Sbms *    documentation and/or other materials provided with the distribution.
31170613Sbms * 3. All advertising materials mentioning features or use of this software
32170613Sbms *    must display the following acknowledgement:
33170613Sbms *    "This product includes cryptographic software written by
34170613Sbms *     Eric Young (eay@cryptsoft.com)"
35170613Sbms *    The word 'cryptographic' can be left out if the rouines from the library
36170613Sbms *    being used are not cryptographic related :-).
37170613Sbms * 4. If you include any Windows specific code (or a derivative thereof) from
38170613Sbms *    the apps directory (application code) you must include an acknowledgement:
39170613Sbms *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40170613Sbms *
41170613Sbms * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42170613Sbms * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43171746Scsjp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44170613Sbms * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45170613Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46189592Sbms * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47170613Sbms * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48189592Sbms * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49189592Sbms * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50170613Sbms * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51170613Sbms * SUCH DAMAGE.
52170613Sbms *
53170613Sbms * The licence and distribution terms for any publically available version or
54185571Sbz * derivative of this code cannot be changed.  i.e. this code cannot simply be
55170613Sbms * copied and put under another distribution licence
56170613Sbms * [including the GNU Public Licence.]
57170613Sbms */
58170613Sbms
59170613Sbms#include <stdio.h>
60170613Sbms#include <openssl/bn.h>
61170613Sbms#include "cryptlib.h"
62170613Sbms#include "bn_lcl.h"
63189592Sbms
64191659Sbms
65189592Sbms/* The old slow way */
66189592Sbms#if 0
67170613Sbmsint BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
68170613Sbms	   BN_CTX *ctx)
69170613Sbms	{
70170613Sbms	int i,nm,nd;
71170613Sbms	int ret = 0;
72170613Sbms	BIGNUM *D;
73170613Sbms
74170613Sbms	bn_check_top(m);
75170613Sbms	bn_check_top(d);
76170613Sbms	if (BN_is_zero(d))
77170613Sbms		{
78189592Sbms		BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
79189592Sbms		return(0);
80170613Sbms		}
81170613Sbms
82189592Sbms	if (BN_ucmp(m,d) < 0)
83189592Sbms		{
84170613Sbms		if (rem != NULL)
85170613Sbms			{ if (BN_copy(rem,m) == NULL) return(0); }
86189592Sbms		if (dv != NULL) BN_zero(dv);
87189592Sbms		return(1);
88189592Sbms		}
89189592Sbms
90189592Sbms	BN_CTX_start(ctx);
91189592Sbms	D = BN_CTX_get(ctx);
92189592Sbms	if (dv == NULL) dv = BN_CTX_get(ctx);
93189592Sbms	if (rem == NULL) rem = BN_CTX_get(ctx);
94189592Sbms	if (D == NULL || dv == NULL || rem == NULL)
95170613Sbms		goto end;
96170613Sbms
97189592Sbms	nd=BN_num_bits(d);
98170613Sbms	nm=BN_num_bits(m);
99170613Sbms	if (BN_copy(D,d) == NULL) goto end;
100170613Sbms	if (BN_copy(rem,m) == NULL) goto end;
101170613Sbms
102189592Sbms	/* The next 2 are needed so we can do a dv->d[0]|=1 later
103170613Sbms	 * since BN_lshift1 will only work once there is a value :-) */
104170613Sbms	BN_zero(dv);
105189592Sbms	if(bn_wexpand(dv,1) == NULL) goto end;
106189592Sbms	dv->top=1;
107189592Sbms
108189592Sbms	if (!BN_lshift(D,D,nm-nd)) goto end;
109170613Sbms	for (i=nm-nd; i>=0; i--)
110170613Sbms		{
111170613Sbms		if (!BN_lshift1(dv,dv)) goto end;
112170613Sbms		if (BN_ucmp(rem,D) >= 0)
113189592Sbms			{
114189592Sbms			dv->d[0]|=1;
115189592Sbms			if (!BN_usub(rem,rem,D)) goto end;
116170613Sbms			}
117189592Sbms/* CAN IMPROVE (and have now :=) */
118189592Sbms		if (!BN_rshift1(D,D)) goto end;
119189592Sbms		}
120189592Sbms	rem->neg=BN_is_zero(rem)?0:m->neg;
121189592Sbms	dv->neg=m->neg^d->neg;
122189592Sbms	ret = 1;
123189592Sbms end:
124189592Sbms	BN_CTX_end(ctx);
125189592Sbms	return(ret);
126189592Sbms	}
127189592Sbms
128189592Sbms#else
129170613Sbms
130189592Sbms#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
131189592Sbms    && !defined(PEDANTIC) && !defined(BN_DIV3W)
132189592Sbms# if defined(__GNUC__) && __GNUC__>=2
133189592Sbms#  if defined(__i386) || defined (__i386__)
134189592Sbms   /*
135189592Sbms    * There were two reasons for implementing this template:
136189592Sbms    * - GNU C generates a call to a function (__udivdi3 to be exact)
137189592Sbms    *   in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
138189592Sbms    *   understand why...);
139189592Sbms    * - divl doesn't only calculate quotient, but also leaves
140189592Sbms    *   remainder in %edx which we can definitely use here:-)
141189592Sbms    *
142189592Sbms    *					<appro@fy.chalmers.se>
143189592Sbms    */
144189592Sbms#undef bn_div_words
145170613Sbms#  define bn_div_words(n0,n1,d0)		\
146170613Sbms	({  asm volatile (			\
147170613Sbms		"divl	%4"			\
148170613Sbms		: "=a"(q), "=d"(rem)		\
149170613Sbms		: "a"(n1), "d"(n0), "g"(d0)	\
150189592Sbms		: "cc");			\
151189592Sbms	    q;					\
152189592Sbms	})
153189592Sbms#  define REMAINDER_IS_ALREADY_CALCULATED
154170613Sbms#  elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
155170613Sbms   /*
156189592Sbms    * Same story here, but it's 128-bit by 64-bit division. Wow!
157170613Sbms    *					<appro@fy.chalmers.se>
158189357Sbms    */
159189357Sbms#  undef bn_div_words
160189592Sbms#  define bn_div_words(n0,n1,d0)		\
161189592Sbms	({  asm volatile (			\
162189592Sbms		"divq	%4"			\
163189592Sbms		: "=a"(q), "=d"(rem)		\
164189592Sbms		: "a"(n1), "d"(n0), "g"(d0)	\
165189592Sbms		: "cc");			\
166189592Sbms	    q;					\
167189592Sbms	})
168189592Sbms#  define REMAINDER_IS_ALREADY_CALCULATED
169189592Sbms#  endif /* __<cpu> */
170189592Sbms# endif /* __GNUC__ */
171189592Sbms#endif /* OPENSSL_NO_ASM */
172189357Sbms
173189357Sbms
174189357Sbms/* BN_div computes  dv := num / divisor,  rounding towards
175189357Sbms * zero, and sets up rm  such that  dv*divisor + rm = num  holds.
176189357Sbms * Thus:
177189592Sbms *     dv->neg == num->neg ^ divisor->neg  (unless the result is zero)
178189592Sbms *     rm->neg == num->neg                 (unless the remainder is zero)
179189592Sbms * If 'dv' or 'rm' is NULL, the respective value is not returned.
180189592Sbms */
181170613Sbmsint BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
182189592Sbms	   BN_CTX *ctx)
183189592Sbms	{
184189592Sbms	int norm_shift,i,loop;
185189592Sbms	BIGNUM *tmp,wnum,*snum,*sdiv,*res;
186189592Sbms	BN_ULONG *resp,*wnump;
187189592Sbms	BN_ULONG d0,d1;
188189592Sbms	int num_n,div_n;
189189592Sbms	int no_branch=0;
190189592Sbms
191189592Sbms	/* Invalid zero-padding would have particularly bad consequences
192189592Sbms	 * in the case of 'num', so don't just rely on bn_check_top() for this one
193189592Sbms	 * (bn_check_top() works only for BN_DEBUG builds) */
194189592Sbms	if (num->top > 0 && num->d[num->top - 1] == 0)
195189592Sbms		{
196189592Sbms		BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED);
197189592Sbms		return 0;
198189592Sbms		}
199189592Sbms
200189592Sbms	bn_check_top(num);
201189592Sbms
202189592Sbms	if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
203189592Sbms		{
204189592Sbms		no_branch=1;
205189592Sbms		}
206189592Sbms
207189592Sbms	bn_check_top(dv);
208189592Sbms	bn_check_top(rm);
209189592Sbms	/* bn_check_top(num); */ /* 'num' has been checked already */
210189592Sbms	bn_check_top(divisor);
211189592Sbms
212189592Sbms	if (BN_is_zero(divisor))
213189592Sbms		{
214189592Sbms		BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
215189592Sbms		return(0);
216189592Sbms		}
217189592Sbms
218170613Sbms	if (!no_branch && BN_ucmp(num,divisor) < 0)
219170613Sbms		{
220170613Sbms		if (rm != NULL)
221170613Sbms			{ if (BN_copy(rm,num) == NULL) return(0); }
222170613Sbms		if (dv != NULL) BN_zero(dv);
223170613Sbms		return(1);
224170613Sbms		}
225170613Sbms
226170613Sbms	BN_CTX_start(ctx);
227170613Sbms	tmp=BN_CTX_get(ctx);
228170613Sbms	snum=BN_CTX_get(ctx);
229170613Sbms	sdiv=BN_CTX_get(ctx);
230170613Sbms	if (dv == NULL)
231170613Sbms		res=BN_CTX_get(ctx);
232170613Sbms	else	res=dv;
233170613Sbms	if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
234170613Sbms		goto err;
235170613Sbms
236170613Sbms	/* First we normalise the numbers */
237170613Sbms	norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
238170613Sbms	if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
239170613Sbms	sdiv->neg=0;
240170613Sbms	norm_shift+=BN_BITS2;
241170613Sbms	if (!(BN_lshift(snum,num,norm_shift))) goto err;
242170613Sbms	snum->neg=0;
243189592Sbms
244170613Sbms	if (no_branch)
245170613Sbms		{
246170613Sbms		/* Since we don't know whether snum is larger than sdiv,
247189592Sbms		 * we pad snum with enough zeroes without changing its
248189592Sbms		 * value.
249170613Sbms		 */
250170613Sbms		if (snum->top <= sdiv->top+1)
251170613Sbms			{
252170613Sbms			if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
253170613Sbms			for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
254170613Sbms			snum->top = sdiv->top + 2;
255170613Sbms			}
256170613Sbms		else
257170613Sbms			{
258170613Sbms			if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
259170613Sbms			snum->d[snum->top] = 0;
260189592Sbms			snum->top ++;
261170613Sbms			}
262170613Sbms		}
263170613Sbms
264170613Sbms	div_n=sdiv->top;
265170613Sbms	num_n=snum->top;
266170613Sbms	loop=num_n-div_n;
267170613Sbms	/* Lets setup a 'window' into snum
268170613Sbms	 * This is the part that corresponds to the current
269170613Sbms	 * 'area' being divided */
270170613Sbms	wnum.neg   = 0;
271170613Sbms	wnum.d     = &(snum->d[loop]);
272189592Sbms	wnum.top   = div_n;
273189592Sbms	/* only needed when BN_ucmp messes up the values between top and max */
274189592Sbms	wnum.dmax  = snum->dmax - loop; /* so we don't step out of bounds */
275170613Sbms
276189592Sbms	/* Get the top 2 words of sdiv */
277170613Sbms	/* div_n=sdiv->top; */
278170613Sbms	d0=sdiv->d[div_n-1];
279170613Sbms	d1=(div_n == 1)?0:sdiv->d[div_n-2];
280170613Sbms
281189592Sbms	/* pointer to the 'top' of snum */
282170613Sbms	wnump= &(snum->d[num_n-1]);
283170613Sbms
284170613Sbms	/* Setup to 'res' */
285170613Sbms	res->neg= (num->neg^divisor->neg);
286170613Sbms	if (!bn_wexpand(res,(loop+1))) goto err;
287170613Sbms	res->top=loop-no_branch;
288170613Sbms	resp= &(res->d[loop-1]);
289170613Sbms
290170613Sbms	/* space for temp */
291170613Sbms	if (!bn_wexpand(tmp,(div_n+1))) goto err;
292170613Sbms
293189592Sbms	if (!no_branch)
294170613Sbms		{
295170613Sbms		if (BN_ucmp(&wnum,sdiv) >= 0)
296170613Sbms			{
297170613Sbms			/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
298170613Sbms			 * bn_pollute) the const bignum arguments =>
299170613Sbms			 * clean the values between top and max again */
300170613Sbms			bn_clear_top2max(&wnum);
301170613Sbms			bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
302170613Sbms			*resp=1;
303170613Sbms			}
304189592Sbms		else
305170613Sbms			res->top--;
306189592Sbms		}
307189592Sbms
308189592Sbms	/* if res->top == 0 then clear the neg value otherwise decrease
309170613Sbms	 * the resp pointer */
310189592Sbms	if (res->top == 0)
311189592Sbms		res->neg = 0;
312189592Sbms	else
313170613Sbms		resp--;
314189592Sbms
315170613Sbms	for (i=0; i<loop-1; i++, wnump--, resp--)
316189592Sbms		{
317189592Sbms		BN_ULONG q,l0;
318170613Sbms		/* the first part of the loop uses the top two words of
319170613Sbms		 * snum and sdiv to calculate a BN_ULONG q such that
320170613Sbms		 * | wnum - sdiv * q | < sdiv */
321170613Sbms#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
322170613Sbms		BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
323170613Sbms		q=bn_div_3_words(wnump,d1,d0);
324170613Sbms#else
325170613Sbms		BN_ULONG n0,n1,rem=0;
326170613Sbms
327170613Sbms		n0=wnump[0];
328189592Sbms		n1=wnump[-1];
329189592Sbms		if (n0 == d0)
330189592Sbms			q=BN_MASK2;
331189592Sbms		else 			/* n0 < d0 */
332189592Sbms			{
333189592Sbms#ifdef BN_LLONG
334170613Sbms			BN_ULLONG t2;
335170613Sbms
336170613Sbms#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
337189592Sbms			q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
338189592Sbms#else
339189592Sbms			q=bn_div_words(n0,n1,d0);
340189592Sbms#ifdef BN_DEBUG_LEVITTE
341170613Sbms			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
342189592SbmsX) -> 0x%08X\n",
343189592Sbms				n0, n1, d0, q);
344189592Sbms#endif
345170613Sbms#endif
346189592Sbms
347189592Sbms#ifndef REMAINDER_IS_ALREADY_CALCULATED
348189592Sbms			/*
349189592Sbms			 * rem doesn't have to be BN_ULLONG. The least we
350189592Sbms			 * know it's less that d0, isn't it?
351189592Sbms			 */
352189592Sbms			rem=(n1-q*d0)&BN_MASK2;
353189592Sbms#endif
354189592Sbms			t2=(BN_ULLONG)d1*q;
355189592Sbms
356189592Sbms			for (;;)
357189592Sbms				{
358189592Sbms				if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
359189592Sbms					break;
360189592Sbms				q--;
361189592Sbms				rem += d0;
362189592Sbms				if (rem < d0) break; /* don't let rem overflow */
363189592Sbms				t2 -= d1;
364189592Sbms				}
365189592Sbms#else /* !BN_LLONG */
366189592Sbms			BN_ULONG t2l,t2h;
367189592Sbms
368189592Sbms			q=bn_div_words(n0,n1,d0);
369189592Sbms#ifdef BN_DEBUG_LEVITTE
370189592Sbms			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
371189592SbmsX) -> 0x%08X\n",
372189592Sbms				n0, n1, d0, q);
373189592Sbms#endif
374189592Sbms#ifndef REMAINDER_IS_ALREADY_CALCULATED
375189592Sbms			rem=(n1-q*d0)&BN_MASK2;
376189592Sbms#endif
377189592Sbms
378189592Sbms#if defined(BN_UMULT_LOHI)
379189592Sbms			BN_UMULT_LOHI(t2l,t2h,d1,q);
380189592Sbms#elif defined(BN_UMULT_HIGH)
381189592Sbms			t2l = d1 * q;
382189592Sbms			t2h = BN_UMULT_HIGH(d1,q);
383189592Sbms#else
384189592Sbms			{
385189592Sbms			BN_ULONG ql, qh;
386189592Sbms			t2l=LBITS(d1); t2h=HBITS(d1);
387189592Sbms			ql =LBITS(q);  qh =HBITS(q);
388189592Sbms			mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
389189592Sbms			}
390189592Sbms#endif
391189592Sbms
392170613Sbms			for (;;)
393189592Sbms				{
394170613Sbms				if ((t2h < rem) ||
395189592Sbms					((t2h == rem) && (t2l <= wnump[-2])))
396170613Sbms					break;
397189592Sbms				q--;
398170613Sbms				rem += d0;
399170613Sbms				if (rem < d0) break; /* don't let rem overflow */
400170613Sbms				if (t2l < d1) t2h--; t2l -= d1;
401170613Sbms				}
402170613Sbms#endif /* !BN_LLONG */
403170613Sbms			}
404170613Sbms#endif /* !BN_DIV3W */
405170613Sbms
406189592Sbms		l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
407189592Sbms		tmp->d[div_n]=l0;
408189592Sbms		wnum.d--;
409170613Sbms		/* ingore top values of the bignums just sub the two
410189592Sbms		 * BN_ULONG arrays with bn_sub_words */
411189592Sbms		if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
412189592Sbms			{
413189592Sbms			/* Note: As we have considered only the leading
414170613Sbms			 * two BN_ULONGs in the calculation of q, sdiv * q
415189592Sbms			 * might be greater than wnum (but then (q-1) * sdiv
416189592Sbms			 * is less or equal than wnum)
417189592Sbms			 */
418189592Sbms			q--;
419189592Sbms			if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
420189592Sbms				/* we can't have an overflow here (assuming
421189592Sbms				 * that q != 0, but if q == 0 then tmp is
422189592Sbms				 * zero anyway) */
423189931Sbms				(*wnump)++;
424189931Sbms			}
425189931Sbms		/* store part of the result */
426189592Sbms		*resp = q;
427189592Sbms		}
428189592Sbms	bn_correct_top(snum);
429189592Sbms	if (rm != NULL)
430189592Sbms		{
431189592Sbms		/* Keep a copy of the neg flag in num because if rm==num
432189592Sbms		 * BN_rshift() will overwrite it.
433189592Sbms		 */
434189592Sbms		int neg = num->neg;
435170613Sbms		BN_rshift(rm,snum,norm_shift);
436189592Sbms		if (!BN_is_zero(rm))
437189592Sbms			rm->neg = neg;
438189592Sbms		bn_check_top(rm);
439189592Sbms		}
440189592Sbms	if (no_branch)	bn_correct_top(res);
441189592Sbms	BN_CTX_end(ctx);
442189592Sbms	return(1);
443189592Sbmserr:
444189592Sbms	bn_check_top(rm);
445170613Sbms	BN_CTX_end(ctx);
446189592Sbms	return(0);
447189592Sbms	}
448189931Sbms#endif
449189592Sbms