bn_div.c revision 246772
1170613Sbms/* crypto/bn/bn_div.c */ 2189592Sbms/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3170613Sbms * All rights reserved. 4170613Sbms * 5170613Sbms * This package is an SSL implementation written 6170613Sbms * by Eric Young (eay@cryptsoft.com). 7170613Sbms * The implementation was written so as to conform with Netscapes SSL. 8170613Sbms * 9170613Sbms * This library is free for commercial and non-commercial use as long as 10170613Sbms * the following conditions are aheared to. The following conditions 11170613Sbms * apply to all code found in this distribution, be it the RC4, RSA, 12170613Sbms * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13170613Sbms * included with this distribution is covered by the same copyright terms 14170613Sbms * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15170613Sbms * 16170613Sbms * Copyright remains Eric Young's, and as such any Copyright notices in 17170613Sbms * the code are not to be removed. 18170613Sbms * If this package is used in a product, Eric Young should be given attribution 19170613Sbms * as the author of the parts of the library used. 20170613Sbms * This can be in the form of a textual message at program startup or 21170613Sbms * in documentation (online or textual) provided with the package. 22170613Sbms * 23170613Sbms * Redistribution and use in source and binary forms, with or without 24170613Sbms * modification, are permitted provided that the following conditions 25170613Sbms * are met: 26170613Sbms * 1. Redistributions of source code must retain the copyright 27170613Sbms * notice, this list of conditions and the following disclaimer. 28170613Sbms * 2. Redistributions in binary form must reproduce the above copyright 29170613Sbms * notice, this list of conditions and the following disclaimer in the 30170613Sbms * documentation and/or other materials provided with the distribution. 31170613Sbms * 3. All advertising materials mentioning features or use of this software 32170613Sbms * must display the following acknowledgement: 33170613Sbms * "This product includes cryptographic software written by 34170613Sbms * Eric Young (eay@cryptsoft.com)" 35170613Sbms * The word 'cryptographic' can be left out if the rouines from the library 36170613Sbms * being used are not cryptographic related :-). 37170613Sbms * 4. If you include any Windows specific code (or a derivative thereof) from 38170613Sbms * the apps directory (application code) you must include an acknowledgement: 39170613Sbms * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40170613Sbms * 41170613Sbms * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42170613Sbms * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43171746Scsjp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44170613Sbms * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45170613Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46189592Sbms * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47170613Sbms * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48189592Sbms * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49189592Sbms * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50170613Sbms * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51170613Sbms * SUCH DAMAGE. 52170613Sbms * 53170613Sbms * The licence and distribution terms for any publically available version or 54185571Sbz * derivative of this code cannot be changed. i.e. this code cannot simply be 55170613Sbms * copied and put under another distribution licence 56170613Sbms * [including the GNU Public Licence.] 57170613Sbms */ 58170613Sbms 59170613Sbms#include <stdio.h> 60170613Sbms#include <openssl/bn.h> 61170613Sbms#include "cryptlib.h" 62170613Sbms#include "bn_lcl.h" 63189592Sbms 64191659Sbms 65189592Sbms/* The old slow way */ 66189592Sbms#if 0 67170613Sbmsint BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, 68170613Sbms BN_CTX *ctx) 69170613Sbms { 70170613Sbms int i,nm,nd; 71170613Sbms int ret = 0; 72170613Sbms BIGNUM *D; 73170613Sbms 74170613Sbms bn_check_top(m); 75170613Sbms bn_check_top(d); 76170613Sbms if (BN_is_zero(d)) 77170613Sbms { 78189592Sbms BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); 79189592Sbms return(0); 80170613Sbms } 81170613Sbms 82189592Sbms if (BN_ucmp(m,d) < 0) 83189592Sbms { 84170613Sbms if (rem != NULL) 85170613Sbms { if (BN_copy(rem,m) == NULL) return(0); } 86189592Sbms if (dv != NULL) BN_zero(dv); 87189592Sbms return(1); 88189592Sbms } 89189592Sbms 90189592Sbms BN_CTX_start(ctx); 91189592Sbms D = BN_CTX_get(ctx); 92189592Sbms if (dv == NULL) dv = BN_CTX_get(ctx); 93189592Sbms if (rem == NULL) rem = BN_CTX_get(ctx); 94189592Sbms if (D == NULL || dv == NULL || rem == NULL) 95170613Sbms goto end; 96170613Sbms 97189592Sbms nd=BN_num_bits(d); 98170613Sbms nm=BN_num_bits(m); 99170613Sbms if (BN_copy(D,d) == NULL) goto end; 100170613Sbms if (BN_copy(rem,m) == NULL) goto end; 101170613Sbms 102189592Sbms /* The next 2 are needed so we can do a dv->d[0]|=1 later 103170613Sbms * since BN_lshift1 will only work once there is a value :-) */ 104170613Sbms BN_zero(dv); 105189592Sbms if(bn_wexpand(dv,1) == NULL) goto end; 106189592Sbms dv->top=1; 107189592Sbms 108189592Sbms if (!BN_lshift(D,D,nm-nd)) goto end; 109170613Sbms for (i=nm-nd; i>=0; i--) 110170613Sbms { 111170613Sbms if (!BN_lshift1(dv,dv)) goto end; 112170613Sbms if (BN_ucmp(rem,D) >= 0) 113189592Sbms { 114189592Sbms dv->d[0]|=1; 115189592Sbms if (!BN_usub(rem,rem,D)) goto end; 116170613Sbms } 117189592Sbms/* CAN IMPROVE (and have now :=) */ 118189592Sbms if (!BN_rshift1(D,D)) goto end; 119189592Sbms } 120189592Sbms rem->neg=BN_is_zero(rem)?0:m->neg; 121189592Sbms dv->neg=m->neg^d->neg; 122189592Sbms ret = 1; 123189592Sbms end: 124189592Sbms BN_CTX_end(ctx); 125189592Sbms return(ret); 126189592Sbms } 127189592Sbms 128189592Sbms#else 129170613Sbms 130189592Sbms#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \ 131189592Sbms && !defined(PEDANTIC) && !defined(BN_DIV3W) 132189592Sbms# if defined(__GNUC__) && __GNUC__>=2 133189592Sbms# if defined(__i386) || defined (__i386__) 134189592Sbms /* 135189592Sbms * There were two reasons for implementing this template: 136189592Sbms * - GNU C generates a call to a function (__udivdi3 to be exact) 137189592Sbms * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to 138189592Sbms * understand why...); 139189592Sbms * - divl doesn't only calculate quotient, but also leaves 140189592Sbms * remainder in %edx which we can definitely use here:-) 141189592Sbms * 142189592Sbms * <appro@fy.chalmers.se> 143189592Sbms */ 144189592Sbms#undef bn_div_words 145170613Sbms# define bn_div_words(n0,n1,d0) \ 146170613Sbms ({ asm volatile ( \ 147170613Sbms "divl %4" \ 148170613Sbms : "=a"(q), "=d"(rem) \ 149170613Sbms : "a"(n1), "d"(n0), "g"(d0) \ 150189592Sbms : "cc"); \ 151189592Sbms q; \ 152189592Sbms }) 153189592Sbms# define REMAINDER_IS_ALREADY_CALCULATED 154170613Sbms# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) 155170613Sbms /* 156189592Sbms * Same story here, but it's 128-bit by 64-bit division. Wow! 157170613Sbms * <appro@fy.chalmers.se> 158189357Sbms */ 159189357Sbms# undef bn_div_words 160189592Sbms# define bn_div_words(n0,n1,d0) \ 161189592Sbms ({ asm volatile ( \ 162189592Sbms "divq %4" \ 163189592Sbms : "=a"(q), "=d"(rem) \ 164189592Sbms : "a"(n1), "d"(n0), "g"(d0) \ 165189592Sbms : "cc"); \ 166189592Sbms q; \ 167189592Sbms }) 168189592Sbms# define REMAINDER_IS_ALREADY_CALCULATED 169189592Sbms# endif /* __<cpu> */ 170189592Sbms# endif /* __GNUC__ */ 171189592Sbms#endif /* OPENSSL_NO_ASM */ 172189357Sbms 173189357Sbms 174189357Sbms/* BN_div computes dv := num / divisor, rounding towards 175189357Sbms * zero, and sets up rm such that dv*divisor + rm = num holds. 176189357Sbms * Thus: 177189592Sbms * dv->neg == num->neg ^ divisor->neg (unless the result is zero) 178189592Sbms * rm->neg == num->neg (unless the remainder is zero) 179189592Sbms * If 'dv' or 'rm' is NULL, the respective value is not returned. 180189592Sbms */ 181170613Sbmsint BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, 182189592Sbms BN_CTX *ctx) 183189592Sbms { 184189592Sbms int norm_shift,i,loop; 185189592Sbms BIGNUM *tmp,wnum,*snum,*sdiv,*res; 186189592Sbms BN_ULONG *resp,*wnump; 187189592Sbms BN_ULONG d0,d1; 188189592Sbms int num_n,div_n; 189189592Sbms int no_branch=0; 190189592Sbms 191189592Sbms /* Invalid zero-padding would have particularly bad consequences 192189592Sbms * in the case of 'num', so don't just rely on bn_check_top() for this one 193189592Sbms * (bn_check_top() works only for BN_DEBUG builds) */ 194189592Sbms if (num->top > 0 && num->d[num->top - 1] == 0) 195189592Sbms { 196189592Sbms BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED); 197189592Sbms return 0; 198189592Sbms } 199189592Sbms 200189592Sbms bn_check_top(num); 201189592Sbms 202189592Sbms if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) 203189592Sbms { 204189592Sbms no_branch=1; 205189592Sbms } 206189592Sbms 207189592Sbms bn_check_top(dv); 208189592Sbms bn_check_top(rm); 209189592Sbms /* bn_check_top(num); */ /* 'num' has been checked already */ 210189592Sbms bn_check_top(divisor); 211189592Sbms 212189592Sbms if (BN_is_zero(divisor)) 213189592Sbms { 214189592Sbms BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); 215189592Sbms return(0); 216189592Sbms } 217189592Sbms 218170613Sbms if (!no_branch && BN_ucmp(num,divisor) < 0) 219170613Sbms { 220170613Sbms if (rm != NULL) 221170613Sbms { if (BN_copy(rm,num) == NULL) return(0); } 222170613Sbms if (dv != NULL) BN_zero(dv); 223170613Sbms return(1); 224170613Sbms } 225170613Sbms 226170613Sbms BN_CTX_start(ctx); 227170613Sbms tmp=BN_CTX_get(ctx); 228170613Sbms snum=BN_CTX_get(ctx); 229170613Sbms sdiv=BN_CTX_get(ctx); 230170613Sbms if (dv == NULL) 231170613Sbms res=BN_CTX_get(ctx); 232170613Sbms else res=dv; 233170613Sbms if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) 234170613Sbms goto err; 235170613Sbms 236170613Sbms /* First we normalise the numbers */ 237170613Sbms norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); 238170613Sbms if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err; 239170613Sbms sdiv->neg=0; 240170613Sbms norm_shift+=BN_BITS2; 241170613Sbms if (!(BN_lshift(snum,num,norm_shift))) goto err; 242170613Sbms snum->neg=0; 243189592Sbms 244170613Sbms if (no_branch) 245170613Sbms { 246170613Sbms /* Since we don't know whether snum is larger than sdiv, 247189592Sbms * we pad snum with enough zeroes without changing its 248189592Sbms * value. 249170613Sbms */ 250170613Sbms if (snum->top <= sdiv->top+1) 251170613Sbms { 252170613Sbms if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err; 253170613Sbms for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0; 254170613Sbms snum->top = sdiv->top + 2; 255170613Sbms } 256170613Sbms else 257170613Sbms { 258170613Sbms if (bn_wexpand(snum, snum->top + 1) == NULL) goto err; 259170613Sbms snum->d[snum->top] = 0; 260189592Sbms snum->top ++; 261170613Sbms } 262170613Sbms } 263170613Sbms 264170613Sbms div_n=sdiv->top; 265170613Sbms num_n=snum->top; 266170613Sbms loop=num_n-div_n; 267170613Sbms /* Lets setup a 'window' into snum 268170613Sbms * This is the part that corresponds to the current 269170613Sbms * 'area' being divided */ 270170613Sbms wnum.neg = 0; 271170613Sbms wnum.d = &(snum->d[loop]); 272189592Sbms wnum.top = div_n; 273189592Sbms /* only needed when BN_ucmp messes up the values between top and max */ 274189592Sbms wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ 275170613Sbms 276189592Sbms /* Get the top 2 words of sdiv */ 277170613Sbms /* div_n=sdiv->top; */ 278170613Sbms d0=sdiv->d[div_n-1]; 279170613Sbms d1=(div_n == 1)?0:sdiv->d[div_n-2]; 280170613Sbms 281189592Sbms /* pointer to the 'top' of snum */ 282170613Sbms wnump= &(snum->d[num_n-1]); 283170613Sbms 284170613Sbms /* Setup to 'res' */ 285170613Sbms res->neg= (num->neg^divisor->neg); 286170613Sbms if (!bn_wexpand(res,(loop+1))) goto err; 287170613Sbms res->top=loop-no_branch; 288170613Sbms resp= &(res->d[loop-1]); 289170613Sbms 290170613Sbms /* space for temp */ 291170613Sbms if (!bn_wexpand(tmp,(div_n+1))) goto err; 292170613Sbms 293189592Sbms if (!no_branch) 294170613Sbms { 295170613Sbms if (BN_ucmp(&wnum,sdiv) >= 0) 296170613Sbms { 297170613Sbms /* If BN_DEBUG_RAND is defined BN_ucmp changes (via 298170613Sbms * bn_pollute) the const bignum arguments => 299170613Sbms * clean the values between top and max again */ 300170613Sbms bn_clear_top2max(&wnum); 301170613Sbms bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); 302170613Sbms *resp=1; 303170613Sbms } 304189592Sbms else 305170613Sbms res->top--; 306189592Sbms } 307189592Sbms 308189592Sbms /* if res->top == 0 then clear the neg value otherwise decrease 309170613Sbms * the resp pointer */ 310189592Sbms if (res->top == 0) 311189592Sbms res->neg = 0; 312189592Sbms else 313170613Sbms resp--; 314189592Sbms 315170613Sbms for (i=0; i<loop-1; i++, wnump--, resp--) 316189592Sbms { 317189592Sbms BN_ULONG q,l0; 318170613Sbms /* the first part of the loop uses the top two words of 319170613Sbms * snum and sdiv to calculate a BN_ULONG q such that 320170613Sbms * | wnum - sdiv * q | < sdiv */ 321170613Sbms#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM) 322170613Sbms BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG); 323170613Sbms q=bn_div_3_words(wnump,d1,d0); 324170613Sbms#else 325170613Sbms BN_ULONG n0,n1,rem=0; 326170613Sbms 327170613Sbms n0=wnump[0]; 328189592Sbms n1=wnump[-1]; 329189592Sbms if (n0 == d0) 330189592Sbms q=BN_MASK2; 331189592Sbms else /* n0 < d0 */ 332189592Sbms { 333189592Sbms#ifdef BN_LLONG 334170613Sbms BN_ULLONG t2; 335170613Sbms 336170613Sbms#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words) 337189592Sbms q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0); 338189592Sbms#else 339189592Sbms q=bn_div_words(n0,n1,d0); 340189592Sbms#ifdef BN_DEBUG_LEVITTE 341170613Sbms fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ 342189592SbmsX) -> 0x%08X\n", 343189592Sbms n0, n1, d0, q); 344189592Sbms#endif 345170613Sbms#endif 346189592Sbms 347189592Sbms#ifndef REMAINDER_IS_ALREADY_CALCULATED 348189592Sbms /* 349189592Sbms * rem doesn't have to be BN_ULLONG. The least we 350189592Sbms * know it's less that d0, isn't it? 351189592Sbms */ 352189592Sbms rem=(n1-q*d0)&BN_MASK2; 353189592Sbms#endif 354189592Sbms t2=(BN_ULLONG)d1*q; 355189592Sbms 356189592Sbms for (;;) 357189592Sbms { 358189592Sbms if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2])) 359189592Sbms break; 360189592Sbms q--; 361189592Sbms rem += d0; 362189592Sbms if (rem < d0) break; /* don't let rem overflow */ 363189592Sbms t2 -= d1; 364189592Sbms } 365189592Sbms#else /* !BN_LLONG */ 366189592Sbms BN_ULONG t2l,t2h; 367189592Sbms 368189592Sbms q=bn_div_words(n0,n1,d0); 369189592Sbms#ifdef BN_DEBUG_LEVITTE 370189592Sbms fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ 371189592SbmsX) -> 0x%08X\n", 372189592Sbms n0, n1, d0, q); 373189592Sbms#endif 374189592Sbms#ifndef REMAINDER_IS_ALREADY_CALCULATED 375189592Sbms rem=(n1-q*d0)&BN_MASK2; 376189592Sbms#endif 377189592Sbms 378189592Sbms#if defined(BN_UMULT_LOHI) 379189592Sbms BN_UMULT_LOHI(t2l,t2h,d1,q); 380189592Sbms#elif defined(BN_UMULT_HIGH) 381189592Sbms t2l = d1 * q; 382189592Sbms t2h = BN_UMULT_HIGH(d1,q); 383189592Sbms#else 384189592Sbms { 385189592Sbms BN_ULONG ql, qh; 386189592Sbms t2l=LBITS(d1); t2h=HBITS(d1); 387189592Sbms ql =LBITS(q); qh =HBITS(q); 388189592Sbms mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ 389189592Sbms } 390189592Sbms#endif 391189592Sbms 392170613Sbms for (;;) 393189592Sbms { 394170613Sbms if ((t2h < rem) || 395189592Sbms ((t2h == rem) && (t2l <= wnump[-2]))) 396170613Sbms break; 397189592Sbms q--; 398170613Sbms rem += d0; 399170613Sbms if (rem < d0) break; /* don't let rem overflow */ 400170613Sbms if (t2l < d1) t2h--; t2l -= d1; 401170613Sbms } 402170613Sbms#endif /* !BN_LLONG */ 403170613Sbms } 404170613Sbms#endif /* !BN_DIV3W */ 405170613Sbms 406189592Sbms l0=bn_mul_words(tmp->d,sdiv->d,div_n,q); 407189592Sbms tmp->d[div_n]=l0; 408189592Sbms wnum.d--; 409170613Sbms /* ingore top values of the bignums just sub the two 410189592Sbms * BN_ULONG arrays with bn_sub_words */ 411189592Sbms if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1)) 412189592Sbms { 413189592Sbms /* Note: As we have considered only the leading 414170613Sbms * two BN_ULONGs in the calculation of q, sdiv * q 415189592Sbms * might be greater than wnum (but then (q-1) * sdiv 416189592Sbms * is less or equal than wnum) 417189592Sbms */ 418189592Sbms q--; 419189592Sbms if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) 420189592Sbms /* we can't have an overflow here (assuming 421189592Sbms * that q != 0, but if q == 0 then tmp is 422189592Sbms * zero anyway) */ 423189931Sbms (*wnump)++; 424189931Sbms } 425189931Sbms /* store part of the result */ 426189592Sbms *resp = q; 427189592Sbms } 428189592Sbms bn_correct_top(snum); 429189592Sbms if (rm != NULL) 430189592Sbms { 431189592Sbms /* Keep a copy of the neg flag in num because if rm==num 432189592Sbms * BN_rshift() will overwrite it. 433189592Sbms */ 434189592Sbms int neg = num->neg; 435170613Sbms BN_rshift(rm,snum,norm_shift); 436189592Sbms if (!BN_is_zero(rm)) 437189592Sbms rm->neg = neg; 438189592Sbms bn_check_top(rm); 439189592Sbms } 440189592Sbms if (no_branch) bn_correct_top(res); 441189592Sbms BN_CTX_end(ctx); 442189592Sbms return(1); 443189592Sbmserr: 444189592Sbms bn_check_top(rm); 445170613Sbms BN_CTX_end(ctx); 446189592Sbms return(0); 447189592Sbms } 448189931Sbms#endif 449189592Sbms