1141296Sdas 2141296Sdas/* @(#)e_sqrt.c 1.3 95/01/18 */ 32116Sjkh/* 42116Sjkh * ==================================================== 52116Sjkh * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 62116Sjkh * 7141296Sdas * Developed at SunSoft, a Sun Microsystems, Inc. business. 82116Sjkh * Permission to use, copy, modify, and distribute this 9141296Sdas * software is freely granted, provided that this notice 102116Sjkh * is preserved. 112116Sjkh * ==================================================== 122116Sjkh */ 132116Sjkh 14176720Sdas#include <sys/cdefs.h> 15176720Sdas__FBSDID("$FreeBSD$"); 162116Sjkh 172116Sjkh/* __ieee754_sqrt(x) 182116Sjkh * Return correctly rounded sqrt. 192116Sjkh * ------------------------------------------ 202116Sjkh * | Use the hardware sqrt if you have one | 212116Sjkh * ------------------------------------------ 22141296Sdas * Method: 23141296Sdas * Bit by bit method using integer arithmetic. (Slow, but portable) 242116Sjkh * 1. Normalization 25141296Sdas * Scale x to y in [1,4) with even powers of 2: 262116Sjkh * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 272116Sjkh * sqrt(x) = 2^k * sqrt(y) 282116Sjkh * 2. Bit by bit computation 292116Sjkh * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 302116Sjkh * i 0 312116Sjkh * i+1 2 322116Sjkh * s = 2*q , and y = 2 * ( y - q ). (1) 332116Sjkh * i i i i 34141296Sdas * 35141296Sdas * To compute q from q , one checks whether 36141296Sdas * i+1 i 372116Sjkh * 382116Sjkh * -(i+1) 2 392116Sjkh * (q + 2 ) <= y. (2) 402116Sjkh * i 412116Sjkh * -(i+1) 422116Sjkh * If (2) is false, then q = q ; otherwise q = q + 2 . 432116Sjkh * i+1 i i+1 i 442116Sjkh * 452116Sjkh * With some algebric manipulation, it is not difficult to see 46141296Sdas * that (2) is equivalent to 472116Sjkh * -(i+1) 482116Sjkh * s + 2 <= y (3) 492116Sjkh * i i 502116Sjkh * 51141296Sdas * The advantage of (3) is that s and y can be computed by 522116Sjkh * i i 532116Sjkh * the following recurrence formula: 542116Sjkh * if (3) is false 552116Sjkh * 562116Sjkh * s = s , y = y ; (4) 572116Sjkh * i+1 i i+1 i 582116Sjkh * 592116Sjkh * otherwise, 602116Sjkh * -i -(i+1) 612116Sjkh * s = s + 2 , y = y - s - 2 (5) 622116Sjkh * i+1 i i+1 i i 63141296Sdas * 64141296Sdas * One may easily use induction to prove (4) and (5). 652116Sjkh * Note. Since the left hand side of (3) contain only i+2 bits, 66141296Sdas * it does not necessary to do a full (53-bit) comparison 672116Sjkh * in (3). 682116Sjkh * 3. Final rounding 692116Sjkh * After generating the 53 bits result, we compute one more bit. 702116Sjkh * Together with the remainder, we can decide whether the 712116Sjkh * result is exact, bigger than 1/2ulp, or less than 1/2ulp 722116Sjkh * (it will never equal to 1/2ulp). 732116Sjkh * The rounding mode can be detected by checking whether 742116Sjkh * huge + tiny is equal to huge, and whether huge - tiny is 752116Sjkh * equal to huge for some floating point number "huge" and "tiny". 76141296Sdas * 772116Sjkh * Special cases: 782116Sjkh * sqrt(+-0) = +-0 ... exact 792116Sjkh * sqrt(inf) = inf 802116Sjkh * sqrt(-ve) = NaN ... with invalid signal 812116Sjkh * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 822116Sjkh * 832116Sjkh * Other methods : see the appended file at the end of the program below. 842116Sjkh *--------------- 852116Sjkh */ 862116Sjkh 87176720Sdas#include <float.h> 88176720Sdas 892116Sjkh#include "math.h" 902116Sjkh#include "math_private.h" 912116Sjkh 922116Sjkhstatic const double one = 1.0, tiny=1.0e-300; 932116Sjkh 9497413Salfreddouble 95117912Speter__ieee754_sqrt(double x) 962116Sjkh{ 972116Sjkh double z; 988870Srgrimes int32_t sign = (int)0x80000000; 992116Sjkh int32_t ix0,s0,q,m,t,i; 1002116Sjkh u_int32_t r,t1,s1,ix1,q1; 1012116Sjkh 1022116Sjkh EXTRACT_WORDS(ix0,ix1,x); 1032116Sjkh 1042116Sjkh /* take care of Inf and NaN */ 105141296Sdas if((ix0&0x7ff00000)==0x7ff00000) { 1062116Sjkh return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 1072116Sjkh sqrt(-inf)=sNaN */ 108141296Sdas } 1092116Sjkh /* take care of zero */ 1102116Sjkh if(ix0<=0) { 1112116Sjkh if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 1122116Sjkh else if(ix0<0) 1132116Sjkh return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 1142116Sjkh } 1152116Sjkh /* normalize x */ 1162116Sjkh m = (ix0>>20); 1172116Sjkh if(m==0) { /* subnormal x */ 1182116Sjkh while(ix0==0) { 1192116Sjkh m -= 21; 1202116Sjkh ix0 |= (ix1>>11); ix1 <<= 21; 1212116Sjkh } 1222116Sjkh for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 1232116Sjkh m -= i-1; 1242116Sjkh ix0 |= (ix1>>(32-i)); 1252116Sjkh ix1 <<= i; 1262116Sjkh } 1272116Sjkh m -= 1023; /* unbias exponent */ 1282116Sjkh ix0 = (ix0&0x000fffff)|0x00100000; 1292116Sjkh if(m&1){ /* odd m, double x to make it even */ 1302116Sjkh ix0 += ix0 + ((ix1&sign)>>31); 1312116Sjkh ix1 += ix1; 1322116Sjkh } 1332116Sjkh m >>= 1; /* m = [m/2] */ 1342116Sjkh 1352116Sjkh /* generate sqrt(x) bit by bit */ 1362116Sjkh ix0 += ix0 + ((ix1&sign)>>31); 1372116Sjkh ix1 += ix1; 1382116Sjkh q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 1392116Sjkh r = 0x00200000; /* r = moving bit from right to left */ 1402116Sjkh 1412116Sjkh while(r!=0) { 142141296Sdas t = s0+r; 143141296Sdas if(t<=ix0) { 144141296Sdas s0 = t+r; 145141296Sdas ix0 -= t; 146141296Sdas q += r; 147141296Sdas } 1482116Sjkh ix0 += ix0 + ((ix1&sign)>>31); 1492116Sjkh ix1 += ix1; 1502116Sjkh r>>=1; 1512116Sjkh } 1522116Sjkh 1532116Sjkh r = sign; 1542116Sjkh while(r!=0) { 155141296Sdas t1 = s1+r; 1562116Sjkh t = s0; 157141296Sdas if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 1582116Sjkh s1 = t1+r; 1592116Sjkh if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 1602116Sjkh ix0 -= t; 1612116Sjkh if (ix1 < t1) ix0 -= 1; 1622116Sjkh ix1 -= t1; 1632116Sjkh q1 += r; 1642116Sjkh } 1652116Sjkh ix0 += ix0 + ((ix1&sign)>>31); 1662116Sjkh ix1 += ix1; 1672116Sjkh r>>=1; 1682116Sjkh } 1692116Sjkh 1702116Sjkh /* use floating add to find out rounding direction */ 1712116Sjkh if((ix0|ix1)!=0) { 1722116Sjkh z = one-tiny; /* trigger inexact flag */ 1732116Sjkh if (z>=one) { 1742116Sjkh z = one+tiny; 1752116Sjkh if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 1762116Sjkh else if (z>one) { 1772116Sjkh if (q1==(u_int32_t)0xfffffffe) q+=1; 178141296Sdas q1+=2; 1792116Sjkh } else 1802116Sjkh q1 += (q1&1); 1812116Sjkh } 1822116Sjkh } 1832116Sjkh ix0 = (q>>1)+0x3fe00000; 1842116Sjkh ix1 = q1>>1; 1852116Sjkh if ((q&1)==1) ix1 |= sign; 1862116Sjkh ix0 += (m <<20); 1872116Sjkh INSERT_WORDS(z,ix0,ix1); 1882116Sjkh return z; 1892116Sjkh} 1902116Sjkh 191176720Sdas#if (LDBL_MANT_DIG == 53) 192176720Sdas__weak_reference(sqrt, sqrtl); 193176720Sdas#endif 194176720Sdas 1952116Sjkh/* 1962116SjkhOther methods (use floating-point arithmetic) 1972116Sjkh------------- 198141296Sdas(This is a copy of a drafted paper by Prof W. Kahan 1992116Sjkhand K.C. Ng, written in May, 1986) 2002116Sjkh 201141296Sdas Two algorithms are given here to implement sqrt(x) 2022116Sjkh (IEEE double precision arithmetic) in software. 2032116Sjkh Both supply sqrt(x) correctly rounded. The first algorithm (in 2042116Sjkh Section A) uses newton iterations and involves four divisions. 2052116Sjkh The second one uses reciproot iterations to avoid division, but 2062116Sjkh requires more multiplications. Both algorithms need the ability 207141296Sdas to chop results of arithmetic operations instead of round them, 2082116Sjkh and the INEXACT flag to indicate when an arithmetic operation 209141296Sdas is executed exactly with no roundoff error, all part of the 2102116Sjkh standard (IEEE 754-1985). The ability to perform shift, add, 2112116Sjkh subtract and logical AND operations upon 32-bit words is needed 2122116Sjkh too, though not part of the standard. 2132116Sjkh 2142116SjkhA. sqrt(x) by Newton Iteration 2152116Sjkh 2162116Sjkh (1) Initial approximation 2172116Sjkh 2182116Sjkh Let x0 and x1 be the leading and the trailing 32-bit words of 219141296Sdas a floating point number x (in IEEE double format) respectively 2202116Sjkh 2212116Sjkh 1 11 52 ...widths 2222116Sjkh ------------------------------------------------------ 2232116Sjkh x: |s| e | f | 2242116Sjkh ------------------------------------------------------ 2252116Sjkh msb lsb msb lsb ...order 2262116Sjkh 227141296Sdas 2282116Sjkh ------------------------ ------------------------ 2292116Sjkh x0: |s| e | f1 | x1: | f2 | 2302116Sjkh ------------------------ ------------------------ 2312116Sjkh 2322116Sjkh By performing shifts and subtracts on x0 and x1 (both regarded 2332116Sjkh as integers), we obtain an 8-bit approximation of sqrt(x) as 2342116Sjkh follows. 2352116Sjkh 2362116Sjkh k := (x0>>1) + 0x1ff80000; 2372116Sjkh y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 2382116Sjkh Here k is a 32-bit integer and T1[] is an integer array containing 2392116Sjkh correction terms. Now magically the floating value of y (y's 2402116Sjkh leading 32-bit word is y0, the value of its trailing word is 0) 2412116Sjkh approximates sqrt(x) to almost 8-bit. 2422116Sjkh 2432116Sjkh Value of T1: 2442116Sjkh static int T1[32]= { 2452116Sjkh 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 2462116Sjkh 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 2472116Sjkh 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 2482116Sjkh 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 2492116Sjkh 2502116Sjkh (2) Iterative refinement 2512116Sjkh 252141296Sdas Apply Heron's rule three times to y, we have y approximates 2532116Sjkh sqrt(x) to within 1 ulp (Unit in the Last Place): 2542116Sjkh 2552116Sjkh y := (y+x/y)/2 ... almost 17 sig. bits 2562116Sjkh y := (y+x/y)/2 ... almost 35 sig. bits 2572116Sjkh y := y-(y-x/y)/2 ... within 1 ulp 2582116Sjkh 2592116Sjkh 2602116Sjkh Remark 1. 2612116Sjkh Another way to improve y to within 1 ulp is: 2622116Sjkh 2632116Sjkh y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 2642116Sjkh y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 2652116Sjkh 2662116Sjkh 2 2672116Sjkh (x-y )*y 2682116Sjkh y := y + 2* ---------- ...within 1 ulp 2692116Sjkh 2 2702116Sjkh 3y + x 2712116Sjkh 2722116Sjkh 2732116Sjkh This formula has one division fewer than the one above; however, 2742116Sjkh it requires more multiplications and additions. Also x must be 2752116Sjkh scaled in advance to avoid spurious overflow in evaluating the 2762116Sjkh expression 3y*y+x. Hence it is not recommended uless division 277141296Sdas is slow. If division is very slow, then one should use the 2782116Sjkh reciproot algorithm given in section B. 2792116Sjkh 2802116Sjkh (3) Final adjustment 2812116Sjkh 282141296Sdas By twiddling y's last bit it is possible to force y to be 2832116Sjkh correctly rounded according to the prevailing rounding mode 2842116Sjkh as follows. Let r and i be copies of the rounding mode and 2852116Sjkh inexact flag before entering the square root program. Also we 2862116Sjkh use the expression y+-ulp for the next representable floating 2872116Sjkh numbers (up and down) of y. Note that y+-ulp = either fixed 2882116Sjkh point y+-1, or multiply y by nextafter(1,+-inf) in chopped 2892116Sjkh mode. 2902116Sjkh 2912116Sjkh I := FALSE; ... reset INEXACT flag I 2922116Sjkh R := RZ; ... set rounding mode to round-toward-zero 2932116Sjkh z := x/y; ... chopped quotient, possibly inexact 2942116Sjkh If(not I) then { ... if the quotient is exact 2952116Sjkh if(z=y) { 2962116Sjkh I := i; ... restore inexact flag 2972116Sjkh R := r; ... restore rounded mode 2982116Sjkh return sqrt(x):=y. 2992116Sjkh } else { 3002116Sjkh z := z - ulp; ... special rounding 3012116Sjkh } 3022116Sjkh } 3032116Sjkh i := TRUE; ... sqrt(x) is inexact 3042116Sjkh If (r=RN) then z=z+ulp ... rounded-to-nearest 3052116Sjkh If (r=RP) then { ... round-toward-+inf 3062116Sjkh y = y+ulp; z=z+ulp; 3072116Sjkh } 3082116Sjkh y := y+z; ... chopped sum 3092116Sjkh y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 3102116Sjkh I := i; ... restore inexact flag 3112116Sjkh R := r; ... restore rounded mode 3122116Sjkh return sqrt(x):=y. 313141296Sdas 3142116Sjkh (4) Special cases 3152116Sjkh 3162116Sjkh Square root of +inf, +-0, or NaN is itself; 3172116Sjkh Square root of a negative number is NaN with invalid signal. 3182116Sjkh 3192116Sjkh 3202116SjkhB. sqrt(x) by Reciproot Iteration 3212116Sjkh 3222116Sjkh (1) Initial approximation 3232116Sjkh 3242116Sjkh Let x0 and x1 be the leading and the trailing 32-bit words of 3252116Sjkh a floating point number x (in IEEE double format) respectively 3262116Sjkh (see section A). By performing shifs and subtracts on x0 and y0, 3272116Sjkh we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 3282116Sjkh 3292116Sjkh k := 0x5fe80000 - (x0>>1); 3302116Sjkh y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 3312116Sjkh 332141296Sdas Here k is a 32-bit integer and T2[] is an integer array 3332116Sjkh containing correction terms. Now magically the floating 3342116Sjkh value of y (y's leading 32-bit word is y0, the value of 3352116Sjkh its trailing word y1 is set to zero) approximates 1/sqrt(x) 3362116Sjkh to almost 7.8-bit. 3372116Sjkh 3382116Sjkh Value of T2: 3392116Sjkh static int T2[64]= { 3402116Sjkh 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 3412116Sjkh 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 3422116Sjkh 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 3432116Sjkh 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 3442116Sjkh 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 3452116Sjkh 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 3462116Sjkh 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 3472116Sjkh 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 3482116Sjkh 3492116Sjkh (2) Iterative refinement 3502116Sjkh 3512116Sjkh Apply Reciproot iteration three times to y and multiply the 3522116Sjkh result by x to get an approximation z that matches sqrt(x) 353141296Sdas to about 1 ulp. To be exact, we will have 3542116Sjkh -1ulp < sqrt(x)-z<1.0625ulp. 355141296Sdas 3562116Sjkh ... set rounding mode to Round-to-nearest 3572116Sjkh y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 3582116Sjkh y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 3592116Sjkh ... special arrangement for better accuracy 3602116Sjkh z := x*y ... 29 bits to sqrt(x), with z*y<1 3612116Sjkh z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 3622116Sjkh 3632116Sjkh Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 364141296Sdas (a) the term z*y in the final iteration is always less than 1; 3652116Sjkh (b) the error in the final result is biased upward so that 3662116Sjkh -1 ulp < sqrt(x) - z < 1.0625 ulp 3672116Sjkh instead of |sqrt(x)-z|<1.03125ulp. 3682116Sjkh 3692116Sjkh (3) Final adjustment 3702116Sjkh 371141296Sdas By twiddling y's last bit it is possible to force y to be 3722116Sjkh correctly rounded according to the prevailing rounding mode 3732116Sjkh as follows. Let r and i be copies of the rounding mode and 3742116Sjkh inexact flag before entering the square root program. Also we 3752116Sjkh use the expression y+-ulp for the next representable floating 3762116Sjkh numbers (up and down) of y. Note that y+-ulp = either fixed 3772116Sjkh point y+-1, or multiply y by nextafter(1,+-inf) in chopped 3782116Sjkh mode. 3792116Sjkh 3802116Sjkh R := RZ; ... set rounding mode to round-toward-zero 3812116Sjkh switch(r) { 3822116Sjkh case RN: ... round-to-nearest 3832116Sjkh if(x<= z*(z-ulp)...chopped) z = z - ulp; else 3842116Sjkh if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 3852116Sjkh break; 3862116Sjkh case RZ:case RM: ... round-to-zero or round-to--inf 3872116Sjkh R:=RP; ... reset rounding mod to round-to-+inf 3882116Sjkh if(x<z*z ... rounded up) z = z - ulp; else 3892116Sjkh if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 3902116Sjkh break; 3912116Sjkh case RP: ... round-to-+inf 3922116Sjkh if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 3932116Sjkh if(x>z*z ...chopped) z = z+ulp; 3942116Sjkh break; 3952116Sjkh } 3962116Sjkh 3972116Sjkh Remark 3. The above comparisons can be done in fixed point. For 3982116Sjkh example, to compare x and w=z*z chopped, it suffices to compare 3992116Sjkh x1 and w1 (the trailing parts of x and w), regarding them as 4002116Sjkh two's complement integers. 4012116Sjkh 4022116Sjkh ...Is z an exact square root? 4032116Sjkh To determine whether z is an exact square root of x, let z1 be the 4042116Sjkh trailing part of z, and also let x0 and x1 be the leading and 4052116Sjkh trailing parts of x. 4062116Sjkh 4072116Sjkh If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 4082116Sjkh I := 1; ... Raise Inexact flag: z is not exact 4092116Sjkh else { 4102116Sjkh j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 411141296Sdas k := z1 >> 26; ... get z's 25-th and 26-th 4122116Sjkh fraction bits 4132116Sjkh I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 4142116Sjkh } 4152116Sjkh R:= r ... restore rounded mode 4162116Sjkh return sqrt(x):=z. 4172116Sjkh 418141296Sdas If multiplication is cheaper then the foregoing red tape, the 4192116Sjkh Inexact flag can be evaluated by 4202116Sjkh 4212116Sjkh I := i; 4222116Sjkh I := (z*z!=x) or I. 4232116Sjkh 424141296Sdas Note that z*z can overwrite I; this value must be sensed if it is 4252116Sjkh True. 4262116Sjkh 4272116Sjkh Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 4282116Sjkh zero. 4292116Sjkh 4302116Sjkh -------------------- 431141296Sdas z1: | f2 | 4322116Sjkh -------------------- 4332116Sjkh bit 31 bit 0 4342116Sjkh 4352116Sjkh Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 4362116Sjkh or even of logb(x) have the following relations: 4372116Sjkh 4382116Sjkh ------------------------------------------------- 4392116Sjkh bit 27,26 of z1 bit 1,0 of x1 logb(x) 4402116Sjkh ------------------------------------------------- 4412116Sjkh 00 00 odd and even 4422116Sjkh 01 01 even 4432116Sjkh 10 10 odd 4442116Sjkh 10 00 even 4452116Sjkh 11 01 even 4462116Sjkh ------------------------------------------------- 4472116Sjkh 448141296Sdas (4) Special cases (see (4) of Section A). 449141296Sdas 4502116Sjkh */ 451141296Sdas 452