1336299Smmacy/*- 2336299Smmacy * ==================================================== 3336299Smmacy * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4336299Smmacy * 5336299Smmacy * Developed at SunPro, a Sun Microsystems, Inc. business. 6336299Smmacy * Permission to use, copy, modify, and distribute this 7336299Smmacy * software is freely granted, provided that this notice 8336299Smmacy * is preserved. 9336299Smmacy * ==================================================== 10336299Smmacy */ 11336299Smmacy 12336299Smmacy/* 13336299Smmacy * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net> 14336299Smmacy * 15336299Smmacy * Permission to use, copy, modify, and distribute this software for any 16336299Smmacy * purpose with or without fee is hereby granted, provided that the above 17336299Smmacy * copyright notice and this permission notice appear in all copies. 18336299Smmacy * 19336299Smmacy * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 20336299Smmacy * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 21336299Smmacy * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 22336299Smmacy * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 23336299Smmacy * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 24336299Smmacy * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 25336299Smmacy * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 26336299Smmacy */ 27336299Smmacy 28336299Smmacy/* powl(x,y) return x**y 29336299Smmacy * 30336299Smmacy * n 31336299Smmacy * Method: Let x = 2 * (1+f) 32336299Smmacy * 1. Compute and return log2(x) in two pieces: 33336299Smmacy * log2(x) = w1 + w2, 34336299Smmacy * where w1 has 113-53 = 60 bit trailing zeros. 35336299Smmacy * 2. Perform y*log2(x) = n+y' by simulating muti-precision 36336299Smmacy * arithmetic, where |y'|<=0.5. 37336299Smmacy * 3. Return x**y = 2**n*exp(y'*log2) 38336299Smmacy * 39336299Smmacy * Special cases: 40336299Smmacy * 1. (anything) ** 0 is 1 41336299Smmacy * 2. (anything) ** 1 is itself 42336299Smmacy * 3. (anything) ** NAN is NAN 43336299Smmacy * 4. NAN ** (anything except 0) is NAN 44336299Smmacy * 5. +-(|x| > 1) ** +INF is +INF 45336299Smmacy * 6. +-(|x| > 1) ** -INF is +0 46336299Smmacy * 7. +-(|x| < 1) ** +INF is +0 47336299Smmacy * 8. +-(|x| < 1) ** -INF is +INF 48336299Smmacy * 9. +-1 ** +-INF is NAN 49336299Smmacy * 10. +0 ** (+anything except 0, NAN) is +0 50336299Smmacy * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 51336299Smmacy * 12. +0 ** (-anything except 0, NAN) is +INF 52336299Smmacy * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF 53336299Smmacy * 14. -0 ** (odd integer) = -( +0 ** (odd integer) ) 54336299Smmacy * 15. +INF ** (+anything except 0,NAN) is +INF 55336299Smmacy * 16. +INF ** (-anything except 0,NAN) is +0 56336299Smmacy * 17. -INF ** (anything) = -0 ** (-anything) 57336299Smmacy * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) 58336299Smmacy * 19. (-anything except 0 and inf) ** (non-integer) is NAN 59336299Smmacy * 60336299Smmacy */ 61336299Smmacy 62336299Smmacy#include <sys/cdefs.h> 63336299Smmacy__FBSDID("$FreeBSD: stable/11/lib/msun/ld128/e_powl.c 336767 2018-07-27 17:39:36Z dim $"); 64336299Smmacy 65336299Smmacy#include <float.h> 66336299Smmacy#include <math.h> 67336299Smmacy 68336299Smmacy#include "math_private.h" 69336299Smmacy 70336299Smmacystatic const long double bp[] = { 71336299Smmacy 1.0L, 72336299Smmacy 1.5L, 73336299Smmacy}; 74336299Smmacy 75336299Smmacy/* log_2(1.5) */ 76336299Smmacystatic const long double dp_h[] = { 77336299Smmacy 0.0, 78336299Smmacy 5.8496250072115607565592654282227158546448E-1L 79336299Smmacy}; 80336299Smmacy 81336299Smmacy/* Low part of log_2(1.5) */ 82336299Smmacystatic const long double dp_l[] = { 83336299Smmacy 0.0, 84336299Smmacy 1.0579781240112554492329533686862998106046E-16L 85336299Smmacy}; 86336299Smmacy 87336299Smmacystatic const long double zero = 0.0L, 88336299Smmacy one = 1.0L, 89336299Smmacy two = 2.0L, 90336299Smmacy two113 = 1.0384593717069655257060992658440192E34L, 91336299Smmacy huge = 1.0e3000L, 92336299Smmacy tiny = 1.0e-3000L; 93336299Smmacy 94336299Smmacy/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2)) 95336299Smmacy z = (x-1)/(x+1) 96336299Smmacy 1 <= x <= 1.25 97336299Smmacy Peak relative error 2.3e-37 */ 98336299Smmacystatic const long double LN[] = 99336299Smmacy{ 100336299Smmacy -3.0779177200290054398792536829702930623200E1L, 101336299Smmacy 6.5135778082209159921251824580292116201640E1L, 102336299Smmacy -4.6312921812152436921591152809994014413540E1L, 103336299Smmacy 1.2510208195629420304615674658258363295208E1L, 104336299Smmacy -9.9266909031921425609179910128531667336670E-1L 105336299Smmacy}; 106336299Smmacystatic const long double LD[] = 107336299Smmacy{ 108336299Smmacy -5.129862866715009066465422805058933131960E1L, 109336299Smmacy 1.452015077564081884387441590064272782044E2L, 110336299Smmacy -1.524043275549860505277434040464085593165E2L, 111336299Smmacy 7.236063513651544224319663428634139768808E1L, 112336299Smmacy -1.494198912340228235853027849917095580053E1L 113336299Smmacy /* 1.0E0 */ 114336299Smmacy}; 115336299Smmacy 116336299Smmacy/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2))) 117336299Smmacy 0 <= x <= 0.5 118336299Smmacy Peak relative error 5.7e-38 */ 119336299Smmacystatic const long double PN[] = 120336299Smmacy{ 121336299Smmacy 5.081801691915377692446852383385968225675E8L, 122336299Smmacy 9.360895299872484512023336636427675327355E6L, 123336299Smmacy 4.213701282274196030811629773097579432957E4L, 124336299Smmacy 5.201006511142748908655720086041570288182E1L, 125336299Smmacy 9.088368420359444263703202925095675982530E-3L, 126336299Smmacy}; 127336299Smmacystatic const long double PD[] = 128336299Smmacy{ 129336299Smmacy 3.049081015149226615468111430031590411682E9L, 130336299Smmacy 1.069833887183886839966085436512368982758E8L, 131336299Smmacy 8.259257717868875207333991924545445705394E5L, 132336299Smmacy 1.872583833284143212651746812884298360922E3L, 133336299Smmacy /* 1.0E0 */ 134336299Smmacy}; 135336299Smmacy 136336299Smmacystatic const long double 137336299Smmacy /* ln 2 */ 138336299Smmacy lg2 = 6.9314718055994530941723212145817656807550E-1L, 139336299Smmacy lg2_h = 6.9314718055994528622676398299518041312695E-1L, 140336299Smmacy lg2_l = 2.3190468138462996154948554638754786504121E-17L, 141336299Smmacy ovt = 8.0085662595372944372e-0017L, 142336299Smmacy /* 2/(3*log(2)) */ 143336299Smmacy cp = 9.6179669392597560490661645400126142495110E-1L, 144336299Smmacy cp_h = 9.6179669392597555432899980587535537779331E-1L, 145336299Smmacy cp_l = 5.0577616648125906047157785230014751039424E-17L; 146336299Smmacy 147336299Smmacylong double 148336299Smmacypowl(long double x, long double y) 149336299Smmacy{ 150336299Smmacy long double z, ax, z_h, z_l, p_h, p_l; 151336299Smmacy long double yy1, t1, t2, r, s, t, u, v, w; 152336299Smmacy long double s2, s_h, s_l, t_h, t_l; 153336299Smmacy int32_t i, j, k, yisint, n; 154336299Smmacy u_int32_t ix, iy; 155336299Smmacy int32_t hx, hy; 156336299Smmacy ieee_quad_shape_type o, p, q; 157336299Smmacy 158336299Smmacy p.value = x; 159336299Smmacy hx = p.parts32.mswhi; 160336299Smmacy ix = hx & 0x7fffffff; 161336299Smmacy 162336299Smmacy q.value = y; 163336299Smmacy hy = q.parts32.mswhi; 164336299Smmacy iy = hy & 0x7fffffff; 165336299Smmacy 166336299Smmacy 167336299Smmacy /* y==zero: x**0 = 1 */ 168336299Smmacy if ((iy | q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0) 169336299Smmacy return one; 170336299Smmacy 171336299Smmacy /* 1.0**y = 1; -1.0**+-Inf = 1 */ 172336299Smmacy if (x == one) 173336299Smmacy return one; 174336299Smmacy if (x == -1.0L && iy == 0x7fff0000 175336299Smmacy && (q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0) 176336299Smmacy return one; 177336299Smmacy 178336299Smmacy /* +-NaN return x+y */ 179336299Smmacy if ((ix > 0x7fff0000) 180336299Smmacy || ((ix == 0x7fff0000) 181336299Smmacy && ((p.parts32.mswlo | p.parts32.lswhi | p.parts32.lswlo) != 0)) 182336299Smmacy || (iy > 0x7fff0000) 183336299Smmacy || ((iy == 0x7fff0000) 184336299Smmacy && ((q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) != 0))) 185336299Smmacy return x + y; 186336299Smmacy 187336299Smmacy /* determine if y is an odd int when x < 0 188336299Smmacy * yisint = 0 ... y is not an integer 189336299Smmacy * yisint = 1 ... y is an odd int 190336299Smmacy * yisint = 2 ... y is an even int 191336299Smmacy */ 192336299Smmacy yisint = 0; 193336299Smmacy if (hx < 0) 194336299Smmacy { 195336299Smmacy if (iy >= 0x40700000) /* 2^113 */ 196336299Smmacy yisint = 2; /* even integer y */ 197336299Smmacy else if (iy >= 0x3fff0000) /* 1.0 */ 198336299Smmacy { 199336299Smmacy if (floorl (y) == y) 200336299Smmacy { 201336299Smmacy z = 0.5 * y; 202336299Smmacy if (floorl (z) == z) 203336299Smmacy yisint = 2; 204336299Smmacy else 205336299Smmacy yisint = 1; 206336299Smmacy } 207336299Smmacy } 208336299Smmacy } 209336299Smmacy 210336299Smmacy /* special value of y */ 211336299Smmacy if ((q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0) 212336299Smmacy { 213336299Smmacy if (iy == 0x7fff0000) /* y is +-inf */ 214336299Smmacy { 215336299Smmacy if (((ix - 0x3fff0000) | p.parts32.mswlo | p.parts32.lswhi | 216336299Smmacy p.parts32.lswlo) == 0) 217336299Smmacy return y - y; /* +-1**inf is NaN */ 218336299Smmacy else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */ 219336299Smmacy return (hy >= 0) ? y : zero; 220336299Smmacy else /* (|x|<1)**-,+inf = inf,0 */ 221336299Smmacy return (hy < 0) ? -y : zero; 222336299Smmacy } 223336299Smmacy if (iy == 0x3fff0000) 224336299Smmacy { /* y is +-1 */ 225336299Smmacy if (hy < 0) 226336299Smmacy return one / x; 227336299Smmacy else 228336299Smmacy return x; 229336299Smmacy } 230336299Smmacy if (hy == 0x40000000) 231336299Smmacy return x * x; /* y is 2 */ 232336299Smmacy if (hy == 0x3ffe0000) 233336299Smmacy { /* y is 0.5 */ 234336299Smmacy if (hx >= 0) /* x >= +0 */ 235336299Smmacy return sqrtl (x); 236336299Smmacy } 237336299Smmacy } 238336299Smmacy 239336299Smmacy ax = fabsl (x); 240336299Smmacy /* special value of x */ 241336299Smmacy if ((p.parts32.mswlo | p.parts32.lswhi | p.parts32.lswlo) == 0) 242336299Smmacy { 243336299Smmacy if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000) 244336299Smmacy { 245336299Smmacy z = ax; /*x is +-0,+-inf,+-1 */ 246336299Smmacy if (hy < 0) 247336299Smmacy z = one / z; /* z = (1/|x|) */ 248336299Smmacy if (hx < 0) 249336299Smmacy { 250336299Smmacy if (((ix - 0x3fff0000) | yisint) == 0) 251336299Smmacy { 252336299Smmacy z = (z - z) / (z - z); /* (-1)**non-int is NaN */ 253336299Smmacy } 254336299Smmacy else if (yisint == 1) 255336299Smmacy z = -z; /* (x<0)**odd = -(|x|**odd) */ 256336299Smmacy } 257336299Smmacy return z; 258336299Smmacy } 259336299Smmacy } 260336299Smmacy 261336299Smmacy /* (x<0)**(non-int) is NaN */ 262336299Smmacy if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0) 263336299Smmacy return (x - x) / (x - x); 264336299Smmacy 265336299Smmacy /* |y| is huge. 266336299Smmacy 2^-16495 = 1/2 of smallest representable value. 267336299Smmacy If (1 - 1/131072)^y underflows, y > 1.4986e9 */ 268336299Smmacy if (iy > 0x401d654b) 269336299Smmacy { 270336299Smmacy /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */ 271336299Smmacy if (iy > 0x407d654b) 272336299Smmacy { 273336299Smmacy if (ix <= 0x3ffeffff) 274336299Smmacy return (hy < 0) ? huge * huge : tiny * tiny; 275336299Smmacy if (ix >= 0x3fff0000) 276336299Smmacy return (hy > 0) ? huge * huge : tiny * tiny; 277336299Smmacy } 278336299Smmacy /* over/underflow if x is not close to one */ 279336299Smmacy if (ix < 0x3ffeffff) 280336299Smmacy return (hy < 0) ? huge * huge : tiny * tiny; 281336299Smmacy if (ix > 0x3fff0000) 282336299Smmacy return (hy > 0) ? huge * huge : tiny * tiny; 283336299Smmacy } 284336299Smmacy 285336299Smmacy n = 0; 286336299Smmacy /* take care subnormal number */ 287336299Smmacy if (ix < 0x00010000) 288336299Smmacy { 289336299Smmacy ax *= two113; 290336299Smmacy n -= 113; 291336299Smmacy o.value = ax; 292336299Smmacy ix = o.parts32.mswhi; 293336299Smmacy } 294336299Smmacy n += ((ix) >> 16) - 0x3fff; 295336299Smmacy j = ix & 0x0000ffff; 296336299Smmacy /* determine interval */ 297336299Smmacy ix = j | 0x3fff0000; /* normalize ix */ 298336299Smmacy if (j <= 0x3988) 299336299Smmacy k = 0; /* |x|<sqrt(3/2) */ 300336299Smmacy else if (j < 0xbb67) 301336299Smmacy k = 1; /* |x|<sqrt(3) */ 302336299Smmacy else 303336299Smmacy { 304336299Smmacy k = 0; 305336299Smmacy n += 1; 306336299Smmacy ix -= 0x00010000; 307336299Smmacy } 308336299Smmacy 309336299Smmacy o.value = ax; 310336299Smmacy o.parts32.mswhi = ix; 311336299Smmacy ax = o.value; 312336299Smmacy 313336299Smmacy /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ 314336299Smmacy u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ 315336299Smmacy v = one / (ax + bp[k]); 316336299Smmacy s = u * v; 317336299Smmacy s_h = s; 318336299Smmacy 319336299Smmacy o.value = s_h; 320336299Smmacy o.parts32.lswlo = 0; 321336299Smmacy o.parts32.lswhi &= 0xf8000000; 322336299Smmacy s_h = o.value; 323336299Smmacy /* t_h=ax+bp[k] High */ 324336299Smmacy t_h = ax + bp[k]; 325336299Smmacy o.value = t_h; 326336299Smmacy o.parts32.lswlo = 0; 327336299Smmacy o.parts32.lswhi &= 0xf8000000; 328336299Smmacy t_h = o.value; 329336299Smmacy t_l = ax - (t_h - bp[k]); 330336299Smmacy s_l = v * ((u - s_h * t_h) - s_h * t_l); 331336299Smmacy /* compute log(ax) */ 332336299Smmacy s2 = s * s; 333336299Smmacy u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4]))); 334336299Smmacy v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2)))); 335336299Smmacy r = s2 * s2 * u / v; 336336299Smmacy r += s_l * (s_h + s); 337336299Smmacy s2 = s_h * s_h; 338336299Smmacy t_h = 3.0 + s2 + r; 339336299Smmacy o.value = t_h; 340336299Smmacy o.parts32.lswlo = 0; 341336299Smmacy o.parts32.lswhi &= 0xf8000000; 342336299Smmacy t_h = o.value; 343336299Smmacy t_l = r - ((t_h - 3.0) - s2); 344336299Smmacy /* u+v = s*(1+...) */ 345336299Smmacy u = s_h * t_h; 346336299Smmacy v = s_l * t_h + t_l * s; 347336299Smmacy /* 2/(3log2)*(s+...) */ 348336299Smmacy p_h = u + v; 349336299Smmacy o.value = p_h; 350336299Smmacy o.parts32.lswlo = 0; 351336299Smmacy o.parts32.lswhi &= 0xf8000000; 352336299Smmacy p_h = o.value; 353336299Smmacy p_l = v - (p_h - u); 354336299Smmacy z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ 355336299Smmacy z_l = cp_l * p_h + p_l * cp + dp_l[k]; 356336299Smmacy /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ 357336299Smmacy t = (long double) n; 358336299Smmacy t1 = (((z_h + z_l) + dp_h[k]) + t); 359336299Smmacy o.value = t1; 360336299Smmacy o.parts32.lswlo = 0; 361336299Smmacy o.parts32.lswhi &= 0xf8000000; 362336299Smmacy t1 = o.value; 363336299Smmacy t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); 364336299Smmacy 365336299Smmacy /* s (sign of result -ve**odd) = -1 else = 1 */ 366336299Smmacy s = one; 367336299Smmacy if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0) 368336299Smmacy s = -one; /* (-ve)**(odd int) */ 369336299Smmacy 370336299Smmacy /* split up y into yy1+y2 and compute (yy1+y2)*(t1+t2) */ 371336299Smmacy yy1 = y; 372336299Smmacy o.value = yy1; 373336299Smmacy o.parts32.lswlo = 0; 374336299Smmacy o.parts32.lswhi &= 0xf8000000; 375336299Smmacy yy1 = o.value; 376336299Smmacy p_l = (y - yy1) * t1 + y * t2; 377336299Smmacy p_h = yy1 * t1; 378336299Smmacy z = p_l + p_h; 379336299Smmacy o.value = z; 380336299Smmacy j = o.parts32.mswhi; 381336299Smmacy if (j >= 0x400d0000) /* z >= 16384 */ 382336299Smmacy { 383336299Smmacy /* if z > 16384 */ 384336299Smmacy if (((j - 0x400d0000) | o.parts32.mswlo | o.parts32.lswhi | 385336299Smmacy o.parts32.lswlo) != 0) 386336299Smmacy return s * huge * huge; /* overflow */ 387336299Smmacy else 388336299Smmacy { 389336299Smmacy if (p_l + ovt > z - p_h) 390336299Smmacy return s * huge * huge; /* overflow */ 391336299Smmacy } 392336299Smmacy } 393336299Smmacy else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */ 394336299Smmacy { 395336299Smmacy /* z < -16495 */ 396336299Smmacy if (((j - 0xc00d01bc) | o.parts32.mswlo | o.parts32.lswhi | 397336299Smmacy o.parts32.lswlo) 398336299Smmacy != 0) 399336299Smmacy return s * tiny * tiny; /* underflow */ 400336299Smmacy else 401336299Smmacy { 402336299Smmacy if (p_l <= z - p_h) 403336299Smmacy return s * tiny * tiny; /* underflow */ 404336299Smmacy } 405336299Smmacy } 406336299Smmacy /* compute 2**(p_h+p_l) */ 407336299Smmacy i = j & 0x7fffffff; 408336299Smmacy k = (i >> 16) - 0x3fff; 409336299Smmacy n = 0; 410336299Smmacy if (i > 0x3ffe0000) 411336299Smmacy { /* if |z| > 0.5, set n = [z+0.5] */ 412336299Smmacy n = floorl (z + 0.5L); 413336299Smmacy t = n; 414336299Smmacy p_h -= t; 415336299Smmacy } 416336299Smmacy t = p_l + p_h; 417336299Smmacy o.value = t; 418336299Smmacy o.parts32.lswlo = 0; 419336299Smmacy o.parts32.lswhi &= 0xf8000000; 420336299Smmacy t = o.value; 421336299Smmacy u = t * lg2_h; 422336299Smmacy v = (p_l - (t - p_h)) * lg2 + t * lg2_l; 423336299Smmacy z = u + v; 424336299Smmacy w = v - (z - u); 425336299Smmacy /* exp(z) */ 426336299Smmacy t = z * z; 427336299Smmacy u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4]))); 428336299Smmacy v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t))); 429336299Smmacy t1 = z - t * u / v; 430336299Smmacy r = (z * t1) / (t1 - two) - (w + z * w); 431336299Smmacy z = one - (r - z); 432336299Smmacy o.value = z; 433336299Smmacy j = o.parts32.mswhi; 434336299Smmacy j += (n << 16); 435336299Smmacy if ((j >> 16) <= 0) 436336299Smmacy z = scalbnl (z, n); /* subnormal output */ 437336299Smmacy else 438336299Smmacy { 439336299Smmacy o.parts32.mswhi = j; 440336299Smmacy z = o.value; 441336299Smmacy } 442336299Smmacy return s * z; 443336299Smmacy} 444