s_expl.c revision 238784
1238722Skargl/*- 2238722Skargl * Copyright (c) 2009-2012 Steven G. Kargl 3238722Skargl * All rights reserved. 4238722Skargl * 5238722Skargl * Redistribution and use in source and binary forms, with or without 6238722Skargl * modification, are permitted provided that the following conditions 7238722Skargl * are met: 8238722Skargl * 1. Redistributions of source code must retain the above copyright 9238722Skargl * notice unmodified, this list of conditions, and the following 10238722Skargl * disclaimer. 11238722Skargl * 2. Redistributions in binary form must reproduce the above copyright 12238722Skargl * notice, this list of conditions and the following disclaimer in the 13238722Skargl * documentation and/or other materials provided with the distribution. 14238722Skargl * 15238722Skargl * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16238722Skargl * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17238722Skargl * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18238722Skargl * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19238722Skargl * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20238722Skargl * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21238722Skargl * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22238722Skargl * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23238722Skargl * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24238722Skargl * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25238722Skargl * 26238722Skargl * Optimized by Bruce D. Evans. 27238722Skargl */ 28238722Skargl 29238722Skargl#include <sys/cdefs.h> 30238722Skargl__FBSDID("$FreeBSD: head/lib/msun/ld80/s_expl.c 238784 2012-07-26 04:05:08Z kargl $"); 31238722Skargl 32238722Skargl/* 33238722Skargl * Compute the exponential of x for Intel 80-bit format. This is based on: 34238722Skargl * 35238722Skargl * PTP Tang, "Table-driven implementation of the exponential function 36238722Skargl * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15, 37238722Skargl * 144-157 (1989). 38238722Skargl * 39238784Skargl * where the 32 table entries have been expanded to INTERVALS (see below). 40238722Skargl */ 41238722Skargl 42238722Skargl#include <float.h> 43238722Skargl 44238722Skargl#ifdef __i386__ 45238722Skargl#include <ieeefp.h> 46238722Skargl#endif 47238722Skargl 48238783Skargl#include "fpmath.h" 49238722Skargl#include "math.h" 50238722Skargl#include "math_private.h" 51238722Skargl 52238722Skargl#define BIAS (LDBL_MAX_EXP - 1) 53238722Skargl 54238722Skarglstatic const long double 55238722Skarglhuge = 0x1p10000L, 56238722Skargltwom10000 = 0x1p-10000L; 57238722Skargl/* XXX Prevent gcc from erroneously constant folding this: */ 58238722Skarglstatic volatile const long double tiny = 0x1p-10000L; 59238722Skargl 60238722Skarglstatic const union IEEEl2bits 61238722Skargl/* log(2**16384 - 0.5) rounded towards zero: */ 62238722Skarglo_threshold = LD80C(0xb17217f7d1cf79ab, 13, 0, 11356.5234062941439488L), 63238722Skargl/* log(2**(-16381-64-1)) rounded towards zero: */ 64238722Skarglu_threshold = LD80C(0xb21dfe7f09e2baa9, 13, 1, -11399.4985314888605581L); 65238722Skargl 66238722Skarglstatic const double __aligned(64) 67238722Skargl/* 68238784Skargl * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must 69238784Skargl * have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest 70238784Skargl * bits zero so that multiplication of it by n is exact. 71238722Skargl */ 72238722SkarglL1 = 5.4152123484527692e-3, /* 0x162e42ff000000.0p-60 */ 73238722SkarglL2 = -3.2819649005320973e-13, /* -0x1718432a1b0e26.0p-94 */ 74238722SkarglINV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */ 75238722Skargl/* 76238722Skargl * Domain [-0.002708, 0.002708], range ~[-5.7136e-24, 5.7110e-24]: 77238722Skargl * |exp(x) - p(x)| < 2**-77.2 78238784Skargl * (0.002708 is ln2/(2*INTERVALS) rounded up a little). 79238722Skargl */ 80238722SkarglP2 = 0.5, 81238722SkarglP3 = 1.6666666666666119e-1, /* 0x15555555555490.0p-55 */ 82238722SkarglP4 = 4.1666666666665887e-2, /* 0x155555555554e5.0p-57 */ 83238722SkarglP5 = 8.3333354987869413e-3, /* 0x1111115b789919.0p-59 */ 84238722SkarglP6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ 85238722Skargl 86238722Skargl/* 87238784Skargl * 2^(i/INTERVALS) for i in [0,INTERVALS] is represented by two values where 88238784Skargl * the first 47 (?!) bits of the significand is stored in hi and the next 53 89238722Skargl * bits are in lo. 90238722Skargl */ 91238784Skargl#define INTERVALS 128 92238722Skargl 93238722Skarglstatic const struct { 94238722Skargl double hi; 95238722Skargl double lo; 96238784Skargl} s[INTERVALS] __aligned(16) = { 97238722Skargl 0x1p+0, 0x0p+0, 98238722Skargl 0x1.0163da9fb330p+0, 0x1.ab6c25335719bp-47, 99238722Skargl 0x1.02c9a3e77804p+0, 0x1.07737be56527cp-47, 100238722Skargl 0x1.04315e86e7f8p+0, 0x1.2f5ce3e688369p-50, 101238722Skargl 0x1.059b0d315854p+0, 0x1.a1d73e2a475b4p-47, 102238722Skargl 0x1.0706b29ddf6cp+0, 0x1.dc6dc403a9d88p-48, 103238722Skargl 0x1.0874518759bcp+0, 0x1.01186be4bb285p-49, 104238722Skargl 0x1.09e3ecac6f38p+0, 0x1.a290f03062c27p-51, 105238722Skargl 0x1.0b5586cf9890p+0, 0x1.ec5317256e308p-49, 106238722Skargl 0x1.0cc922b7247cp+0, 0x1.ba03db82dc49fp-47, 107238722Skargl 0x1.0e3ec32d3d18p+0, 0x1.10103a1727c58p-47, 108238722Skargl 0x1.0fb66affed30p+0, 0x1.af232091dd8a1p-48, 109238722Skargl 0x1.11301d0125b4p+0, 0x1.0a4ebbf1aed93p-48, 110238722Skargl 0x1.12abdc06c31cp+0, 0x1.7f72575a649adp-49, 111238722Skargl 0x1.1429aaea92dcp+0, 0x1.fb34101943b26p-48, 112238722Skargl 0x1.15a98c8a58e4p+0, 0x1.12480d573dd56p-48, 113238722Skargl 0x1.172b83c7d514p+0, 0x1.d6e6fbe462876p-47, 114238722Skargl 0x1.18af9388c8dcp+0, 0x1.4dddfb85cd1e1p-47, 115238722Skargl 0x1.1a35beb6fcb4p+0, 0x1.a9e5b4c7b4969p-47, 116238722Skargl 0x1.1bbe084045ccp+0, 0x1.39ab1e72b4428p-48, 117238722Skargl 0x1.1d4873168b98p+0, 0x1.53c02dc0144c8p-47, 118238722Skargl 0x1.1ed5022fcd90p+0, 0x1.cb8819ff61122p-48, 119238722Skargl 0x1.2063b88628ccp+0, 0x1.63b8eeb029509p-48, 120238722Skargl 0x1.21f49917ddc8p+0, 0x1.62552fd29294cp-48, 121238722Skargl 0x1.2387a6e75620p+0, 0x1.c3360fd6d8e0bp-47, 122238722Skargl 0x1.251ce4fb2a60p+0, 0x1.f9ac155bef4f5p-47, 123238722Skargl 0x1.26b4565e27ccp+0, 0x1.d257a673281d4p-48, 124238722Skargl 0x1.284dfe1f5638p+0, 0x1.2d9e2b9e07941p-53, 125238722Skargl 0x1.29e9df51fdecp+0, 0x1.09612e8afad12p-47, 126238722Skargl 0x1.2b87fd0dad98p+0, 0x1.ffbbd48ca71f9p-49, 127238722Skargl 0x1.2d285a6e4030p+0, 0x1.680123aa6da0fp-49, 128238722Skargl 0x1.2ecafa93e2f4p+0, 0x1.611ca0f45d524p-48, 129238722Skargl 0x1.306fe0a31b70p+0, 0x1.52de8d5a46306p-48, 130238722Skargl 0x1.32170fc4cd80p+0, 0x1.89a9ce78e1804p-47, 131238722Skargl 0x1.33c08b26416cp+0, 0x1.fa64e43086cb3p-47, 132238722Skargl 0x1.356c55f929fcp+0, 0x1.864a311a3b1bap-47, 133238722Skargl 0x1.371a7373aa9cp+0, 0x1.54e28aa05e8a9p-49, 134238722Skargl 0x1.38cae6d05d84p+0, 0x1.2c2d4e586cdf7p-47, 135238722Skargl 0x1.3a7db34e59fcp+0, 0x1.b750de494cf05p-47, 136238722Skargl 0x1.3c32dc313a8cp+0, 0x1.242000f9145acp-47, 137238722Skargl 0x1.3dea64c12340p+0, 0x1.11ada0911f09fp-47, 138238722Skargl 0x1.3fa4504ac800p+0, 0x1.ba0bf701aa418p-48, 139238722Skargl 0x1.4160a21f72e0p+0, 0x1.4fc2192dc79eep-47, 140238722Skargl 0x1.431f5d950a88p+0, 0x1.6dc704439410dp-48, 141238722Skargl 0x1.44e086061890p+0, 0x1.68189b7a04ef8p-47, 142238722Skargl 0x1.46a41ed1d004p+0, 0x1.772512f45922ap-48, 143238722Skargl 0x1.486a2b5c13ccp+0, 0x1.013c1a3b69063p-48, 144238722Skargl 0x1.4a32af0d7d3cp+0, 0x1.e672d8bcf46f9p-48, 145238722Skargl 0x1.4bfdad5362a0p+0, 0x1.38ea1cbd7f621p-47, 146238722Skargl 0x1.4dcb299fddd0p+0, 0x1.ac766dde353c2p-49, 147238722Skargl 0x1.4f9b2769d2c8p+0, 0x1.35699ec5b4d50p-47, 148238722Skargl 0x1.516daa2cf664p+0, 0x1.c112f52c84d82p-52, 149238722Skargl 0x1.5342b569d4f8p+0, 0x1.df0a83c49d86ap-52, 150238722Skargl 0x1.551a4ca5d920p+0, 0x1.d8a5d8c40486ap-49, 151238722Skargl 0x1.56f4736b527cp+0, 0x1.a66ecb004764fp-48, 152238722Skargl 0x1.58d12d497c7cp+0, 0x1.e9295e15b9a1ep-47, 153238722Skargl 0x1.5ab07dd48540p+0, 0x1.4ac64980a8c8fp-47, 154238722Skargl 0x1.5c9268a59468p+0, 0x1.b80e258dc0b4cp-47, 155238722Skargl 0x1.5e76f15ad214p+0, 0x1.0dd37c9840733p-49, 156238722Skargl 0x1.605e1b976dc0p+0, 0x1.160edeb25490ep-49, 157238722Skargl 0x1.6247eb03a558p+0, 0x1.2c7c3e81bf4b7p-50, 158238722Skargl 0x1.6434634ccc30p+0, 0x1.fc76f8714c4eep-48, 159238722Skargl 0x1.662388255220p+0, 0x1.24893ecf14dc8p-47, 160238722Skargl 0x1.68155d44ca94p+0, 0x1.9840e2b913dd0p-47, 161238722Skargl 0x1.6a09e667f3bcp+0, 0x1.921165f626cddp-49, 162238722Skargl 0x1.6c012750bda8p+0, 0x1.f76bb54cc007ap-47, 163238722Skargl 0x1.6dfb23c651a0p+0, 0x1.779107165f0dep-47, 164238722Skargl 0x1.6ff7df951948p+0, 0x1.e7c3f0da79f11p-51, 165238722Skargl 0x1.71f75e8ec5f4p+0, 0x1.9ee91b8797785p-47, 166238722Skargl 0x1.73f9a48a5814p+0, 0x1.9deae4d273456p-47, 167238722Skargl 0x1.75feb564267cp+0, 0x1.17edd35467491p-49, 168238722Skargl 0x1.780694fde5d0p+0, 0x1.fb0cd7014042cp-47, 169238722Skargl 0x1.7a11473eb018p+0, 0x1.b5f54408fdb37p-50, 170238722Skargl 0x1.7c1ed0130c10p+0, 0x1.93e2499a22c9cp-47, 171238722Skargl 0x1.7e2f336cf4e4p+0, 0x1.1082e815d0abdp-47, 172238722Skargl 0x1.80427543e1a0p+0, 0x1.1b60de67649a3p-48, 173238722Skargl 0x1.82589994cce0p+0, 0x1.28acf88afab35p-48, 174238722Skargl 0x1.8471a4623c78p+0, 0x1.667297b5cbe32p-47, 175238722Skargl 0x1.868d99b4492cp+0, 0x1.640720ec85613p-47, 176238722Skargl 0x1.88ac7d98a668p+0, 0x1.966530bcdf2d5p-48, 177238722Skargl 0x1.8ace5422aa0cp+0, 0x1.b5ba7c55a192dp-48, 178238722Skargl 0x1.8cf3216b5448p+0, 0x1.7de55439a2c39p-49, 179238722Skargl 0x1.8f1ae9915770p+0, 0x1.b15cc13a2e397p-47, 180238722Skargl 0x1.9145b0b91ffcp+0, 0x1.622986d1a7daep-50, 181238722Skargl 0x1.93737b0cdc5cp+0, 0x1.27a280e1f92a0p-47, 182238722Skargl 0x1.95a44cbc8520p+0, 0x1.dd36906d2b420p-49, 183238722Skargl 0x1.97d829fde4e4p+0, 0x1.f173d241f23d1p-49, 184238722Skargl 0x1.9a0f170ca078p+0, 0x1.cdd1884dc6234p-47, 185238722Skargl 0x1.9c49182a3f08p+0, 0x1.01c7c46b071f3p-48, 186238722Skargl 0x1.9e86319e3230p+0, 0x1.18c12653c7326p-47, 187238722Skargl 0x1.a0c667b5de54p+0, 0x1.2594d6d45c656p-47, 188238722Skargl 0x1.a309bec4a2d0p+0, 0x1.9ac60b8fbb86dp-47, 189238722Skargl 0x1.a5503b23e254p+0, 0x1.c8b424491caf8p-48, 190238722Skargl 0x1.a799e1330b34p+0, 0x1.86f2dfb2b158fp-48, 191238722Skargl 0x1.a9e6b5579fd8p+0, 0x1.fa1f5921deffap-47, 192238722Skargl 0x1.ac36bbfd3f34p+0, 0x1.ce06dcb351893p-47, 193238722Skargl 0x1.ae89f995ad38p+0, 0x1.6af439a68bb99p-47, 194238722Skargl 0x1.b0e07298db64p+0, 0x1.2c8421566fe38p-47, 195238722Skargl 0x1.b33a2b84f15cp+0, 0x1.d7b5fe873decap-47, 196238722Skargl 0x1.b59728de5590p+0, 0x1.cc71c40888b24p-47, 197238722Skargl 0x1.b7f76f2fb5e4p+0, 0x1.baa9ec206ad4fp-50, 198238722Skargl 0x1.ba5b030a1064p+0, 0x1.30819678d5eb7p-49, 199238722Skargl 0x1.bcc1e904bc1cp+0, 0x1.2247ba0f45b3dp-48, 200238722Skargl 0x1.bf2c25bd71e0p+0, 0x1.10811ae04a31cp-49, 201238722Skargl 0x1.c199bdd85528p+0, 0x1.c2220cb12a092p-48, 202238722Skargl 0x1.c40ab5fffd04p+0, 0x1.d368a6fc1078cp-47, 203238722Skargl 0x1.c67f12e57d14p+0, 0x1.694426ffa41e5p-49, 204238722Skargl 0x1.c8f6d9406e78p+0, 0x1.a88d65e24402ep-47, 205238722Skargl 0x1.cb720dcef904p+0, 0x1.48a81e5e8f4a5p-47, 206238722Skargl 0x1.cdf0b555dc3cp+0, 0x1.ce227c4ac7d63p-47, 207238722Skargl 0x1.d072d4a07894p+0, 0x1.dc68791790d0bp-47, 208238722Skargl 0x1.d2f87080d89cp+0, 0x1.8c56f091cc4f5p-47, 209238722Skargl 0x1.d5818dcfba48p+0, 0x1.c976816bad9b8p-50, 210238722Skargl 0x1.d80e316c9838p+0, 0x1.7bb84f9d04880p-48, 211238722Skargl 0x1.da9e603db328p+0, 0x1.5c2300696db53p-50, 212238722Skargl 0x1.dd321f301b44p+0, 0x1.025b4aef1e032p-47, 213238722Skargl 0x1.dfc97337b9b4p+0, 0x1.eb968cac39ed3p-48, 214238722Skargl 0x1.e264614f5a10p+0, 0x1.45093b0fd0bd7p-47, 215238722Skargl 0x1.e502ee78b3fcp+0, 0x1.b139e8980a9cdp-47, 216238722Skargl 0x1.e7a51fbc74c8p+0, 0x1.a5aa4594191bcp-51, 217238722Skargl 0x1.ea4afa2a490cp+0, 0x1.9858f73a18f5ep-48, 218238722Skargl 0x1.ecf482d8e67cp+0, 0x1.846d81897dca5p-47, 219238722Skargl 0x1.efa1bee615a0p+0, 0x1.3bb8fe90d496dp-47, 220238722Skargl 0x1.f252b376bba8p+0, 0x1.74e8696fc3639p-48, 221238722Skargl 0x1.f50765b6e454p+0, 0x1.9d3e12dd8a18bp-54, 222238722Skargl 0x1.f7bfdad9cbe0p+0, 0x1.38913b4bfe72cp-48, 223238722Skargl 0x1.fa7c1819e90cp+0, 0x1.82e90a7e74b26p-48, 224238722Skargl 0x1.fd3c22b8f71cp+0, 0x1.884badd25995ep-47 225238722Skargl}; 226238722Skargl 227238722Skargllong double 228238722Skarglexpl(long double x) 229238722Skargl{ 230238722Skargl union IEEEl2bits u, v; 231238722Skargl long double fn, r, r1, r2, q, t, t23, t45, twopk, twopkp10000, z; 232238722Skargl int k, n, n2; 233238722Skargl uint16_t hx, ix; 234238722Skargl 235238722Skargl /* Filter out exceptional cases. */ 236238722Skargl u.e = x; 237238722Skargl hx = u.xbits.expsign; 238238722Skargl ix = hx & 0x7fff; 239238722Skargl if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ 240238722Skargl if (ix == BIAS + LDBL_MAX_EXP) { 241238722Skargl if (hx & 0x8000 && u.xbits.man == 1ULL << 63) 242238722Skargl return (0.0L); /* x is -Inf */ 243238722Skargl return (x + x); /* x is +Inf, NaN or unsupported */ 244238722Skargl } 245238722Skargl if (x > o_threshold.e) 246238722Skargl return (huge * huge); 247238722Skargl if (x < u_threshold.e) 248238722Skargl return (tiny * tiny); 249238722Skargl } else if (ix <= BIAS - 34) { /* |x| < 0x1p-33 */ 250238722Skargl /* includes pseudo-denormals */ 251238722Skargl if (huge + x > 1.0L) /* trigger inexact iff x != 0 */ 252238722Skargl return (1.0L + x); 253238722Skargl } 254238722Skargl 255238722Skargl ENTERI(); 256238722Skargl 257238722Skargl /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ 258238722Skargl /* Use a specialized rint() to get fn. Assume round-to-nearest. */ 259238722Skargl fn = x * INV_L + 0x1.8p63 - 0x1.8p63; 260238722Skargl r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ 261238722Skargl#if defined(HAVE_EFFICIENT_IRINTL) 262238722Skargl n = irintl(fn); 263238722Skargl#elif defined(HAVE_EFFICIENT_IRINT) 264238722Skargl n = irint(fn); 265238722Skargl#else 266238722Skargl n = (int)fn; 267238722Skargl#endif 268238784Skargl n2 = (unsigned)n % INTERVALS; /* Tang's j. */ 269238784Skargl k = (n - n2) / INTERVALS; 270238722Skargl r1 = x - fn * L1; 271238722Skargl r2 = -fn * L2; 272238722Skargl 273238722Skargl /* Prepare scale factors. */ 274238722Skargl v.xbits.man = 1ULL << 63; 275238722Skargl if (k >= LDBL_MIN_EXP) { 276238722Skargl v.xbits.expsign = BIAS + k; 277238722Skargl twopk = v.e; 278238722Skargl } else { 279238722Skargl v.xbits.expsign = BIAS + k + 10000; 280238722Skargl twopkp10000 = v.e; 281238722Skargl } 282238722Skargl 283238722Skargl /* Evaluate expl(midpoint[n2] + r1 + r2) = s[n2] * expl(r1 + r2). */ 284238722Skargl /* Here q = q(r), not q(r1), since r1 is lopped like L1. */ 285238722Skargl t45 = r * P5 + P4; 286238722Skargl z = r * r; 287238722Skargl t23 = r * P3 + P2; 288238722Skargl q = r2 + z * t23 + z * z * t45 + z * z * z * P6; 289238722Skargl t = (long double)s[n2].lo + s[n2].hi; 290238722Skargl t = s[n2].lo + t * (q + r1) + s[n2].hi; 291238722Skargl 292238722Skargl /* Scale by 2**k. */ 293238722Skargl if (k >= LDBL_MIN_EXP) { 294238722Skargl if (k == LDBL_MAX_EXP) 295238722Skargl RETURNI(t * 2.0L * 0x1p16383L); 296238722Skargl RETURNI(t * twopk); 297238722Skargl } else { 298238722Skargl RETURNI(t * twopkp10000 * twom10000); 299238722Skargl } 300238722Skargl} 301