Cross Reference: /freebsd-11.0-release/lib/msun/ld80/s

Deleted Added

sdiff udiff text old ( 238783 ) new ( 238784 )

full compact

s_expl.c (238783)	s_expl.c (238784)
1/- 2 Copyright (c) 2009-2012 Steven G. Kargl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 13 unchanged lines hidden (view full) --- 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * Optimized by Bruce D. Evans. 27 */ 28 29#include <sys/cdefs.h>	1/- 2 Copyright (c) 2009-2012 Steven G. Kargl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 13 unchanged lines hidden (view full) --- 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * Optimized by Bruce D. Evans. 27 */ 28 29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/msun/ld80/s_expl.c 238783 2012-07-26 03:59:33Z kargl $");	30__FBSDID("$FreeBSD: head/lib/msun/ld80/s_expl.c 238784 2012-07-26 04:05:08Z kargl $");
31 32/* 33 * Compute the exponential of x for Intel 80-bit format. This is based on: 34 * 35 * PTP Tang, "Table-driven implementation of the exponential function 36 * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15, 37 * 144-157 (1989). 38 *	31 32/* 33 * Compute the exponential of x for Intel 80-bit format. This is based on: 34 * 35 * PTP Tang, "Table-driven implementation of the exponential function 36 * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15, 37 * 144-157 (1989). 38 *
39 * where the 32 table entries have been expanded to NUM (see below).	39 * where the 32 table entries have been expanded to INTERVALS (see below).
40 / 41 42#include <float.h> 43 44#ifdef __i386__ 45#include <ieeefp.h> 46#endif 47 --- 12 unchanged lines hidden* (view full) --- 60static const union IEEEl2bits 61/* log(2*16384 - 0.5) rounded towards zero: / 62o_threshold = LD80C(0xb17217f7d1cf79ab, 13, 0, 11356.5234062941439488L), 63/* log(2*(-16381-64-1)) rounded towards zero: / 64u_threshold = LD80C(0xb21dfe7f09e2baa9, 13, 1, -11399.4985314888605581L); 65 66static const double __aligned(64) 67/*	40 / 41 42#include <float.h> 43 44#ifdef __i386__ 45#include <ieeefp.h> 46#endif 47 --- 12 unchanged lines hidden* (view full) --- 60static const union IEEEl2bits 61/* log(2*16384 - 0.5) rounded towards zero: / 62o_threshold = LD80C(0xb17217f7d1cf79ab, 13, 0, 11356.5234062941439488L), 63/* log(2*(-16381-64-1)) rounded towards zero: / 64u_threshold = LD80C(0xb21dfe7f09e2baa9, 13, 1, -11399.4985314888605581L); 65 66static const double __aligned(64) 67/*
68 * ln2/NUM = L1+L2 (hi+lo decomposition for multiplication). L1 must have 69 * at least 22 (= log2(\|LDBL_MIN_EXP-extras\|) + log2(NUM)) lowest bits zero 70 * so that multiplication of it by n is exact.	68 * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must 69 * have at least 22 (= log2(\|LDBL_MIN_EXP-extras\|) + log2(INTERVALS)) lowest 70 * bits zero so that multiplication of it by n is exact.
71 / 72L1 = 5.4152123484527692e-3, / 0x162e42ff000000.0p-60 / 73L2 = -3.2819649005320973e-13, / -0x1718432a1b0e26.0p-94 / 74INV_L = 1.8466496523378731e+2, / 0x171547652b82fe.0p-45 / 75/ 76 * Domain [-0.002708, 0.002708], range ~[-5.7136e-24, 5.7110e-24]: 77 * \|exp(x) - p(x)\| < 2**-77.2	71 / 72L1 = 5.4152123484527692e-3, / 0x162e42ff000000.0p-60 / 73L2 = -3.2819649005320973e-13, / -0x1718432a1b0e26.0p-94 / 74INV_L = 1.8466496523378731e+2, / 0x171547652b82fe.0p-45 / 75/ 76 * Domain [-0.002708, 0.002708], range ~[-5.7136e-24, 5.7110e-24]: 77 * \|exp(x) - p(x)\| < 2**-77.2
78 * (0.002708 is ln2/(2*NUM) rounded up a little).	78 * (0.002708 is ln2/(2*INTERVALS) rounded up a little).
79 / 80P2 = 0.5, 81P3 = 1.6666666666666119e-1, / 0x15555555555490.0p-55 / 82P4 = 4.1666666666665887e-2, / 0x155555555554e5.0p-57 / 83P5 = 8.3333354987869413e-3, / 0x1111115b789919.0p-59 / 84P6 = 1.3888891738560272e-3; / 0x16c16c651633ae.0p-62 / 85 86/	79 / 80P2 = 0.5, 81P3 = 1.6666666666666119e-1, / 0x15555555555490.0p-55 / 82P4 = 4.1666666666665887e-2, / 0x155555555554e5.0p-57 / 83P5 = 8.3333354987869413e-3, / 0x1111115b789919.0p-59 / 84P6 = 1.3888891738560272e-3; / 0x16c16c651633ae.0p-62 / 85 86/
87 * 2^(i/NUM) for i in [0,NUM] is represented by two values where the 88 * first 47 (?!) bits of the significand is stored in hi and the next 53	87 * 2^(i/INTERVALS) for i in [0,INTERVALS] is represented by two values where 88 * the first 47 (?!) bits of the significand is stored in hi and the next 53
89 * bits are in lo. 90 */	89 * bits are in lo. 90 */
91#define NUM 128	91#define INTERVALS 128
92 93static const struct { 94 double hi; 95 double lo;	92 93static const struct { 94 double hi; 95 double lo;
96} s[NUM] __aligned(16) = {	96} s[INTERVALS] __aligned(16) = {
97 0x1p+0, 0x0p+0, 98 0x1.0163da9fb330p+0, 0x1.ab6c25335719bp-47, 99 0x1.02c9a3e77804p+0, 0x1.07737be56527cp-47, 100 0x1.04315e86e7f8p+0, 0x1.2f5ce3e688369p-50, 101 0x1.059b0d315854p+0, 0x1.a1d73e2a475b4p-47, 102 0x1.0706b29ddf6cp+0, 0x1.dc6dc403a9d88p-48, 103 0x1.0874518759bcp+0, 0x1.01186be4bb285p-49, 104 0x1.09e3ecac6f38p+0, 0x1.a290f03062c27p-51, --- 155 unchanged lines hidden (view full) --- 260 r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. / 261#if defined(HAVE_EFFICIENT_IRINTL) 262* n = irintl(fn); 263#elif defined(HAVE_EFFICIENT_IRINT) 264 n = irint(fn); 265#else 266 n = (int)fn; 267#endif	97 0x1p+0, 0x0p+0, 98 0x1.0163da9fb330p+0, 0x1.ab6c25335719bp-47, 99 0x1.02c9a3e77804p+0, 0x1.07737be56527cp-47, 100 0x1.04315e86e7f8p+0, 0x1.2f5ce3e688369p-50, 101 0x1.059b0d315854p+0, 0x1.a1d73e2a475b4p-47, 102 0x1.0706b29ddf6cp+0, 0x1.dc6dc403a9d88p-48, 103 0x1.0874518759bcp+0, 0x1.01186be4bb285p-49, 104 0x1.09e3ecac6f38p+0, 0x1.a290f03062c27p-51, --- 155 unchanged lines hidden (view full) --- 260 r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. / 261#if defined(HAVE_EFFICIENT_IRINTL) 262* n = irintl(fn); 263#elif defined(HAVE_EFFICIENT_IRINT) 264 n = irint(fn); 265#else 266 n = (int)fn; 267#endif
268 n2 = (unsigned)n % NUM; /* Tang's j. / 269* k = (n - n2) / NUM;	268 n2 = (unsigned)n % INTERVALS; /* Tang's j. / 269* k = (n - n2) / INTERVALS;
270 r1 = x - fn * L1; 271 r2 = -fn * L2; 272 273 /* Prepare scale factors. / 274* v.xbits.man = 1ULL << 63; 275 if (k >= LDBL_MIN_EXP) { 276 v.xbits.expsign = BIAS + k; 277 twopk = v.e; --- 23 unchanged lines hidden ---	270 r1 = x - fn * L1; 271 r2 = -fn * L2; 272 273 /* Prepare scale factors. / 274* v.xbits.man = 1ULL << 63; 275 if (k >= LDBL_MIN_EXP) { 276 v.xbits.expsign = BIAS + k; 277 twopk = v.e; --- 23 unchanged lines hidden ---