Deleted Added
full compact
s_expl.c (251339) s_expl.c (251343)
1/*-
2 * Copyright (c) 2009-2013 Steven G. Kargl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright

--- 13 unchanged lines hidden (view full) ---

22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * Optimized by Bruce D. Evans.
27 */
28
29#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2009-2013 Steven G. Kargl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright

--- 13 unchanged lines hidden (view full) ---

22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * Optimized by Bruce D. Evans.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/msun/ld80/s_expl.c 251339 2013-06-03 19:13:44Z kargl $");
30__FBSDID("$FreeBSD: head/lib/msun/ld80/s_expl.c 251343 2013-06-03 19:51:32Z kargl $");
31
32/**
33 * Compute the exponential of x for Intel 80-bit format. This is based on:
34 *
35 * PTP Tang, "Table-driven implementation of the exponential function
36 * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15,
37 * 144-157 (1989).
38 *

--- 258 unchanged lines hidden (view full) ---

297 if (k >= LDBL_MIN_EXP) {
298 if (k == LDBL_MAX_EXP)
299 RETURNI(t * 2 * 0x1p16383L);
300 RETURNI(t * twopk);
301 } else {
302 RETURNI(t * twopkp10000 * twom10000);
303 }
304}
31
32/**
33 * Compute the exponential of x for Intel 80-bit format. This is based on:
34 *
35 * PTP Tang, "Table-driven implementation of the exponential function
36 * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15,
37 * 144-157 (1989).
38 *

--- 258 unchanged lines hidden (view full) ---

297 if (k >= LDBL_MIN_EXP) {
298 if (k == LDBL_MAX_EXP)
299 RETURNI(t * 2 * 0x1p16383L);
300 RETURNI(t * twopk);
301 } else {
302 RETURNI(t * twopkp10000 * twom10000);
303 }
304}
305
306/**
307 * Compute expm1l(x) for Intel 80-bit format. This is based on:
308 *
309 * PTP Tang, "Table-driven implementation of the Expm1 function
310 * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 18,
311 * 211-222 (1992).
312 */
313
314/*
315 * Our T1 and T2 are chosen to be approximately the points where method
316 * A and method B have the same accuracy. Tang's T1 and T2 are the
317 * points where method A's accuracy changes by a full bit. For Tang,
318 * this drop in accuracy makes method A immediately less accurate than
319 * method B, but our larger INTERVALS makes method A 2 bits more
320 * accurate so it remains the most accurate method significantly
321 * closer to the origin despite losing the full bit in our extended
322 * range for it.
323 */
324static const double
325T1 = -0.1659, /* ~-30.625/128 * log(2) */
326T2 = 0.1659; /* ~30.625/128 * log(2) */
327
328/*
329 * Domain [-0.1659, 0.1659], range ~[-1.2027e-22, 3.4417e-22]:
330 * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-71.2
331 */
332static const union IEEEl2bits
333B3 = LD80C(0xaaaaaaaaaaaaaaab, -3, 1.66666666666666666671e-1L),
334B4 = LD80C(0xaaaaaaaaaaaaaaac, -5, 4.16666666666666666712e-2L);
335
336static const double
337B5 = 8.3333333333333245e-3, /* 0x1.111111111110cp-7 */
338B6 = 1.3888888888888861e-3, /* 0x1.6c16c16c16c0ap-10 */
339B7 = 1.9841269841532042e-4, /* 0x1.a01a01a0319f9p-13 */
340B8 = 2.4801587302069236e-5, /* 0x1.a01a01a03cbbcp-16 */
341B9 = 2.7557316558468562e-6, /* 0x1.71de37fd33d67p-19 */
342B10 = 2.7557315829785151e-7, /* 0x1.27e4f91418144p-22 */
343B11 = 2.5063168199779829e-8, /* 0x1.ae94fabdc6b27p-26 */
344B12 = 2.0887164654459567e-9; /* 0x1.1f122d6413fe1p-29 */
345
346long double
347expm1l(long double x)
348{
349 union IEEEl2bits u, v;
350 long double fn, hx2_hi, hx2_lo, q, r, r1, r2, t, twomk, twopk, x_hi;
351 long double x_lo, x2, z;
352 long double x4;
353 int k, n, n2;
354 uint16_t hx, ix;
355
356 /* Filter out exceptional cases. */
357 u.e = x;
358 hx = u.xbits.expsign;
359 ix = hx & 0x7fff;
360 if (ix >= BIAS + 6) { /* |x| >= 64 or x is NaN */
361 if (ix == BIAS + LDBL_MAX_EXP) {
362 if (hx & 0x8000) /* x is -Inf, -NaN or unsupported */
363 return (-1 / x - 1);
364 return (x + x); /* x is +Inf, +NaN or unsupported */
365 }
366 if (x > o_threshold)
367 return (huge * huge);
368 /*
369 * expm1l() never underflows, but it must avoid
370 * unrepresentable large negative exponents. We used a
371 * much smaller threshold for large |x| above than in
372 * expl() so as to handle not so large negative exponents
373 * in the same way as large ones here.
374 */
375 if (hx & 0x8000) /* x <= -64 */
376 return (tiny - 1); /* good for x < -65ln2 - eps */
377 }
378
379 ENTERI();
380
381 if (T1 < x && x < T2) {
382 if (ix < BIAS - 64) { /* |x| < 0x1p-64 (includes pseudos) */
383 /* x (rounded) with inexact if x != 0: */
384 RETURNI(x == 0 ? x :
385 (0x1p100 * x + fabsl(x)) * 0x1p-100);
386 }
387
388 x2 = x * x;
389 x4 = x2 * x2;
390 q = x4 * (x2 * (x4 *
391 /*
392 * XXX the number of terms is no longer good for
393 * pairwise grouping of all except B3, and the
394 * grouping is no longer from highest down.
395 */
396 (x2 * B12 + (x * B11 + B10)) +
397 (x2 * (x * B9 + B8) + (x * B7 + B6))) +
398 (x * B5 + B4.e)) + x2 * x * B3.e;
399
400 x_hi = (float)x;
401 x_lo = x - x_hi;
402 hx2_hi = x_hi * x_hi / 2;
403 hx2_lo = x_lo * (x + x_hi) / 2;
404 if (ix >= BIAS - 7)
405 RETURNI(hx2_lo + x_lo + q + (hx2_hi + x_hi));
406 else
407 RETURNI(hx2_lo + q + hx2_hi + x);
408 }
409
410 /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
411 /* Use a specialized rint() to get fn. Assume round-to-nearest. */
412 fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
413#if defined(HAVE_EFFICIENT_IRINTL)
414 n = irintl(fn);
415#elif defined(HAVE_EFFICIENT_IRINT)
416 n = irint(fn);
417#else
418 n = (int)fn;
419#endif
420 n2 = (unsigned)n % INTERVALS;
421 k = n >> LOG2_INTERVALS;
422 r1 = x - fn * L1;
423 r2 = fn * -L2;
424 r = r1 + r2;
425
426 /* Prepare scale factor. */
427 v.e = 1;
428 v.xbits.expsign = BIAS + k;
429 twopk = v.e;
430
431 /*
432 * Evaluate lower terms of
433 * expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2).
434 */
435 z = r * r;
436 q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6;
437
438 t = (long double)tbl[n2].lo + tbl[n2].hi;
439
440 if (k == 0) {
441 t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 +
442 (tbl[n2].hi - 1);
443 RETURNI(t);
444 }
445 if (k == -1) {
446 t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 +
447 (tbl[n2].hi - 2);
448 RETURNI(t / 2);
449 }
450 if (k < -7) {
451 t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;
452 RETURNI(t * twopk - 1);
453 }
454 if (k > 2 * LDBL_MANT_DIG - 1) {
455 t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;
456 if (k == LDBL_MAX_EXP)
457 RETURNI(t * 2 * 0x1p16383L - 1);
458 RETURNI(t * twopk - 1);
459 }
460
461 v.xbits.expsign = BIAS - k;
462 twomk = v.e;
463
464 if (k > LDBL_MANT_DIG - 1)
465 t = tbl[n2].lo - twomk + t * (q + r1) + tbl[n2].hi;
466 else
467 t = tbl[n2].lo + t * (q + r1) + (tbl[n2].hi - twomk);
468 RETURNI(t * twopk);
469}