1/* $NetBSD: s_fmal.c,v 1.4 2017/05/06 18:02:52 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30#if 0 31__FBSDID("$FreeBSD: src/lib/msun/src/s_fmal.c,v 1.7 2011/10/21 06:30:43 das Exp $"); 32#else 33__RCSID("$NetBSD: s_fmal.c,v 1.4 2017/05/06 18:02:52 christos Exp $"); 34#endif 35 36#include "namespace.h" 37 38#include <machine/ieee.h> 39#include <fenv.h> 40#include <float.h> 41#include <math.h> 42 43#include "math_private.h" 44 45#ifdef __HAVE_LONG_DOUBLE 46/* 47 * A struct dd represents a floating-point number with twice the precision 48 * of a long double. We maintain the invariant that "hi" stores the high-order 49 * bits of the result. 50 */ 51struct dd { 52 long double hi; 53 long double lo; 54}; 55 56/* 57 * Compute a+b exactly, returning the exact result in a struct dd. We assume 58 * that both a and b are finite, but make no assumptions about their relative 59 * magnitudes. 60 */ 61static inline struct dd 62dd_add(long double a, long double b) 63{ 64 struct dd ret; 65 long double s; 66 67 ret.hi = a + b; 68 s = ret.hi - a; 69 ret.lo = (a - (ret.hi - s)) + (b - s); 70 return (ret); 71} 72 73/* 74 * Compute a+b, with a small tweak: The least significant bit of the 75 * result is adjusted into a sticky bit summarizing all the bits that 76 * were lost to rounding. This adjustment negates the effects of double 77 * rounding when the result is added to another number with a higher 78 * exponent. For an explanation of round and sticky bits, see any reference 79 * on FPU design, e.g., 80 * 81 * J. Coonen. An Implementation Guide to a Proposed Standard for 82 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 83 */ 84static inline long double 85add_adjusted(long double a, long double b) 86{ 87 struct dd sum; 88 union ieee_ext_u u; 89 90 sum = dd_add(a, b); 91 if (sum.lo != 0) { 92 u.extu_ld = sum.hi; 93 if ((u.extu_ext.ext_fracl & 1) == 0) 94 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 95 } 96 return (sum.hi); 97} 98 99/* 100 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 101 * that the result will be subnormal, and care is taken to ensure that 102 * double rounding does not occur. 103 */ 104static inline long double 105add_and_denormalize(long double a, long double b, int scale) 106{ 107 struct dd sum; 108 int bits_lost; 109 union ieee_ext_u u; 110 111 sum = dd_add(a, b); 112 113 /* 114 * If we are losing at least two bits of accuracy to denormalization, 115 * then the first lost bit becomes a round bit, and we adjust the 116 * lowest bit of sum.hi to make it a sticky bit summarizing all the 117 * bits in sum.lo. With the sticky bit adjusted, the hardware will 118 * break any ties in the correct direction. 119 * 120 * If we are losing only one bit to denormalization, however, we must 121 * break the ties manually. 122 */ 123 if (sum.lo != 0) { 124 u.extu_ld = sum.hi; 125 bits_lost = -u.extu_ext.ext_exp - scale + 1; 126 if ((bits_lost != 1) ^ (int)(u.extu_ext.ext_fracl & 1)) 127 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 128 } 129 return (ldexp((double)sum.hi, scale)); 130} 131 132/* 133 * Compute a*b exactly, returning the exact result in a struct dd. We assume 134 * that both a and b are normalized, so no underflow or overflow will occur. 135 * The current rounding mode must be round-to-nearest. 136 */ 137static inline struct dd 138dd_mul(long double a, long double b) 139{ 140#if LDBL_MANT_DIG == 64 141 static const long double split = 0x1p32L + 1.0; 142#elif LDBL_MANT_DIG == 113 143 static const long double split = 0x1p57L + 1.0; 144#endif 145 struct dd ret; 146 long double ha, hb, la, lb, p, q; 147 148 p = a * split; 149 ha = a - p; 150 ha += p; 151 la = a - ha; 152 153 p = b * split; 154 hb = b - p; 155 hb += p; 156 lb = b - hb; 157 158 p = ha * hb; 159 q = ha * lb + la * hb; 160 161 ret.hi = p + q; 162 ret.lo = p - ret.hi + q + la * lb; 163 return (ret); 164} 165 166/* 167 * Fused multiply-add: Compute x * y + z with a single rounding error. 168 * 169 * We use scaling to avoid overflow/underflow, along with the 170 * canonical precision-doubling technique adapted from: 171 * 172 * Dekker, T. A Floating-Point Technique for Extending the 173 * Available Precision. Numer. Math. 18, 224-242 (1971). 174 */ 175long double 176fmal(long double x, long double y, long double z) 177{ 178 long double xs, ys, zs, adj; 179 struct dd xy, r; 180 int oround; 181 int ex, ey, ez; 182 int spread; 183 184 /* 185 * Handle special cases. The order of operations and the particular 186 * return values here are crucial in handling special cases involving 187 * infinities, NaNs, overflows, and signed zeroes correctly. 188 */ 189 if (x == 0.0 || y == 0.0) 190 return (x * y + z); 191 if (z == 0.0) 192 return (x * y); 193 if (!isfinite(x) || !isfinite(y)) 194 return (x * y + z); 195 if (!isfinite(z)) 196 return (z); 197 198 xs = frexpl(x, &ex); 199 ys = frexpl(y, &ey); 200 zs = frexpl(z, &ez); 201 oround = fegetround(); 202 spread = ex + ey - ez; 203 204 /* 205 * If x * y and z are many orders of magnitude apart, the scaling 206 * will overflow, so we handle these cases specially. Rounding 207 * modes other than FE_TONEAREST are painful. 208 */ 209 if (spread < -LDBL_MANT_DIG) { 210 feraiseexcept(FE_INEXACT); 211 if (!isnormal(z)) 212 feraiseexcept(FE_UNDERFLOW); 213 switch (oround) { 214 case FE_TONEAREST: 215 return (z); 216 case FE_TOWARDZERO: 217 if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) 218 return (z); 219 else 220 return (nextafterl(z, 0)); 221 case FE_DOWNWARD: 222 if ((x > 0.0) ^ (y < 0.0)) 223 return (z); 224 else 225 return (nextafterl(z, (long double)-INFINITY)); 226 default: /* FE_UPWARD */ 227 if ((x > 0.0) ^ (y < 0.0)) 228 return (nextafterl(z, (long double)INFINITY)); 229 else 230 return (z); 231 } 232 } 233 if (spread <= LDBL_MANT_DIG * 2) 234 zs = ldexpl(zs, -spread); 235 else 236 zs = copysignl(LDBL_MIN, zs); 237 238 fesetround(FE_TONEAREST); 239 240 /* 241 * Basic approach for round-to-nearest: 242 * 243 * (xy.hi, xy.lo) = x * y (exact) 244 * (r.hi, r.lo) = xy.hi + z (exact) 245 * adj = xy.lo + r.lo (inexact; low bit is sticky) 246 * result = r.hi + adj (correctly rounded) 247 */ 248 xy = dd_mul(xs, ys); 249 r = dd_add(xy.hi, zs); 250 251 spread = ex + ey; 252 253 if (r.hi == 0.0) { 254 /* 255 * When the addends cancel to 0, ensure that the result has 256 * the correct sign. 257 */ 258 fesetround(oround); 259 { 260 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 261 return (xy.hi + vzs + ldexpl(xy.lo, spread)); 262 } 263 } 264 265 if (oround != FE_TONEAREST) { 266 /* 267 * There is no need to worry about double rounding in directed 268 * rounding modes. 269 */ 270 fesetround(oround); 271 adj = r.lo + xy.lo; 272 return (ldexpl(r.hi + adj, spread)); 273 } 274 275 adj = add_adjusted(r.lo, xy.lo); 276 if (spread + ilogbl(r.hi) > -16383) 277 return (ldexpl(r.hi + adj, spread)); 278 else 279 return (add_and_denormalize(r.hi, adj, spread)); 280} 281#endif 282