s_fma.c revision 140609
1/*-
2 * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/lib/msun/src/s_fma.c 140609 2005-01-22 09:53:18Z das $");
29
30#include <fenv.h>
31#include <float.h>
32#include <math.h>
33
34/*
35 * Fused multiply-add: Compute x * y + z with a single rounding error.
36 *
37 * We use scaling to avoid overflow/underflow, along with the
38 * canonical precision-doubling technique adapted from:
39 *
40 *	Dekker, T.  A Floating-Point Technique for Extending the
41 *	Available Precision.  Numer. Math. 18, 224-242 (1971).
42 *
43 * This algorithm is sensitive to the rounding precision.  FPUs such
44 * as the i387 must be set in double-precision mode if variables are
45 * to be stored in FP registers in order to avoid incorrect results.
46 * This is the default on FreeBSD, but not on many other systems.
47 *
48 * Tests on an Itanium 2 indicate that the hardware's FMA instruction
49 * is almost twice as fast as this implementation.  The hardware
50 * instruction should be used on platforms that support it.
51 *
52 * XXX May incur an absolute error of 0x1p-1074 for subnormal results
53 *     due to double rounding induced by the final scaling operation.
54 *
55 * XXX On machines supporting quad precision, we should use that, but
56 *     see the caveat in s_fmaf.c.
57 */
58double
59fma(double x, double y, double z)
60{
61	static const double split = 0x1p27 + 1.0;
62	double xs, ys, zs;
63	double c, cc, hx, hy, p, q, tx, ty;
64	double r, rr, s;
65	int oround;
66	int ex, ey, ez;
67	int spread;
68
69	if (x == 0.0 || y == 0.0)
70		return (z);
71	if (z == 0.0)
72		return (x * y);
73
74	/* Results of frexp() are undefined for these cases. */
75	if (!isfinite(x) || !isfinite(y) || !isfinite(z))
76		return (x * y + z);
77
78	xs = frexp(x, &ex);
79	ys = frexp(y, &ey);
80	zs = frexp(z, &ez);
81	oround = fegetround();
82	spread = ex + ey - ez;
83
84	/*
85	 * If x * y and z are many orders of magnitude apart, the scaling
86	 * will overflow, so we handle these cases specially.  Rounding
87	 * modes other than FE_TONEAREST are painful.
88	 */
89	if (spread > DBL_MANT_DIG * 2) {
90		fenv_t env;
91		feraiseexcept(FE_INEXACT);
92		switch(oround) {
93		case FE_TONEAREST:
94			return (x * y);
95		case FE_TOWARDZERO:
96			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
97				return (x * y);
98			feholdexcept(&env);
99			r = x * y;
100			if (!fetestexcept(FE_INEXACT))
101				r = nextafter(r, 0);
102			feupdateenv(&env);
103			return (r);
104		case FE_DOWNWARD:
105			if (z > 0.0)
106				return (x * y);
107			feholdexcept(&env);
108			r = x * y;
109			if (!fetestexcept(FE_INEXACT))
110				r = nextafter(r, -INFINITY);
111			feupdateenv(&env);
112			return (r);
113		default:	/* FE_UPWARD */
114			if (z < 0.0)
115				return (x * y);
116			feholdexcept(&env);
117			r = x * y;
118			if (!fetestexcept(FE_INEXACT))
119				r = nextafter(r, INFINITY);
120			feupdateenv(&env);
121			return (r);
122		}
123	}
124	if (spread < -DBL_MANT_DIG) {
125		feraiseexcept(FE_INEXACT);
126		if (!isnormal(z))
127			feraiseexcept(FE_UNDERFLOW);
128		switch (oround) {
129		case FE_TONEAREST:
130			return (z);
131		case FE_TOWARDZERO:
132			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
133				return (z);
134			else
135				return (nextafter(z, 0));
136		case FE_DOWNWARD:
137			if (x > 0.0 ^ y < 0.0)
138				return (z);
139			else
140				return (nextafter(z, -INFINITY));
141		default:	/* FE_UPWARD */
142			if (x > 0.0 ^ y < 0.0)
143				return (nextafter(z, INFINITY));
144			else
145				return (z);
146		}
147	}
148
149	/*
150	 * Use Dekker's algorithm to perform the multiplication and
151	 * subsequent addition in twice the machine precision.
152	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
153	 */
154	fesetround(FE_TONEAREST);
155
156	p = xs * split;
157	hx = xs - p;
158	hx += p;
159	tx = xs - hx;
160
161	p = ys * split;
162	hy = ys - p;
163	hy += p;
164	ty = ys - hy;
165
166	p = hx * hy;
167	q = hx * ty + tx * hy;
168	c = p + q;
169	cc = p - c + q + tx * ty;
170
171	zs = ldexp(zs, -spread);
172	r = c + zs;
173	s = r - c;
174	rr = (c - (r - s)) + (zs - s) + cc;
175
176	fesetround(oround);
177	return (ldexp(r + rr, ex + ey));
178}
179