1/* mpfr_fma -- Floating multiply-add 2 3Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4Contributed by the Arenaire and Cacao projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#include "mpfr-impl.h" 24 25/* The fused-multiply-add (fma) of x, y and z is defined by: 26 fma(x,y,z)= x*y + z 27*/ 28 29int 30mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z, 31 mpfr_rnd_t rnd_mode) 32{ 33 int inexact; 34 mpfr_t u; 35 MPFR_SAVE_EXPO_DECL (expo); 36 MPFR_GROUP_DECL(group); 37 38 /* particular cases */ 39 if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) || 40 MPFR_IS_SINGULAR(y) || 41 MPFR_IS_SINGULAR(z) )) 42 { 43 if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z)) 44 { 45 MPFR_SET_NAN(s); 46 MPFR_RET_NAN; 47 } 48 /* now neither x, y or z is NaN */ 49 else if (MPFR_IS_INF(x) || MPFR_IS_INF(y)) 50 { 51 /* cases Inf*0+z, 0*Inf+z, Inf-Inf */ 52 if ((MPFR_IS_ZERO(y)) || 53 (MPFR_IS_ZERO(x)) || 54 (MPFR_IS_INF(z) && 55 ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z)))) 56 { 57 MPFR_SET_NAN(s); 58 MPFR_RET_NAN; 59 } 60 else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */ 61 { 62 MPFR_SET_INF(s); 63 MPFR_SET_SAME_SIGN(s, z); 64 MPFR_RET(0); 65 } 66 else /* z is finite */ 67 { 68 MPFR_SET_INF(s); 69 MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y))); 70 MPFR_RET(0); 71 } 72 } 73 /* now x and y are finite */ 74 else if (MPFR_IS_INF(z)) 75 { 76 MPFR_SET_INF(s); 77 MPFR_SET_SAME_SIGN(s, z); 78 MPFR_RET(0); 79 } 80 else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y)) 81 { 82 if (MPFR_IS_ZERO(z)) 83 { 84 int sign_p; 85 sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) ); 86 MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ? 87 ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z)) 88 ? -1 : 1) : 89 ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z)) 90 ? 1 : -1))); 91 MPFR_SET_ZERO(s); 92 MPFR_RET(0); 93 } 94 else 95 return mpfr_set (s, z, rnd_mode); 96 } 97 else /* necessarily z is zero here */ 98 { 99 MPFR_ASSERTD(MPFR_IS_ZERO(z)); 100 return mpfr_mul (s, x, y, rnd_mode); 101 } 102 } 103 104 /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y 105 is exact, except in case of overflow or underflow. */ 106 MPFR_SAVE_EXPO_MARK (expo); 107 MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u); 108 109 if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN))) 110 { 111 /* overflow or underflow - this case is regarded as rare, thus 112 does not need to be very efficient (even if some tests below 113 could have been done earlier). 114 It is an overflow iff u is an infinity (since MPFR_RNDN was used). 115 Alternatively, we could test the overflow flag, but in this case, 116 mpfr_clear_flags would have been necessary. */ 117 if (MPFR_IS_INF (u)) /* overflow */ 118 { 119 /* Let's eliminate the obvious case where x*y and z have the 120 same sign. No possible cancellation -> real overflow. 121 Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3, 122 then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case 123 is also an overflow. */ 124 if (MPFR_SIGN (u) == MPFR_SIGN (z) || 125 MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3) 126 { 127 MPFR_GROUP_CLEAR (group); 128 MPFR_SAVE_EXPO_FREE (expo); 129 return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z)); 130 } 131 132 /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and 133 (x/4)*y does not overflow (let's recall that the result 134 is exact with an unbounded exponent range). It does not 135 underflow either, because x*y overflows and the exponent 136 range is large enough. */ 137 inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN); 138 MPFR_ASSERTN (inexact == 0); 139 inexact = mpfr_mul (u, u, y, MPFR_RNDN); 140 MPFR_ASSERTN (inexact == 0); 141 142 /* Now, we need to add z/4... But it may underflow! */ 143 { 144 mpfr_t zo4; 145 mpfr_srcptr zz; 146 MPFR_BLOCK_DECL (flags); 147 148 if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) && 149 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u)) 150 { 151 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */ 152 zz = z; 153 } 154 else 155 { 156 mpfr_init2 (zo4, MPFR_PREC (z)); 157 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ)) 158 { 159 /* The division by 4 underflowed! */ 160 MPFR_ASSERTN (0); /* TODO... */ 161 } 162 zz = zo4; 163 } 164 165 /* Let's recall that u = x*y/4 and zz = z/4 (or z if the 166 following addition would give the same result). */ 167 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode)); 168 /* u and zz have different signs, so that an overflow 169 is not possible. But an underflow is theoretically 170 possible! */ 171 if (MPFR_UNDERFLOW (flags)) 172 { 173 MPFR_ASSERTN (zz != z); 174 MPFR_ASSERTN (0); /* TODO... */ 175 mpfr_clears (zo4, u, (mpfr_ptr) 0); 176 } 177 else 178 { 179 int inex2; 180 181 if (zz != z) 182 mpfr_clear (zo4); 183 MPFR_GROUP_CLEAR (group); 184 MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); 185 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode); 186 if (inex2) /* overflow */ 187 { 188 inexact = inex2; 189 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 190 } 191 goto end; 192 } 193 } 194 } 195 else /* underflow: one has |xy| < 2^(emin-1). */ 196 { 197 unsigned long scale = 0; 198 mpfr_t scaled_z; 199 mpfr_srcptr new_z; 200 mpfr_exp_t diffexp; 201 mpfr_prec_t pzs; 202 int xy_underflows; 203 204 /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin 205 (the + 1 on MPFR_PREC (s) is necessary because the exponent 206 of the result can be EXP(z) - 1). */ 207 diffexp = MPFR_GET_EXP (z) - __gmpfr_emin; 208 pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1); 209 if (diffexp <= pzs) 210 { 211 mpfr_uexp_t uscale; 212 mpfr_t scaled_v; 213 MPFR_BLOCK_DECL (flags); 214 215 uscale = (mpfr_uexp_t) pzs - diffexp + 1; 216 MPFR_ASSERTN (uscale > 0); 217 MPFR_ASSERTN (uscale <= ULONG_MAX); 218 scale = uscale; 219 mpfr_init2 (scaled_z, MPFR_PREC (z)); 220 inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN); 221 MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */ 222 new_z = scaled_z; 223 /* Now we need to recompute u = xy * 2^scale. */ 224 MPFR_BLOCK (flags, 225 if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y)) 226 { 227 mpfr_init2 (scaled_v, MPFR_PREC (x)); 228 mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN); 229 mpfr_mul (u, scaled_v, y, MPFR_RNDN); 230 } 231 else 232 { 233 mpfr_init2 (scaled_v, MPFR_PREC (y)); 234 mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN); 235 mpfr_mul (u, x, scaled_v, MPFR_RNDN); 236 }); 237 mpfr_clear (scaled_v); 238 MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); 239 xy_underflows = MPFR_UNDERFLOW (flags); 240 } 241 else 242 { 243 new_z = z; 244 xy_underflows = 1; 245 } 246 247 if (xy_underflows) 248 { 249 /* Let's replace xy by sign(xy) * 2^(emin-1). */ 250 MPFR_PREC (u) = MPFR_PREC_MIN; 251 mpfr_setmin (u, __gmpfr_emin); 252 MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x), 253 MPFR_SIGN (y))); 254 } 255 256 { 257 MPFR_BLOCK_DECL (flags); 258 259 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode)); 260 MPFR_GROUP_CLEAR (group); 261 if (scale != 0) 262 { 263 int inex2; 264 265 mpfr_clear (scaled_z); 266 /* Here an overflow is theoretically possible, in which case 267 the result may be wrong, hence the assert. An underflow 268 is not possible, but let's check that anyway. */ 269 MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */ 270 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */ 271 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN); 272 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode? 273 Also, handle the double rounding case: 274 s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */ 275 if (inex2) /* underflow */ 276 inexact = inex2; 277 } 278 } 279 280 /* FIXME/TODO: I'm not sure that the following is correct. 281 Check for possible spurious exceptions due to intermediate 282 computations. */ 283 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 284 goto end; 285 } 286 } 287 288 inexact = mpfr_add (s, u, z, rnd_mode); 289 MPFR_GROUP_CLEAR (group); 290 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 291 end: 292 MPFR_SAVE_EXPO_FREE (expo); 293 return mpfr_check_range (s, inexact, rnd_mode); 294} 295