1/* mpfr_fma -- Floating multiply-add
2
3Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4Contributed by the AriC and Caramel projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#include "mpfr-impl.h"
24
25/* The fused-multiply-add (fma) of x, y and z is defined by:
26   fma(x,y,z)= x*y + z
27*/
28
/* Set s = x*y + z, correctly rounded in direction rnd_mode, with the
   product x*y computed exactly (i.e. a single rounding for the whole
   operation, as required for a fused multiply-add).  Returns the usual
   MPFR ternary value: 0 iff the stored result equals x*y+z exactly,
   positive (resp. negative) if s is larger (resp. smaller).
   The slow paths below are only taken when the exact product x*y
   overflows or underflows the current exponent range. */
int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;   /* temporary holding the exact product x*y */
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases: NaN, infinities and zeros are handled here so
     that the generic path below only sees finite nonzero operands */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
                     MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    {
      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      /* now neither x, y or z is NaN */
      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
        {
          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
          if ((MPFR_IS_ZERO(y)) ||
              (MPFR_IS_ZERO(x)) ||
              (MPFR_IS_INF(z) &&
               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
            {
              MPFR_SET_NAN(s);
              MPFR_RET_NAN;
            }
          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
            {
              /* x*y and z are infinities of the same sign: result is z */
              MPFR_SET_INF(s);
              MPFR_SET_SAME_SIGN(s, z);
              MPFR_RET(0);
            }
          else /* z is finite */
            {
              /* x*y is infinite, z finite: the infinity dominates */
              MPFR_SET_INF(s);
              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
              MPFR_RET(0);
            }
        }
      /* now x and y are finite */
      else if (MPFR_IS_INF(z))
        {
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
        {
          if (MPFR_IS_ZERO(z))
            {
              /* 0 + 0: the sign of an exact zero sum follows the
                 IEEE-754 rule: the result is -0 only when both terms
                 are -0, except in rounding toward -infinity (MPFR_RNDD)
                 where it is +0 only when both terms are +0. */
              int sign_p;
              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
                                ? -1 : 1) :
                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
                                ? 1 : -1)));
              MPFR_SET_ZERO(s);
              MPFR_RET(0);
            }
          else
            return mpfr_set (s, z, rnd_mode);
        }
      else /* necessarily z is zero here */
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(z));
          /* x*y + (+/-)0 rounds like the plain product (mpfr_mul also
             applies the zero-sign rules when x*y underflows to zero) */
          return mpfr_mul (s, x, y, rnd_mode);
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_SAVE_EXPO_MARK (expo);
  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);

  /* mpfr_mul returns nonzero (inexact) here only on overflow/underflow,
     since u has enough precision to hold the product exactly */
  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */
      if (MPFR_IS_INF (u))  /* overflow */
        {
          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
             is also an overflow. */
          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
            {
              MPFR_GROUP_CLEAR (group);
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
            }

          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
             (x/4)*y does not overflow (let's recall that the result
             is exact with an unbounded exponent range). It does not
             underflow either, because x*y overflows and the exponent
             range is large enough. */
          /* Trick: compute (x*y)/4 and z/4, add them, then scale the
             sum back by 4; the division by 4 keeps everything in range. */
          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);

          /* Now, we need to add z/4... But it may underflow! */
          {
            mpfr_t zo4;
            mpfr_srcptr zz;
            MPFR_BLOCK_DECL (flags);

            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
              {
                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
                zz = z;
              }
            else
              {
                mpfr_init2 (zo4, MPFR_PREC (z));
                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
                  {
                    /* The division by 4 underflowed!
                       NOTE(review): deliberately unhandled upstream;
                       aborts rather than returning a wrong result. */
                    MPFR_ASSERTN (0); /* TODO... */
                  }
                zz = zo4;
              }

            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
               following addition would give the same result). */
            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
            /* u and zz have different signs, so that an overflow
               is not possible. But an underflow is theoretically
               possible! */
            if (MPFR_UNDERFLOW (flags))
              {
                MPFR_ASSERTN (zz != z);
                MPFR_ASSERTN (0); /* TODO... */
                mpfr_clears (zo4, u, (mpfr_ptr) 0);
              }
            else
              {
                int inex2;

                if (zz != z)
                  mpfr_clear (zo4);
                MPFR_GROUP_CLEAR (group);
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
                /* undo the scaling: s = (x*y/4 + z/4) * 4; this may
                   itself overflow, in which case inex2 is nonzero and
                   carries the final ternary value */
                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
                if (inex2)  /* overflow */
                  {
                    inexact = inex2;
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
                  }
                goto end;
              }
          }
        }
      else  /* underflow: one has |xy| < 2^(emin-1). */
        {
          unsigned long scale = 0;
          mpfr_t scaled_z;
          mpfr_srcptr new_z;
          mpfr_exp_t diffexp;
          mpfr_prec_t pzs;
          int xy_underflows;

          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
             (the + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1). */
          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
          if (diffexp <= pzs)
            {
              /* z is close enough to the underflow threshold that the
                 whole computation must be scaled up by 2^scale */
              mpfr_uexp_t uscale;
              mpfr_t scaled_v;
              MPFR_BLOCK_DECL (flags);

              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
              MPFR_ASSERTN (uscale > 0);
              MPFR_ASSERTN (uscale <= ULONG_MAX);
              scale = uscale;
              mpfr_init2 (scaled_z, MPFR_PREC (z));
              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
              MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
              new_z = scaled_z;
              /* Now we need to recompute u = xy * 2^scale.
                 The factor 2^scale is applied to the operand with the
                 smaller exponent, to reduce the risk that the scaled
                 operand itself overflows. */
              MPFR_BLOCK (flags,
                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (x));
                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
                            }
                          else
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (y));
                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
                            });
              mpfr_clear (scaled_v);
              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
              xy_underflows = MPFR_UNDERFLOW (flags);
            }
          else
            {
              /* |z| is so much larger than |x*y| that the tiny product
                 can only influence the result through its sign and
                 nonzeroness; no scaling of z is needed */
              new_z = z;
              xy_underflows = 1;
            }

          if (xy_underflows)
            {
              /* Let's replace xy by sign(xy) * 2^(emin-1).
                 This minimal-magnitude stand-in preserves the sign and
                 the "smaller than any representable |z| ulp" property,
                 which is all the following addition can observe. */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
            }

          {
            MPFR_BLOCK_DECL (flags);

            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
            MPFR_GROUP_CLEAR (group);
            if (scale != 0)
              {
                int inex2;

                mpfr_clear (scaled_z);
                /* Here an overflow is theoretically possible, in which case
                   the result may be wrong, hence the assert. An underflow
                   is not possible, but let's check that anyway. */
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
                /* undo the scaling; this step may underflow, and inex2
                   then carries the final ternary value */
                inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
                /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
                   Also, handle the double rounding case:
                   s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
                if (inex2)  /* underflow */
                  inexact = inex2;
              }
          }

          /* FIXME/TODO: I'm not sure that the following is correct.
             Check for possible spurious exceptions due to intermediate
             computations. */
          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
          goto end;
        }
    }

  /* generic case: u holds x*y exactly, so a single rounded addition
     yields the correctly-rounded fma and its ternary value */
  inexact = mpfr_add (s, u, z, rnd_mode);
  MPFR_GROUP_CLEAR (group);
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 end:
  MPFR_SAVE_EXPO_FREE (expo);
  /* restore the caller's exponent range and check s against it
     (may round again and update the ternary value accordingly) */
  return mpfr_check_range (s, inexact, rnd_mode);
}
303