1/* mpfr_fma -- Floating multiply-add
2
3Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4Contributed by the Arenaire and Cacao projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#include "mpfr-impl.h"
24
25/* The fused-multiply-add (fma) of x, y and z is defined by:
26   fma(x,y,z)= x*y + z
27*/
28
29int
30mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
31          mpfr_rnd_t rnd_mode)
32{
33  int inexact;
34  mpfr_t u;
35  MPFR_SAVE_EXPO_DECL (expo);
36  MPFR_GROUP_DECL(group);
37
38  /* particular cases */
39  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
40                     MPFR_IS_SINGULAR(y) ||
41                     MPFR_IS_SINGULAR(z) ))
42    {
43      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
44        {
45          MPFR_SET_NAN(s);
46          MPFR_RET_NAN;
47        }
48      /* now neither x, y or z is NaN */
49      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
50        {
51          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
52          if ((MPFR_IS_ZERO(y)) ||
53              (MPFR_IS_ZERO(x)) ||
54              (MPFR_IS_INF(z) &&
55               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
56            {
57              MPFR_SET_NAN(s);
58              MPFR_RET_NAN;
59            }
60          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
61            {
62              MPFR_SET_INF(s);
63              MPFR_SET_SAME_SIGN(s, z);
64              MPFR_RET(0);
65            }
66          else /* z is finite */
67            {
68              MPFR_SET_INF(s);
69              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
70              MPFR_RET(0);
71            }
72        }
73      /* now x and y are finite */
74      else if (MPFR_IS_INF(z))
75        {
76          MPFR_SET_INF(s);
77          MPFR_SET_SAME_SIGN(s, z);
78          MPFR_RET(0);
79        }
80      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
81        {
82          if (MPFR_IS_ZERO(z))
83            {
84              int sign_p;
85              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
86              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
87                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
88                                ? -1 : 1) :
89                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
90                                ? 1 : -1)));
91              MPFR_SET_ZERO(s);
92              MPFR_RET(0);
93            }
94          else
95            return mpfr_set (s, z, rnd_mode);
96        }
97      else /* necessarily z is zero here */
98        {
99          MPFR_ASSERTD(MPFR_IS_ZERO(z));
100          return mpfr_mul (s, x, y, rnd_mode);
101        }
102    }
103
104  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
105     is exact, except in case of overflow or underflow. */
106  MPFR_SAVE_EXPO_MARK (expo);
107  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
108
109  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
110    {
111      /* overflow or underflow - this case is regarded as rare, thus
112         does not need to be very efficient (even if some tests below
113         could have been done earlier).
114         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
115         Alternatively, we could test the overflow flag, but in this case,
116         mpfr_clear_flags would have been necessary. */
117      if (MPFR_IS_INF (u))  /* overflow */
118        {
119          /* Let's eliminate the obvious case where x*y and z have the
120             same sign. No possible cancellation -> real overflow.
121             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
122             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
123             is also an overflow. */
124          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
125              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
126            {
127              MPFR_GROUP_CLEAR (group);
128              MPFR_SAVE_EXPO_FREE (expo);
129              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
130            }
131
132          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
133             (x/4)*y does not overflow (let's recall that the result
134             is exact with an unbounded exponent range). It does not
135             underflow either, because x*y overflows and the exponent
136             range is large enough. */
137          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
138          MPFR_ASSERTN (inexact == 0);
139          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
140          MPFR_ASSERTN (inexact == 0);
141
142          /* Now, we need to add z/4... But it may underflow! */
143          {
144            mpfr_t zo4;
145            mpfr_srcptr zz;
146            MPFR_BLOCK_DECL (flags);
147
148            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
149                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
150              {
151                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
152                zz = z;
153              }
154            else
155              {
156                mpfr_init2 (zo4, MPFR_PREC (z));
157                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
158                  {
159                    /* The division by 4 underflowed! */
160                    MPFR_ASSERTN (0); /* TODO... */
161                  }
162                zz = zo4;
163              }
164
165            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
166               following addition would give the same result). */
167            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
168            /* u and zz have different signs, so that an overflow
169               is not possible. But an underflow is theoretically
170               possible! */
171            if (MPFR_UNDERFLOW (flags))
172              {
173                MPFR_ASSERTN (zz != z);
174                MPFR_ASSERTN (0); /* TODO... */
175                mpfr_clears (zo4, u, (mpfr_ptr) 0);
176              }
177            else
178              {
179                int inex2;
180
181                if (zz != z)
182                  mpfr_clear (zo4);
183                MPFR_GROUP_CLEAR (group);
184                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
185                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
186                if (inex2)  /* overflow */
187                  {
188                    inexact = inex2;
189                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
190                  }
191                goto end;
192              }
193          }
194        }
195      else  /* underflow: one has |xy| < 2^(emin-1). */
196        {
197          unsigned long scale = 0;
198          mpfr_t scaled_z;
199          mpfr_srcptr new_z;
200          mpfr_exp_t diffexp;
201          mpfr_prec_t pzs;
202          int xy_underflows;
203
204          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
205             (the + 1 on MPFR_PREC (s) is necessary because the exponent
206             of the result can be EXP(z) - 1). */
207          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
208          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
209          if (diffexp <= pzs)
210            {
211              mpfr_uexp_t uscale;
212              mpfr_t scaled_v;
213              MPFR_BLOCK_DECL (flags);
214
215              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
216              MPFR_ASSERTN (uscale > 0);
217              MPFR_ASSERTN (uscale <= ULONG_MAX);
218              scale = uscale;
219              mpfr_init2 (scaled_z, MPFR_PREC (z));
220              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
221              MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
222              new_z = scaled_z;
223              /* Now we need to recompute u = xy * 2^scale. */
224              MPFR_BLOCK (flags,
225                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
226                            {
227                              mpfr_init2 (scaled_v, MPFR_PREC (x));
228                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
229                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
230                            }
231                          else
232                            {
233                              mpfr_init2 (scaled_v, MPFR_PREC (y));
234                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
235                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
236                            });
237              mpfr_clear (scaled_v);
238              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
239              xy_underflows = MPFR_UNDERFLOW (flags);
240            }
241          else
242            {
243              new_z = z;
244              xy_underflows = 1;
245            }
246
247          if (xy_underflows)
248            {
249              /* Let's replace xy by sign(xy) * 2^(emin-1). */
250              MPFR_PREC (u) = MPFR_PREC_MIN;
251              mpfr_setmin (u, __gmpfr_emin);
252              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
253                                                MPFR_SIGN (y)));
254            }
255
256          {
257            MPFR_BLOCK_DECL (flags);
258
259            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
260            MPFR_GROUP_CLEAR (group);
261            if (scale != 0)
262              {
263                int inex2;
264
265                mpfr_clear (scaled_z);
266                /* Here an overflow is theoretically possible, in which case
267                   the result may be wrong, hence the assert. An underflow
268                   is not possible, but let's check that anyway. */
269                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
270                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
271                inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
272                /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
273                   Also, handle the double rounding case:
274                   s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
275                if (inex2)  /* underflow */
276                  inexact = inex2;
277              }
278          }
279
280          /* FIXME/TODO: I'm not sure that the following is correct.
281             Check for possible spurious exceptions due to intermediate
282             computations. */
283          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
284          goto end;
285        }
286    }
287
288  inexact = mpfr_add (s, u, z, rnd_mode);
289  MPFR_GROUP_CLEAR (group);
290  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
291 end:
292  MPFR_SAVE_EXPO_FREE (expo);
293  return mpfr_check_range (s, inexact, rnd_mode);
294}
295