1/* mpfr_fms -- Floating multiply-subtract
2
3Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4Contributed by the AriC and Caramel projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#include "mpfr-impl.h"
24
25/* The fused-multiply-subtract (fms) of x, y and z is defined by:
26   fms(x,y,z)= x*y - z
27   Note: this is neither in IEEE754R, nor in LIA-2, but both the
28   PowerPC and the Itanium define fms as x*y - z.
29*/
30
31int
32mpfr_fms (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
33          mpfr_rnd_t rnd_mode)
34{
35  int inexact;
36  mpfr_t u;
37  MPFR_SAVE_EXPO_DECL (expo);
38  MPFR_GROUP_DECL(group);
39
40  MPFR_LOG_FUNC
41    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
42      mpfr_get_prec (x), mpfr_log_prec, x,
43      mpfr_get_prec (y), mpfr_log_prec, y,
44      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
45     ("s[%Pu]=%.*Rg inexact=%d",
46      mpfr_get_prec (s), mpfr_log_prec, s, inexact));
47
48  /* particular cases */
49  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
50                     MPFR_IS_SINGULAR(y) ||
51                     MPFR_IS_SINGULAR(z) ))
52    {
53      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
54        {
55          MPFR_SET_NAN(s);
56          MPFR_RET_NAN;
57        }
58      /* now neither x, y or z is NaN */
59      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
60        {
61          /* cases Inf*0-z, 0*Inf-z, Inf-Inf */
62          if ((MPFR_IS_ZERO(y)) ||
63              (MPFR_IS_ZERO(x)) ||
64              (MPFR_IS_INF(z) &&
65               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) == MPFR_SIGN(z))))
66            {
67              MPFR_SET_NAN(s);
68              MPFR_RET_NAN;
69            }
70          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
71            {
72              MPFR_SET_INF(s);
73              MPFR_SET_OPPOSITE_SIGN(s, z);
74              MPFR_RET(0);
75            }
76          else /* z is finite */
77            {
78              MPFR_SET_INF(s);
79              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
80              MPFR_RET(0);
81            }
82        }
83      /* now x and y are finite */
84      else if (MPFR_IS_INF(z))
85        {
86          MPFR_SET_INF(s);
87          MPFR_SET_OPPOSITE_SIGN(s, z);
88          MPFR_RET(0);
89        }
90      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
91        {
92          if (MPFR_IS_ZERO(z))
93            {
94              int sign_p;
95              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
96              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
97                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_POS(z))
98                                ? -1 : 1) :
99                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_NEG(z))
100                                ? 1 : -1)));
101              MPFR_SET_ZERO(s);
102              MPFR_RET(0);
103            }
104          else
105            return mpfr_neg (s, z, rnd_mode);
106        }
107      else /* necessarily z is zero here */
108        {
109          MPFR_ASSERTD(MPFR_IS_ZERO(z));
110          return mpfr_mul (s, x, y, rnd_mode);
111        }
112    }
113
114  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
115     is exact, except in case of overflow or underflow. */
116  MPFR_SAVE_EXPO_MARK (expo);
117  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
118
119  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
120    {
121      /* overflow or underflow - this case is regarded as rare, thus
122         does not need to be very efficient (even if some tests below
123         could have been done earlier).
124         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
125         Alternatively, we could test the overflow flag, but in this case,
126         mpfr_clear_flags would have been necessary. */
127      if (MPFR_IS_INF (u))  /* overflow */
128        {
129          /* Let's eliminate the obvious case where x*y and z have the
130             same sign. No possible cancellation -> real overflow.
131             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
132             then |x*y| >= 2^(emax+1), and |x*y - z| >= 2^emax. This case
133             is also an overflow. */
134          if (MPFR_SIGN (u) != MPFR_SIGN (z) ||
135              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
136            {
137              MPFR_GROUP_CLEAR (group);
138              MPFR_SAVE_EXPO_FREE (expo);
139              return mpfr_overflow (s, rnd_mode, - MPFR_SIGN (z));
140            }
141
142          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
143             (x/4)*y does not overflow (let's recall that the result
144             is exact with an unbounded exponent range). It does not
145             underflow either, because x*y overflows and the exponent
146             range is large enough. */
147          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
148          MPFR_ASSERTN (inexact == 0);
149          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
150          MPFR_ASSERTN (inexact == 0);
151
152          /* Now, we need to subtract z/4... But it may underflow! */
153          {
154            mpfr_t zo4;
155            mpfr_srcptr zz;
156            MPFR_BLOCK_DECL (flags);
157
158            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
159                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
160              {
161                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
162                zz = z;
163              }
164            else
165              {
166                mpfr_init2 (zo4, MPFR_PREC (z));
167                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
168                  {
169                    /* The division by 4 underflowed! */
170                    MPFR_ASSERTN (0); /* TODO... */
171                  }
172                zz = zo4;
173              }
174
175            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
176               following subtraction would give the same result). */
177            MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, zz, rnd_mode));
178            /* u and zz have the same sign, so that an overflow
179               is not possible. But an underflow is theoretically
180               possible! */
181            if (MPFR_UNDERFLOW (flags))
182              {
183                MPFR_ASSERTN (zz != z);
184                MPFR_ASSERTN (0); /* TODO... */
185                mpfr_clears (zo4, u, (mpfr_ptr) 0);
186              }
187            else
188              {
189                int inex2;
190
191                if (zz != z)
192                  mpfr_clear (zo4);
193                MPFR_GROUP_CLEAR (group);
194                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
195                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
196                if (inex2)  /* overflow */
197                  {
198                    inexact = inex2;
199                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
200                  }
201                goto end;
202              }
203          }
204        }
205      else  /* underflow: one has |xy| < 2^(emin-1). */
206        {
207          unsigned long scale = 0;
208          mpfr_t scaled_z;
209          mpfr_srcptr new_z;
210          mpfr_exp_t diffexp;
211          mpfr_prec_t pzs;
212          int xy_underflows;
213
214          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
215             (the + 1 on MPFR_PREC (s) is necessary because the exponent
216             of the result can be EXP(z) - 1). */
217          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
218          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
219          if (diffexp <= pzs)
220            {
221              mpfr_uexp_t uscale;
222              mpfr_t scaled_v;
223              MPFR_BLOCK_DECL (flags);
224
225              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
226              MPFR_ASSERTN (uscale > 0);
227              MPFR_ASSERTN (uscale <= ULONG_MAX);
228              scale = uscale;
229              mpfr_init2 (scaled_z, MPFR_PREC (z));
230              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
231              MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
232              new_z = scaled_z;
233              /* Now we need to recompute u = xy * 2^scale. */
234              MPFR_BLOCK (flags,
235                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
236                            {
237                              mpfr_init2 (scaled_v, MPFR_PREC (x));
238                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
239                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
240                            }
241                          else
242                            {
243                              mpfr_init2 (scaled_v, MPFR_PREC (y));
244                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
245                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
246                            });
247              mpfr_clear (scaled_v);
248              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
249              xy_underflows = MPFR_UNDERFLOW (flags);
250            }
251          else
252            {
253              new_z = z;
254              xy_underflows = 1;
255            }
256
257          if (xy_underflows)
258            {
259              /* Let's replace xy by sign(xy) * 2^(emin-1). */
260              MPFR_PREC (u) = MPFR_PREC_MIN;
261              mpfr_setmin (u, __gmpfr_emin);
262              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
263                                                MPFR_SIGN (y)));
264            }
265
266          {
267            MPFR_BLOCK_DECL (flags);
268
269            MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, new_z, rnd_mode));
270            MPFR_GROUP_CLEAR (group);
271            if (scale != 0)
272              {
273                int inex2;
274
275                mpfr_clear (scaled_z);
276                /* Here an overflow is theoretically possible, in which case
277                   the result may be wrong, hence the assert. An underflow
278                   is not possible, but let's check that anyway. */
279                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
280                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
281                inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
282                /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
283                   Also, handle the double rounding case:
284                   s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
285                if (inex2)  /* underflow */
286                  inexact = inex2;
287              }
288          }
289
290          /* FIXME/TODO: I'm not sure that the following is correct.
291             Check for possible spurious exceptions due to intermediate
292             computations. */
293          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
294          goto end;
295        }
296    }
297
298  inexact = mpfr_sub (s, u, z, rnd_mode);
299  MPFR_GROUP_CLEAR (group);
300  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
301 end:
302  MPFR_SAVE_EXPO_FREE (expo);
303  return mpfr_check_range (s, inexact, rnd_mode);
304}
305