/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* agm(x,y) is between x and y, so the final result cannot overflow or
   underflow in the caller's exponent range; the exponent range is only
   extended (saved and restored below) for the intermediate computations. */
int
mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
{
  int compare, inexact;
  mp_size_t s;
  mpfr_prec_t p, q;
  mp_limb_t *up, *vp, *ufp, *vfp;
  mpfr_t u, v, uf, vf, sc1, sc2;
  mpfr_exp_t scaleop = 0, scaleit;
  unsigned long n; /* number of iterations */
  MPFR_ZIV_DECL (loop);
  MPFR_TMP_DECL(marker);
  MPFR_SAVE_EXPO_DECL (expo);

  MPFR_LOG_FUNC
    (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (op2), mpfr_log_prec, op2,
      mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
     ("r[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (r), mpfr_log_prec, r, inexact));

  /* Deal with special values */
  if (MPFR_ARE_SINGULAR (op1, op2))
    {
      /* If a or b is NaN, the result is NaN */
      if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
        {
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      /* now one of a or b is Inf or 0 */
      /* If a or b is +Inf and both are strictly positive, the result is
         +Inf. Otherwise (a or b is -Inf, 0 or negative), the result is
         NaN. */
      else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
        {
          if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
            {
              MPFR_SET_INF(r);
              MPFR_SET_SAME_SIGN(r, op1);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(r);
              MPFR_RET_NAN;
            }
        }
      else /* a and b are neither NaN nor Inf, and one is zero */
        {  /* If a or b is 0, the result is +0, in particular because the
              result is always >= 0 with our definition (Maple sometimes
              chooses a different sign for GaussAGM, but it uses another
              definition, with possible negative results). */
          MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
          MPFR_SET_POS (r);
          MPFR_SET_ZERO (r);
          MPFR_RET (0); /* exact */
        }
    }

  /* If a or b is negative (excluding -Infinity), the result is NaN */
  if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }

  /* Precision of the following computation */
  q = MPFR_PREC(r);
  p = q + MPFR_INT_CEIL_LOG2(q) + 15;
  MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
  s = MPFR_PREC2LIMBS (p);
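  /* Note: p is the working precision, i.e. the target precision q plus
     about log2(q) + 15 guard bits; the Ziv loop below increases it whenever
     the p-bit approximation cannot be shown to round correctly to q bits. */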

  /* b (op2) and a (op1) are the two operands, but we want b >= a */
  compare = mpfr_cmp (op1, op2);
  if (MPFR_UNLIKELY( compare == 0 ))
    return mpfr_set (r, op1, rnd_mode);
  else if (compare > 0)
    {
      mpfr_srcptr t = op1;
      op1 = op2;
      op2 = t;
    }

  /* Now b (=op2) > a (=op1) */

  MPFR_SAVE_EXPO_MARK (expo);

  MPFR_TMP_MARK(marker);

  /* Main loop */
  MPFR_ZIV_INIT (loop, p);
  for (;;)
    {
      mpfr_prec_t eq;
      unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
      MPFR_BLOCK_DECL (flags);

      /* Init temporary vars */
      MPFR_TMP_INIT (up, u, p, s);
      MPFR_TMP_INIT (vp, v, p, s);
      MPFR_TMP_INIT (ufp, uf, p, s);
      MPFR_TMP_INIT (vfp, vf, p, s);

      /* Computation of u_n and v_n */
    retry:
      MPFR_BLOCK (flags,
                  mpfr_mul (u, op1, op2, MPFR_RNDN);
                  /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
                  mpfr_add (v, op1, op2, MPFR_RNDN);
                  /* mpfr_add with different precisions is still efficient */);
      if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
        {
          mpfr_exp_t e1, e2;

          MPFR_ASSERTN (scaleop == 0);
          e1 = MPFR_GET_EXP (op1);
          e2 = MPFR_GET_EXP (op2);

          /* Let's determine scaleop to avoid an overflow/underflow. */
          if (MPFR_OVERFLOW (flags))
            {
              /* Let's recall that emin <= e1 <= e2 <= emax.
                 There has been an overflow. Thus e2 >= emax/2.
                 If the mpfr_mul overflowed, then e1 + e2 > emax.
                 If the mpfr_add overflowed, then e2 = emax.
                 We want: (e1 + scale) + (e2 + scale) <= emax,
                 i.e. scale <= (emax - e1 - e2) / 2. Let's take
                 scale = min(floor((emax - e1 - e2) / 2), -1).
                 This is OK, as:
                 1. emin <= scale <= -1.
                 2. e1 + scale >= emin. Indeed:
                    * If e1 + e2 > emax, then
                      e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
                                 >= (emax + e1 - emax) / 2 - 1
                                 >= e1 / 2 - 1 >= emin.
                    * Otherwise, mpfr_mul didn't overflow, therefore
                      mpfr_add overflowed and e2 = emax, so that
                      e1 > emin (see restriction below).
                      e1 + scale > emin - 1, thus e1 + scale >= emin.
                 3. e2 + scale <= emax, since scale < 0. */
              if (e1 + e2 > MPFR_EMAX_MAX)
                {
                  scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2);
                  MPFR_ASSERTN (scaleop < 0);
                }
              else
                {
                  /* The addition necessarily overflowed. */
                  MPFR_ASSERTN (e2 == MPFR_EMAX_MAX);
                  /* The case where e1 = emin and e2 = emax is not supported
                     here. This would mean that the precision of e2 would be
                     huge (and possibly not supported in practice anyway). */
                  MPFR_ASSERTN (e1 > MPFR_EMIN_MIN);
                  /* Note: this case is probably impossible to have in practice
                     since we need e2 = emax, and no overflow in the product.
                     Since the product is >= 2^(e1+e2-2), it implies
                     e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow
                     we need op1 >= 1/2 ulp(op2), which implies that the
                     precision of op2 should be at least emax-2. On a 64-bit
                     computer this is impossible to have, and would require
                     a huge amount of memory on a 32-bit computer. */
                  scaleop = -1;
                }

            }
          else  /* underflow only (in the multiplication) */
            {
              /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
                 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
                 i.e. scale >= (emin + 1 - e1 - e2) / 2. Let's take
                 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
                 1. 1 <= scale <= emax.
                 2. e1 + scale >= emin + 1 >= emin.
                 3. e2 + scale <= scale <= emax. */
              MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
              scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2;
              MPFR_ASSERTN (scaleop > 0);
            }

          MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
          MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
          op1 = sc1;
          op2 = sc2;
          MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
                         MPFR_EXP_FSPEC "d\n", scaleop));
          goto retry;
        }
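
      /* Note: any scaling of the operands (scaleop) or of the iterates
         (scaleit, see below) is compensated on the exponent of the final
         result after the Ziv loop. */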

      MPFR_CLEAR_FLAGS ();
      mpfr_sqrt (u, u, MPFR_RNDN);
      mpfr_div_2ui (v, v, 1, MPFR_RNDN);

      scaleit = 0;
      n = 1;
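      /* AGM iteration: u <- sqrt(u*v) (geometric mean), v <- (u+v)/2
         (arithmetic mean). Convergence is quadratic, so that typically
         about log2(p) iterations are needed. mpfr_cmp2 sets eq to (roughly)
         the number of identical leading bits of u and v; the loop stops
         once u = v or they agree on more than p - 2 bits. */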
      while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
        {
          MPFR_BLOCK_DECL (flags2);

          MPFR_LOG_MSG (("Iteration n = %lu\n", n));

        retry2:
          mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
          mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
          /* See proof in algorithms.tex */
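          /* Once u and v agree on more than p/4 bits, one corrected mean,
             v = V(k) - (V(k-1) - U(k-1))^2 / (16 * V(k)) with
             V(k) = (U(k-1) + V(k-1)) / 2, already gives the required
             accuracy (see algorithms.tex), so the remaining square-root
             iterations can be skipped. */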
          if (eq > p / 4)
            {
              mpfr_t w;
              MPFR_BLOCK_DECL (flags3);

              MPFR_LOG_MSG (("4*eq > p\n", 0));

              /* vf = V(k) */
              mpfr_init2 (w, (p + 1) / 2);
              MPFR_BLOCK
                (flags3,
                 mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
                 mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
                 mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
                 mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
                 );
              if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
                {
                  mpfr_sub (v, vf, w, MPFR_RNDN);
                  err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
                  mpfr_clear (w);
                  break;
                }
              /* There has been an underflow because of the cancellation
                 between V(k-1) and U(k-1). Let's use the conventional
                 method. */
              MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
              mpfr_clear (w);
              MPFR_CLEAR_UNDERFLOW ();
            }
          /* U(k) increases, so that U.V can overflow (but not underflow). */
          MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
            {
              mpfr_exp_t scale2;

              scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
                           - MPFR_EMAX_MAX + 1) / 2);
              MPFR_EXP (u) += scale2;
              MPFR_EXP (v) += scale2;
              scaleit += scale2;
              MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
                             MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
                             n, scaleit, scale2));
              MPFR_CLEAR_OVERFLOW ();
              goto retry2;
            }
          mpfr_sqrt (u, uf, MPFR_RNDN);
          mpfr_swap (v, vf);
          n ++;
        }

      MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));

      /* the error on v is bounded by (18n+51) ulps, or twice if there
         was an exponent loss in the final subtraction */
      err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
                                                 since n is about log(p) */
      /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
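      /* MPFR_CAN_ROUND checks whether the p-bit approximation v, which is
         accurate to p - err bits, is sufficient to determine the correctly
         rounded result at the target precision q for rnd_mode. */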
      if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
                       MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
        break; /* Stop the loop */

      /* Next iteration */
      MPFR_ZIV_NEXT (loop, p);
      s = MPFR_PREC2LIMBS (p);
    }
  MPFR_ZIV_FREE (loop);

  if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
                     != 0))
    {
      MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
    }

  /* Set the result */
  inexact = mpfr_set (r, v, rnd_mode);
  MPFR_EXP (r) -= scaleop + scaleit;

  /* Clean up */
  MPFR_TMP_FREE(marker);

  MPFR_SAVE_EXPO_FREE (expo);
  /* From the definition of the AGM, underflow and overflow
     are not possible. */
  return mpfr_check_range (r, inexact, rnd_mode);
  /* agm(u,v) can be exact for u, v rational only for u=v.
     Proof (due to Nicolas Brisebarre): it suffices to consider
     u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
     and a theorem due to G.V. Chudnovsky states that for x a
     non-zero algebraic number with |x|<1,
     2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
     independent over Q. */
}
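
/* The block below is an illustrative usage sketch only; it is kept under
   "#if 0" so that it is never built with the library. It shows how a caller
   might compute agm(1, sqrt(2)) with the public MPFR interface; the chosen
   precision (113 bits) and the printed format are arbitrary choices made
   for this example. */
#if 0
#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t a, b, m;

  mpfr_inits2 (113, a, b, m, (mpfr_ptr) 0);
  mpfr_set_ui (a, 1, MPFR_RNDN);      /* a = 1 */
  mpfr_sqrt_ui (b, 2, MPFR_RNDN);     /* b = sqrt(2), rounded to nearest */
  mpfr_agm (m, a, b, MPFR_RNDN);      /* m = agm(a, b) */
  mpfr_printf ("agm(1, sqrt(2)) = %.30Rg\n", m);
  mpfr_clears (a, b, m, (mpfr_ptr) 0);
  return 0;
}
#endif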