/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/ 22 23#define MPFR_NEED_LONGLONG_H 24#include "mpfr-impl.h" 25 26/* agm(x,y) is between x and y, so we don't need to save exponent range */ 27int 28mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode) 29{ 30 int compare, inexact; 31 mp_size_t s; 32 mpfr_prec_t p, q; 33 mp_limb_t *up, *vp, *ufp, *vfp; 34 mpfr_t u, v, uf, vf, sc1, sc2; 35 mpfr_exp_t scaleop = 0, scaleit; 36 unsigned long n; /* number of iterations */ 37 MPFR_ZIV_DECL (loop); 38 MPFR_TMP_DECL(marker); 39 MPFR_SAVE_EXPO_DECL (expo); 40 41 MPFR_LOG_FUNC 42 (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d", 43 mpfr_get_prec (op2), mpfr_log_prec, op2, 44 mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode), 45 ("r[%Pu]=%.*Rg inexact=%d", 46 mpfr_get_prec (r), mpfr_log_prec, r, inexact)); 47 48 /* Deal with special values */ 49 if (MPFR_ARE_SINGULAR (op1, op2)) 50 { 51 /* If a or b is NaN, the result is NaN */ 52 if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2)) 53 { 54 MPFR_SET_NAN(r); 55 MPFR_RET_NAN; 56 } 57 /* now one of a or b is Inf or 0 */ 58 /* If a and b is +Inf, the result is +Inf. 
59 Otherwise if a or b is -Inf or 0, the result is NaN */ 60 else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2)) 61 { 62 if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2)) 63 { 64 MPFR_SET_INF(r); 65 MPFR_SET_SAME_SIGN(r, op1); 66 MPFR_RET(0); /* exact */ 67 } 68 else 69 { 70 MPFR_SET_NAN(r); 71 MPFR_RET_NAN; 72 } 73 } 74 else /* a and b are neither NaN nor Inf, and one is zero */ 75 { /* If a or b is 0, the result is +0 since a sqrt is positive */ 76 MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2)); 77 MPFR_SET_POS (r); 78 MPFR_SET_ZERO (r); 79 MPFR_RET (0); /* exact */ 80 } 81 } 82 83 /* If a or b is negative (excluding -Infinity), the result is NaN */ 84 if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2))) 85 { 86 MPFR_SET_NAN(r); 87 MPFR_RET_NAN; 88 } 89 90 /* Precision of the following calculus */ 91 q = MPFR_PREC(r); 92 p = q + MPFR_INT_CEIL_LOG2(q) + 15; 93 MPFR_ASSERTD (p >= 7); /* see algorithms.tex */ 94 s = MPFR_PREC2LIMBS (p); 95 96 /* b (op2) and a (op1) are the 2 operands but we want b >= a */ 97 compare = mpfr_cmp (op1, op2); 98 if (MPFR_UNLIKELY( compare == 0 )) 99 { 100 mpfr_set (r, op1, rnd_mode); 101 MPFR_RET (0); /* exact */ 102 } 103 else if (compare > 0) 104 { 105 mpfr_srcptr t = op1; 106 op1 = op2; 107 op2 = t; 108 } 109 110 /* Now b (=op2) > a (=op1) */ 111 112 MPFR_SAVE_EXPO_MARK (expo); 113 114 MPFR_TMP_MARK(marker); 115 116 /* Main loop */ 117 MPFR_ZIV_INIT (loop, p); 118 for (;;) 119 { 120 mpfr_prec_t eq; 121 unsigned long err = 0; /* must be set to 0 at each Ziv iteration */ 122 MPFR_BLOCK_DECL (flags); 123 124 /* Init temporary vars */ 125 MPFR_TMP_INIT (up, u, p, s); 126 MPFR_TMP_INIT (vp, v, p, s); 127 MPFR_TMP_INIT (ufp, uf, p, s); 128 MPFR_TMP_INIT (vfp, vf, p, s); 129 130 /* Calculus of un and vn */ 131 retry: 132 MPFR_BLOCK (flags, 133 mpfr_mul (u, op1, op2, MPFR_RNDN); 134 /* mpfr_mul(...): faster since PREC(op) < PREC(u) */ 135 mpfr_add (v, op1, op2, MPFR_RNDN); 136 /* mpfr_add with !=prec is still good */); 137 if 
(MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags))) 138 { 139 mpfr_exp_t e1 , e2; 140 141 MPFR_ASSERTN (scaleop == 0); 142 e1 = MPFR_GET_EXP (op1); 143 e2 = MPFR_GET_EXP (op2); 144 145 /* Let's determine scaleop to avoid an overflow/underflow. */ 146 if (MPFR_OVERFLOW (flags)) 147 { 148 /* Let's recall that emin <= e1 <= e2 <= emax. 149 There has been an overflow. Thus e2 >= emax/2. 150 If the mpfr_mul overflowed, then e1 + e2 > emax. 151 If the mpfr_add overflowed, then e2 = emax. 152 We want: (e1 + scale) + (e2 + scale) <= emax, 153 i.e. scale <= (emax - e1 - e2) / 2. Let's take 154 scale = min(floor((emax - e1 - e2) / 2), -1). 155 This is OK, as: 156 1. emin <= scale <= -1. 157 2. e1 + scale >= emin. Indeed: 158 * If e1 + e2 > emax, then 159 e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1 160 >= (emax + e1 - emax) / 2 - 1 161 >= e1 / 2 - 1 >= emin. 162 * Otherwise, mpfr_mul didn't overflow, therefore 163 mpfr_add overflowed and e2 = emax, so that 164 e1 > emin (see restriction below). 165 e1 + scale > emin - 1, thus e1 + scale >= emin. 166 3. e2 + scale <= emax, since scale < 0. */ 167 if (e1 + e2 > MPFR_EXT_EMAX) 168 { 169 scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2); 170 MPFR_ASSERTN (scaleop < 0); 171 } 172 else 173 { 174 /* The addition necessarily overflowed. */ 175 MPFR_ASSERTN (e2 == MPFR_EXT_EMAX); 176 /* The case where e1 = emin and e2 = emax is not supported 177 here. This would mean that the precision of e2 would be 178 huge (and possibly not supported in practice anyway). */ 179 MPFR_ASSERTN (e1 > MPFR_EXT_EMIN); 180 scaleop = -1; 181 } 182 183 } 184 else /* underflow only (in the multiplication) */ 185 { 186 /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0). 187 We want: (e1 + scale) + (e2 + scale) >= emin + 1, 188 i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take 189 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as: 190 1. 1 <= scale <= emax. 191 2. e1 + scale >= emin + 1 >= emin. 192 3. e2 + scale <= scale <= emax. 
*/ 193 MPFR_ASSERTN (e1 <= e2 && e2 <= 0); 194 scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2; 195 MPFR_ASSERTN (scaleop > 0); 196 } 197 198 MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop); 199 MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop); 200 op1 = sc1; 201 op2 = sc2; 202 MPFR_LOG_MSG (("Exception in pre-iteration, scale = %" 203 MPFR_EXP_FSPEC "d\n", scaleop)); 204 goto retry; 205 } 206 207 mpfr_clear_flags (); 208 mpfr_sqrt (u, u, MPFR_RNDN); 209 mpfr_div_2ui (v, v, 1, MPFR_RNDN); 210 211 scaleit = 0; 212 n = 1; 213 while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2) 214 { 215 MPFR_BLOCK_DECL (flags2); 216 217 MPFR_LOG_MSG (("Iteration n = %lu\n", n)); 218 219 retry2: 220 mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */ 221 mpfr_div_2ui (vf, vf, 1, MPFR_RNDN); 222 /* See proof in algorithms.tex */ 223 if (4*eq > p) 224 { 225 mpfr_t w; 226 MPFR_BLOCK_DECL (flags3); 227 228 MPFR_LOG_MSG (("4*eq > p\n", 0)); 229 230 /* vf = V(k) */ 231 mpfr_init2 (w, (p + 1) / 2); 232 MPFR_BLOCK 233 (flags3, 234 mpfr_sub (w, v, u, MPFR_RNDN); /* e = V(k-1)-U(k-1) */ 235 mpfr_sqr (w, w, MPFR_RNDN); /* e = e^2 */ 236 mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e*= (1/2)^2*1/4 */ 237 mpfr_div (w, w, vf, MPFR_RNDN); /* 1/4*e^2/V(k) */ 238 ); 239 if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3))) 240 { 241 mpfr_sub (v, vf, w, MPFR_RNDN); 242 err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */ 243 mpfr_clear (w); 244 break; 245 } 246 /* There has been an underflow because of the cancellation 247 between V(k-1) and U(k-1). Let's use the conventional 248 method. */ 249 MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0)); 250 mpfr_clear (w); 251 mpfr_clear_underflow (); 252 } 253 /* U(k) increases, so that U.V can overflow (but not underflow). 
*/ 254 MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN);); 255 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2))) 256 { 257 mpfr_exp_t scale2; 258 259 scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v)) 260 - MPFR_EXT_EMAX + 1) / 2); 261 MPFR_EXP (u) += scale2; 262 MPFR_EXP (v) += scale2; 263 scaleit += scale2; 264 MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %" 265 MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n", 266 n, scaleit, scale2)); 267 mpfr_clear_overflow (); 268 goto retry2; 269 } 270 mpfr_sqrt (u, uf, MPFR_RNDN); 271 mpfr_swap (v, vf); 272 n ++; 273 } 274 275 MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n)); 276 277 /* the error on v is bounded by (18n+51) ulps, or twice if there 278 was an exponent loss in the final subtraction */ 279 err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow 280 since n is about log(p) */ 281 /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */ 282 if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 && 283 MPFR_CAN_ROUND (v, p - err, q, rnd_mode))) 284 break; /* Stop the loop */ 285 286 /* Next iteration */ 287 MPFR_ZIV_NEXT (loop, p); 288 s = MPFR_PREC2LIMBS (p); 289 } 290 MPFR_ZIV_FREE (loop); 291 292 if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT)) 293 != 0)) 294 { 295 MPFR_ASSERTN (! mpfr_overflow_p ()); /* since mpfr_clear_flags */ 296 MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */ 297 MPFR_ASSERTN (! mpfr_divby0_p ()); /* since mpfr_clear_flags */ 298 MPFR_ASSERTN (! mpfr_nanflag_p ()); /* since mpfr_clear_flags */ 299 } 300 301 /* Setting of the result */ 302 inexact = mpfr_set (r, v, rnd_mode); 303 MPFR_EXP (r) -= scaleop + scaleit; 304 305 /* Let's clean */ 306 MPFR_TMP_FREE(marker); 307 308 MPFR_SAVE_EXPO_FREE (expo); 309 /* From the definition of the AGM, underflow and overflow 310 are not possible. */ 311 return mpfr_check_range (r, inexact, rnd_mode); 312 /* agm(u,v) can be exact for u, v rational only for u=v. 
313 Proof (due to Nicolas Brisebarre): it suffices to consider 314 u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2), 315 and a theorem due to G.V. Chudnovsky states that for x a 316 non-zero algebraic number with |x|<1, then 317 2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically 318 independent over Q. */ 319} 320