1/* mpfr_exp -- exponential of a floating-point number 2 3Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4Contributed by the AriC and Caramel projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */ 24#include "mpfr-impl.h" 25 26/* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series 27 Assume |p/2^r| < 1. 28 We use the following binary splitting formula: 29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise 30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise 31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise 32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough. 33 34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j, 35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array 36 below. 37 38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of 39 two part. 40*/ 41static void 42mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m, 43 mpz_t *Q, mpfr_prec_t *mult) 44{ 45 unsigned long n, i, j; 46 mpz_t *S, *ptoj; 47 mpfr_prec_t *log2_nb_terms; 48 mpfr_exp_t diff, expo; 49 mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj; 50 int k, l; 51 52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1); 53 54 S = Q + (m+1); 55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */ 56 log2_nb_terms = mult + (m+1); 57 58 /* Normalize p */ 59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0); 60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */ 61 mpz_tdiv_q_2exp (p, p, n); 62 r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */ 63 64 /* Set initial var */ 65 mpz_set (ptoj[0], p); 66 for (k = 1; k < m; k++) 67 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */ 68 mpz_set_ui (Q[0], 1); 69 mpz_set_ui (S[0], 1); 70 k = 0; 71 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms 72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */ 73 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */ 74 prec_i_have = 0; 75 76 /* Main Loop */ 77 n = 1UL << m; 78 for (i = 1; (prec_i_have < precy) && (i < n); i++) 79 { 80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */ 81 k++; 82 log2_nb_terms[k] = 0; /* 1 term */ 83 mpz_set_ui (Q[k], i + 1); 84 mpz_set_ui (S[k], i + 1); 85 j = i + 1; /* we have computed j = i+1 terms so far */ 86 l = 0; 87 while ((j & 1) == 0) /* combine and reduce */ 88 { 89 /* invariant: S[k] corresponds to 2^l consecutive terms */ 90 mpz_mul (S[k], S[k], ptoj[l]); 91 mpz_mul (S[k-1], S[k-1], Q[k]); 92 /* Q[k] corresponds to 2^l consecutive terms too. 93 Since it does not contains the factor 2^(r*2^l), 94 when going from l to l+1 we need to multiply 95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */ 96 mpz_mul_2exp (S[k-1], S[k-1], r << l); 97 mpz_add (S[k-1], S[k-1], S[k]); 98 mpz_mul (Q[k-1], Q[k-1], Q[k]); 99 log2_nb_terms[k-1] ++; /* number of terms in S[k-1] 100 is a power of 2 by construction */ 101 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]); 102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]); 103 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1; 104 prec_i_have = mult[k] = mult[k-1]; 105 /* since mult[k] >= mult[k-1] + nbits(Q[k]), 106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */ 107 l ++; 108 j >>= 1; 109 k --; 110 } 111 } 112 113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above, 114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */ 115 l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */ 116 while (k > 0) 117 { 118 j = log2_nb_terms[k-1]; 119 mpz_mul (S[k], S[k], ptoj[j]); 120 mpz_mul (S[k-1], S[k-1], Q[k]); 121 l += 1 << log2_nb_terms[k]; 122 mpz_mul_2exp (S[k-1], S[k-1], r * l); 123 mpz_add (S[k-1], S[k-1], S[k]); 124 mpz_mul (Q[k-1], Q[k-1], Q[k]); 125 k--; 126 } 127 128 /* Q[0] now equals i! */ 129 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]); 130 diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy; 131 expo = diff; 132 if (diff >= 0) 133 mpz_fdiv_q_2exp (S[0], S[0], diff); 134 else 135 mpz_mul_2exp (S[0], S[0], -diff); 136 137 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]); 138 diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy; 139 expo -= diff; 140 if (diff > 0) 141 mpz_fdiv_q_2exp (Q[0], Q[0], diff); 142 else 143 mpz_mul_2exp (Q[0], Q[0], -diff); 144 145 mpz_tdiv_q (S[0], S[0], Q[0]); 146 mpfr_set_z (y, S[0], MPFR_RNDD); 147 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) ); 148} 149 150#define shift (GMP_NUMB_BITS/2) 151 152int 153mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 154{ 155 mpfr_t t, x_copy, tmp; 156 mpz_t uk; 157 mpfr_exp_t ttt, shift_x; 158 unsigned long twopoweri; 159 mpz_t *P; 160 mpfr_prec_t *mult; 161 int i, k, loop; 162 int prec_x; 163 mpfr_prec_t realprec, Prec; 164 int iter; 165 int inexact = 0; 166 MPFR_SAVE_EXPO_DECL (expo); 167 MPFR_ZIV_DECL (ziv_loop); 168 169 MPFR_LOG_FUNC 170 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode), 171 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, 172 inexact)); 173 174 MPFR_SAVE_EXPO_MARK (expo); 175 176 /* decompose x */ 177 /* we first write x = 1.xxxxxxxxxxxxx 178 ----- k bits -- */ 179 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS; 180 if (prec_x < 0) 181 prec_x = 0; 182 183 ttt = MPFR_GET_EXP (x); 184 mpfr_init2 (x_copy, MPFR_PREC(x)); 185 mpfr_set (x_copy, x, MPFR_RNDD); 186 187 /* we shift to get a number less than 1 */ 188 if (ttt > 0) 189 { 190 shift_x = ttt; 191 mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN); 192 ttt = MPFR_GET_EXP (x_copy); 193 } 194 else 195 shift_x = 0; 196 MPFR_ASSERTD (ttt <= 0); 197 198 /* Init prec and vars */ 199 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y)); 200 Prec = realprec + shift + 2 + shift_x; 201 mpfr_init2 (t, Prec); 202 mpfr_init2 (tmp, Prec); 203 mpz_init (uk); 204 205 /* Main loop */ 206 MPFR_ZIV_INIT (ziv_loop, realprec); 207 for (;;) 208 { 209 int scaled = 0; 210 MPFR_BLOCK_DECL (flags); 211 212 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS; 213 214 /* now we have to extract */ 215 twopoweri = GMP_NUMB_BITS; 216 217 /* Allocate tables */ 218 P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t)); 219 for (i = 0; i < 3*(k+2); i++) 220 mpz_init (P[i]); 221 mult = (mpfr_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mpfr_prec_t)); 222 223 /* Particular case for i==0 */ 224 mpfr_extract (uk, x_copy, 0); 225 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0); 226 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult); 227 for (loop = 0; loop < shift; loop++) 228 mpfr_sqr (tmp, tmp, MPFR_RNDD); 229 twopoweri *= 2; 230 231 /* General case */ 232 iter = (k <= prec_x) ? k : prec_x; 233 for (i = 1; i <= iter; i++) 234 { 235 mpfr_extract (uk, x_copy, i); 236 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0)) 237 { 238 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult); 239 mpfr_mul (tmp, tmp, t, MPFR_RNDD); 240 } 241 MPFR_ASSERTN (twopoweri <= LONG_MAX/2); 242 twopoweri *=2; 243 } 244 245 /* Clear tables */ 246 for (i = 0; i < 3*(k+2); i++) 247 mpz_clear (P[i]); 248 (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t)); 249 (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mpfr_prec_t)); 250 251 if (shift_x > 0) 252 { 253 MPFR_BLOCK (flags, { 254 for (loop = 0; loop < shift_x - 1; loop++) 255 mpfr_sqr (tmp, tmp, MPFR_RNDD); 256 mpfr_sqr (t, tmp, MPFR_RNDD); 257 } ); 258 259 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 260 { 261 /* tmp <= exact result, so that it is a real overflow. */ 262 inexact = mpfr_overflow (y, rnd_mode, 1); 263 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW); 264 break; 265 } 266 267 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) 268 { 269 /* This may be a spurious underflow. So, let's scale 270 the result. */ 271 mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */ 272 mpfr_sqr (t, tmp, MPFR_RNDD); 273 if (MPFR_IS_ZERO (t)) 274 { 275 /* approximate result < 2^(emin - 3), thus 276 exact result < 2^(emin - 2). */ 277 inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ? 278 MPFR_RNDZ : rnd_mode, 1); 279 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 280 break; 281 } 282 scaled = 1; 283 } 284 } 285 286 if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, MPFR_RNDD, MPFR_RNDZ, 287 MPFR_PREC(y) + (rnd_mode == MPFR_RNDN))) 288 { 289 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode); 290 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y))) 291 { 292 int inex2; 293 mpfr_exp_t ey; 294 295 /* The result has been scaled and needs to be corrected. */ 296 ey = MPFR_GET_EXP (y); 297 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode); 298 if (inex2) /* underflow */ 299 { 300 if (rnd_mode == MPFR_RNDN && inexact < 0 && 301 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1) 302 { 303 /* Double rounding case: in MPFR_RNDN, the scaled 304 result has been rounded downward to 2^emin. 305 As the exact result is > 2^(emin - 2), correct 306 rounding must be done upward. */ 307 /* TODO: make sure in coverage tests that this line 308 is reached. */ 309 inexact = mpfr_underflow (y, MPFR_RNDU, 1); 310 } 311 else 312 { 313 /* No double rounding. */ 314 inexact = inex2; 315 } 316 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 317 } 318 } 319 break; 320 } 321 322 MPFR_ZIV_NEXT (ziv_loop, realprec); 323 Prec = realprec + shift + 2 + shift_x; 324 mpfr_set_prec (t, Prec); 325 mpfr_set_prec (tmp, Prec); 326 } 327 MPFR_ZIV_FREE (ziv_loop); 328 329 mpz_clear (uk); 330 mpfr_clear (tmp); 331 mpfr_clear (t); 332 mpfr_clear (x_copy); 333 MPFR_SAVE_EXPO_FREE (expo); 334 return inexact; 335} 336