1/* mpfr_exp -- exponential of a floating-point number 2 3Copyright 1999, 2001-2023 Free Software Foundation, Inc. 4Contributed by the AriC and Caramba projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */ 24#include "mpfr-impl.h" 25 26/* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series 27 Assume |p/2^r| < 1. 28 We use the following binary splitting formula: 29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise 30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise 31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise 32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough. 33 34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j, 35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array 36 below. 37 38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of 39 two part. 40*/ 41static void 42mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m, 43 mpz_t *Q, mpfr_prec_t *mult) 44{ 45 mp_bitcnt_t n, h, i, j; /* unsigned type, which is >= unsigned long */ 46 mpz_t *S, *ptoj; 47 mpfr_prec_t *log2_nb_terms; 48 mpfr_exp_t diff, expo; 49 mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj; 50 int k, l; 51 52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1); 53 54 S = Q + (m+1); 55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */ 56 log2_nb_terms = mult + (m+1); 57 58 /* Normalize p */ 59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0); 60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */ 61 MPFR_ASSERTN (n <= LONG_MAX); /* This is a limitation. */ 62 mpz_tdiv_q_2exp (p, p, n); 63 r -= (long) n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */ 64 65 /* Set initial var */ 66 mpz_set (ptoj[0], p); 67 for (k = 1; k < m; k++) 68 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */ 69 mpz_set_ui (Q[0], 1); 70 mpz_set_ui (S[0], 1); 71 k = 0; 72 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms 73 satisfies P[k]/Q[k] <= 2^(-mult[k]) */ 74 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */ 75 prec_i_have = 0; 76 77 /* Main Loop */ 78 n = 1UL << m; 79 MPFR_ASSERTN (n != 0); /* no overflow */ 80 for (i = 1; (prec_i_have < precy) && (i < n); i++) 81 { 82 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */ 83 k++; 84 log2_nb_terms[k] = 0; /* 1 term */ 85 mpz_set_ui (Q[k], i + 1); 86 mpz_set_ui (S[k], i + 1); 87 j = i + 1; /* we have computed j = i+1 terms so far */ 88 l = 0; 89 while ((j & 1) == 0) /* combine and reduce */ 90 { 91 /* invariant: S[k] corresponds to 2^l consecutive terms */ 92 mpz_mul (S[k], S[k], ptoj[l]); 93 mpz_mul (S[k-1], S[k-1], Q[k]); 94 /* Q[k] corresponds to 2^l consecutive terms too. 95 Since it does not contains the factor 2^(r*2^l), 96 when going from l to l+1 we need to multiply 97 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */ 98 mpz_mul_2exp (S[k-1], S[k-1], r << l); 99 mpz_add (S[k-1], S[k-1], S[k]); 100 mpz_mul (Q[k-1], Q[k-1], Q[k]); 101 log2_nb_terms[k-1] ++; /* number of terms in S[k-1] 102 is a power of 2 by construction */ 103 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]); 104 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]); 105 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1; 106 prec_i_have = mult[k] = mult[k-1]; 107 /* since mult[k] >= mult[k-1] + nbits(Q[k]), 108 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */ 109 l ++; 110 j >>= 1; 111 k --; 112 } 113 } 114 115 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above, 116 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */ 117 h = 0; /* number of accumulated terms in the right part S[k]/Q[k] */ 118 while (k > 0) 119 { 120 j = log2_nb_terms[k-1]; 121 mpz_mul (S[k], S[k], ptoj[j]); 122 mpz_mul (S[k-1], S[k-1], Q[k]); 123 h += (mp_bitcnt_t) 1 << log2_nb_terms[k]; 124 mpz_mul_2exp (S[k-1], S[k-1], r * h); 125 mpz_add (S[k-1], S[k-1], S[k]); 126 mpz_mul (Q[k-1], Q[k-1], Q[k]); 127 k--; 128 } 129 130 /* Q[0] now equals i! */ 131 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]); 132 diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy; 133 expo = diff; 134 if (diff >= 0) 135 mpz_fdiv_q_2exp (S[0], S[0], diff); 136 else 137 mpz_mul_2exp (S[0], S[0], -diff); 138 139 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]); 140 diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy; 141 expo -= diff; 142 if (diff > 0) 143 mpz_fdiv_q_2exp (Q[0], Q[0], diff); 144 else 145 mpz_mul_2exp (Q[0], Q[0], -diff); 146 147 mpz_tdiv_q (S[0], S[0], Q[0]); 148 mpfr_set_z (y, S[0], MPFR_RNDD); 149 /* TODO: Check/prove that the following expression doesn't overflow. */ 150 expo = MPFR_GET_EXP (y) + expo - r * (i - 1); 151 MPFR_SET_EXP (y, expo); 152} 153 154#define shift (GMP_NUMB_BITS/2) 155 156int 157mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 158{ 159 mpfr_t t, x_copy, tmp; 160 mpz_t uk; 161 mpfr_exp_t ttt, shift_x; 162 unsigned long twopoweri; 163 mpz_t *P; 164 mpfr_prec_t *mult; 165 int i, k, loop; 166 int prec_x; 167 mpfr_prec_t realprec, Prec; 168 int iter; 169 int inexact = 0; 170 MPFR_SAVE_EXPO_DECL (expo); 171 MPFR_ZIV_DECL (ziv_loop); 172 173 MPFR_LOG_FUNC 174 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode), 175 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, 176 inexact)); 177 178 MPFR_SAVE_EXPO_MARK (expo); 179 180 /* decompose x */ 181 /* we first write x = 1.xxxxxxxxxxxxx 182 ----- k bits -- */ 183 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS; 184 if (prec_x < 0) 185 prec_x = 0; 186 187 ttt = MPFR_GET_EXP (x); 188 mpfr_init2 (x_copy, MPFR_PREC(x)); 189 mpfr_set (x_copy, x, MPFR_RNDD); 190 191 /* we shift to get a number less than 1 */ 192 if (ttt > 0) 193 { 194 shift_x = ttt; 195 mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN); 196 ttt = MPFR_GET_EXP (x_copy); 197 } 198 else 199 shift_x = 0; 200 MPFR_ASSERTD (ttt <= 0); 201 202 /* Init prec and vars */ 203 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y)); 204 Prec = realprec + shift + 2 + shift_x; 205 mpfr_init2 (t, Prec); 206 mpfr_init2 (tmp, Prec); 207 mpz_init (uk); 208 209 /* Main loop */ 210 MPFR_ZIV_INIT (ziv_loop, realprec); 211 for (;;) 212 { 213 int scaled = 0; 214 MPFR_BLOCK_DECL (flags); 215 216 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS; 217 218 /* now we have to extract */ 219 twopoweri = GMP_NUMB_BITS; 220 221 /* Allocate tables */ 222 P = (mpz_t*) mpfr_allocate_func (3*(k+2)*sizeof(mpz_t)); 223 for (i = 0; i < 3*(k+2); i++) 224 mpz_init (P[i]); 225 mult = (mpfr_prec_t*) mpfr_allocate_func (2*(k+2)*sizeof(mpfr_prec_t)); 226 227 /* Particular case for i==0 */ 228 mpfr_extract (uk, x_copy, 0); 229 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0); 230 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult); 231 for (loop = 0; loop < shift; loop++) 232 mpfr_sqr (tmp, tmp, MPFR_RNDD); 233 twopoweri *= 2; 234 235 /* General case */ 236 iter = (k <= prec_x) ? k : prec_x; 237 for (i = 1; i <= iter; i++) 238 { 239 mpfr_extract (uk, x_copy, i); 240 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0)) 241 { 242 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult); 243 mpfr_mul (tmp, tmp, t, MPFR_RNDD); 244 } 245 MPFR_ASSERTN (twopoweri <= LONG_MAX/2); 246 twopoweri *=2; 247 } 248 249 /* Clear tables */ 250 for (i = 0; i < 3*(k+2); i++) 251 mpz_clear (P[i]); 252 mpfr_free_func (P, 3*(k+2)*sizeof(mpz_t)); 253 mpfr_free_func (mult, 2*(k+2)*sizeof(mpfr_prec_t)); 254 255 if (shift_x > 0) 256 { 257 MPFR_BLOCK (flags, { 258 for (loop = 0; loop < shift_x - 1; loop++) 259 mpfr_sqr (tmp, tmp, MPFR_RNDD); 260 mpfr_sqr (t, tmp, MPFR_RNDD); 261 } ); 262 263 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 264 { 265 /* tmp <= exact result, so that it is a real overflow. */ 266 inexact = mpfr_overflow (y, rnd_mode, 1); 267 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW); 268 break; 269 } 270 271 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) 272 { 273 /* This may be a spurious underflow. So, let's scale 274 the result. */ 275 mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */ 276 mpfr_sqr (t, tmp, MPFR_RNDD); 277 if (MPFR_IS_ZERO (t)) 278 { 279 /* approximate result < 2^(emin - 3), thus 280 exact result < 2^(emin - 2). */ 281 inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ? 282 MPFR_RNDZ : rnd_mode, 1); 283 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 284 break; 285 } 286 scaled = 1; 287 } 288 } 289 290 if (MPFR_CAN_ROUND (shift_x > 0 ? t : tmp, realprec, 291 MPFR_PREC(y), rnd_mode)) 292 { 293 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode); 294 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y))) 295 { 296 int inex2; 297 mpfr_exp_t ey; 298 299 /* The result has been scaled and needs to be corrected. */ 300 ey = MPFR_GET_EXP (y); 301 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode); 302 if (inex2) /* underflow */ 303 { 304 if (rnd_mode == MPFR_RNDN && inexact < 0 && 305 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1) 306 { 307 /* Double rounding case: in MPFR_RNDN, the scaled 308 result has been rounded downward to 2^emin. 309 As the exact result is > 2^(emin - 2), correct 310 rounding must be done upward. */ 311 /* TODO: make sure in coverage tests that this line 312 is reached. */ 313 inexact = mpfr_underflow (y, MPFR_RNDU, 1); 314 } 315 else 316 { 317 /* No double rounding. */ 318 inexact = inex2; 319 } 320 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 321 } 322 } 323 break; 324 } 325 326 MPFR_ZIV_NEXT (ziv_loop, realprec); 327 Prec = realprec + shift + 2 + shift_x; 328 mpfr_set_prec (t, Prec); 329 mpfr_set_prec (tmp, Prec); 330 } 331 MPFR_ZIV_FREE (ziv_loop); 332 333 mpz_clear (uk); 334 mpfr_clear (tmp); 335 mpfr_clear (t); 336 mpfr_clear (x_copy); 337 MPFR_SAVE_EXPO_FREE (expo); 338 return inexact; 339} 340