1/* mpfr_exp_2 -- exponential of a floating-point number 2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n)) 3 4Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 5Contributed by the AriC and Caramel projects, INRIA. 6 7This file is part of the GNU MPFR Library. 8 9The GNU MPFR Library is free software; you can redistribute it and/or modify 10it under the terms of the GNU Lesser General Public License as published by 11the Free Software Foundation; either version 3 of the License, or (at your 12option) any later version. 13 14The GNU MPFR Library is distributed in the hope that it will be useful, but 15WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17License for more details. 18 19You should have received a copy of the GNU Lesser General Public License 20along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 21http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2251 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 23 24/* #define DEBUG */ 25#define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */ 26#include "mpfr-impl.h" 27 28static unsigned long 29mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *); 30static unsigned long 31mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *); 32static mpfr_exp_t 33mpz_normalize (mpz_t, mpz_t, mpfr_exp_t); 34static mpfr_exp_t 35mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t); 36 37/* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q. 38 Otherwise do nothing and return 0. 39 */ 40static mpfr_exp_t 41mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q) 42{ 43 size_t k; 44 45 MPFR_MPZ_SIZEINBASE2 (k, z); 46 MPFR_ASSERTD (k == (mpfr_uexp_t) k); 47 if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q) 48 { 49 mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q)); 50 return (mpfr_exp_t) k - q; 51 } 52 if (MPFR_UNLIKELY(rop != z)) 53 mpz_set (rop, z); 54 return 0; 55} 56 57/* if expz > target, shift z by (expz-target) bits to the left. 58 if expz < target, shift z by (target-expz) bits to the right. 59 Returns target. 60*/ 61static mpfr_exp_t 62mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target) 63{ 64 if (target > expz) 65 mpz_fdiv_q_2exp (rop, z, target - expz); 66 else 67 mpz_mul_2exp (rop, z, expz - target); 68 return target; 69} 70 71/* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n 72 where x = n*log(2)+(2^K)*r 73 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the 74 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)). 75 This function returns with the exact flags due to exp. 76*/ 77int 78mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 79{ 80 long n; 81 unsigned long K, k, l, err; /* FIXME: Which type ? */ 82 int error_r; 83 mpfr_exp_t exps, expx; 84 mpfr_prec_t q, precy; 85 int inexact; 86 mpfr_t r, s; 87 mpz_t ss; 88 MPFR_ZIV_DECL (loop); 89 90 MPFR_LOG_FUNC 91 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode), 92 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, 93 inexact)); 94 95 expx = MPFR_GET_EXP (x); 96 precy = MPFR_PREC(y); 97 98 /* Warning: we cannot use the 'double' type here, since on 64-bit machines 99 x may be as large as 2^62*log(2) without overflow, and then x/log(2) 100 is about 2^62: not every integer of that size can be represented as a 101 'double', thus the argument reduction would fail. */ 102 if (expx <= -2) 103 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */ 104 n = 0; 105 else 106 { 107 mpfr_init2 (r, sizeof (long) * CHAR_BIT); 108 mpfr_const_log2 (r, MPFR_RNDZ); 109 mpfr_div (r, x, r, MPFR_RNDN); 110 n = mpfr_get_si (r, MPFR_RNDN); 111 mpfr_clear (r); 112 } 113 /* we have |x| <= (|n|+1)*log(2) */ 114 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n)); 115 116 /* error_r bounds the cancelled bits in x - n*log(2) */ 117 if (MPFR_UNLIKELY (n == 0)) 118 error_r = 0; 119 else 120 { 121 count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1); 122 error_r = GMP_NUMB_BITS - error_r; 123 /* we have |x| <= 2^error_r * log(2) */ 124 } 125 126 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of 127 n/K terms costs about n/(2K) multiplications when computed in fixed 128 point */ 129 K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2) 130 : __gmpfr_cuberoot (4*precy); 131 l = (precy - 1) / K + 1; 132 err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18); 133 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */ 134 q = precy + err + K + 8; 135 /* if |x| >> 1, take into account the cancelled bits */ 136 if (expx > 0) 137 q += expx; 138 139 /* Note: due to the mpfr_prec_round below, it is not possible to use 140 the MPFR_GROUP_* macros here. */ 141 142 mpfr_init2 (r, q + error_r); 143 mpfr_init2 (s, q + error_r); 144 145 /* the algorithm consists in computing an upper bound of exp(x) using 146 a precision of q bits, and see if we can round to MPFR_PREC(y) taking 147 into account the maximal error. Otherwise we increase q. */ 148 MPFR_ZIV_INIT (loop, q); 149 for (;;) 150 { 151 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n", 152 n, K, l, (unsigned long) q, error_r)); 153 154 /* First reduce the argument to r = x - n * log(2), 155 so that r is small in absolute value. We want an upper 156 bound on r to get an upper bound on exp(x). */ 157 158 /* if n<0, we have to get an upper bound of log(2) 159 in order to get an upper bound of r = x-n*log(2) */ 160 mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU); 161 /* s is within 1 ulp(s) of log(2) */ 162 163 mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU); 164 /* r is within 3 ulps of |n|*log(2) */ 165 if (n < 0) 166 MPFR_CHANGE_SIGN (r); 167 /* r <= n*log(2), within 3 ulps */ 168 169 MPFR_LOG_VAR (x); 170 MPFR_LOG_VAR (r); 171 172 mpfr_sub (r, x, r, MPFR_RNDU); 173 174 if (MPFR_IS_PURE_FP (r)) 175 { 176 while (MPFR_IS_NEG (r)) 177 { /* initial approximation n was too large */ 178 n--; 179 mpfr_add (r, r, s, MPFR_RNDU); 180 } 181 182 /* since there was a cancellation in x - n*log(2), the low error_r 183 bits from r are zero and thus non significant, thus we can reduce 184 the working precision */ 185 if (error_r > 0) 186 mpfr_prec_round (r, q, MPFR_RNDU); 187 /* the error on r is at most 3 ulps (3 ulps if error_r = 0, 188 and 1 + 3/2 if error_r > 0) */ 189 MPFR_LOG_VAR (r); 190 MPFR_ASSERTD (MPFR_IS_POS (r)); 191 mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */ 192 193 mpz_init (ss); 194 exps = mpfr_get_z_2exp (ss, s); 195 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */ 196 MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0); 197 l = (precy < MPFR_EXP_2_THRESHOLD) 198 ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */ 199 : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */ 200 201 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n", 202 l, (unsigned long) q, (K + l) * (double) q * q)); 203 204 for (k = 0; k < K; k++) 205 { 206 mpz_mul (ss, ss, ss); 207 exps <<= 1; 208 exps += mpz_normalize (ss, ss, q); 209 } 210 mpfr_set_z (s, ss, MPFR_RNDN); 211 212 MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps); 213 mpz_clear (ss); 214 215 /* error is at most 2^K*l, plus 2 to take into account of 216 the error of 3 ulps on r */ 217 err = K + MPFR_INT_CEIL_LOG2 (l) + 2; 218 219 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0)); 220 MPFR_LOG_VAR (s); 221 MPFR_LOG_MSG (("err=%lu bits\n", K)); 222 223 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode))) 224 { 225 mpfr_clear_flags (); 226 inexact = mpfr_mul_2si (y, s, n, rnd_mode); 227 break; 228 } 229 } 230 231 MPFR_ZIV_NEXT (loop, q); 232 mpfr_set_prec (r, q + error_r); 233 mpfr_set_prec (s, q + error_r); 234 } 235 MPFR_ZIV_FREE (loop); 236 237 mpfr_clear (r); 238 mpfr_clear (s); 239 240 return inexact; 241} 242 243/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q 244 using naive method with O(l) multiplications. 245 Return the number of iterations l. 246 The absolute error on s is less than 3*l*(l+1)*2^(-q). 247 Version using fixed-point arithmetic with mpz instead 248 of mpfr for internal computations. 249 NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ 250 is no longer used (r6919); qn was the number of limbs of q. 251 s must have at least qn+1 limbs (qn should be enough, but currently fails 252 since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs) 253*/ 254static unsigned long 255mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps) 256{ 257 unsigned long l; 258 mpfr_exp_t dif, expt, expr; 259 mpz_t t, rr; 260 mp_size_t sbit, tbit; 261 262 MPFR_ASSERTN (MPFR_IS_PURE_FP (r)); 263 264 expt = 0; 265 *exps = 1 - (mpfr_exp_t) q; /* s = 2^(q-1) */ 266 mpz_init (t); 267 mpz_init (rr); 268 mpz_set_ui(t, 1); 269 mpz_set_ui(s, 1); 270 mpz_mul_2exp(s, s, q-1); 271 expr = mpfr_get_z_2exp(rr, r); /* no error here */ 272 273 l = 0; 274 for (;;) { 275 l++; 276 mpz_mul(t, t, rr); 277 expt += expr; 278 MPFR_MPZ_SIZEINBASE2 (sbit, s); 279 MPFR_MPZ_SIZEINBASE2 (tbit, t); 280 dif = *exps + sbit - expt - tbit; 281 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */ 282 expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */ 283 mpz_fdiv_q_ui (t, t, l); /* error at most 2^(1-q) */ 284 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */ 285 MPFR_ASSERTD (expt == *exps); 286 if (mpz_sgn (t) == 0) 287 break; 288 mpz_add(s, s, t); /* no error here: exact */ 289 /* ensures rr has the same size as t: after several shifts, the error 290 on rr is still at most ulp(t)=ulp(s) */ 291 MPFR_MPZ_SIZEINBASE2 (tbit, t); 292 expr += mpz_normalize(rr, rr, tbit); 293 } 294 295 mpz_clear (t); 296 mpz_clear (rr); 297 298 return 3 * l * (l + 1); 299} 300 301/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q 302 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications. 303 Return l. 304 Uses m multiplications of full size and 2l/m of decreasing size, 305 i.e. a total equivalent to about m+l/m full multiplications, 306 i.e. 2*sqrt(l) for m=sqrt(l). 307 NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ 308 is no longer used (r6919); sizer was the number of limbs of r. 309 Version using mpz. ss must have at least (sizer+1) limbs. 310 The error is bounded by (l^2+4*l) ulps where l is the return value. 311*/ 312static unsigned long 313mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps) 314{ 315 mpfr_exp_t expr, *expR, expt; 316 mpfr_prec_t ql; 317 unsigned long l, m, i; 318 mpz_t t, *R, rr, tmp; 319 mp_size_t sbit, rrbit; 320 MPFR_TMP_DECL(marker); 321 322 /* estimate value of l */ 323 MPFR_ASSERTD (MPFR_GET_EXP (r) < 0); 324 l = q / (- MPFR_GET_EXP (r)); 325 m = __gmpfr_isqrt (l); 326 /* we access R[2], thus we need m >= 2 */ 327 if (m < 2) 328 m = 2; 329 330 MPFR_TMP_MARK(marker); 331 R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t)); /* R[i] is r^i */ 332 expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t)); 333 /* expR[i] is the exponent for R[i] */ 334 mpz_init (tmp); 335 mpz_init (rr); 336 mpz_init (t); 337 mpz_set_ui (s, 0); 338 *exps = 1 - q; /* 1 ulp = 2^(1-q) */ 339 for (i = 0 ; i <= m ; i++) 340 mpz_init (R[i]); 341 expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */ 342 expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */ 343 mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */ 344 mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */ 345 expR[2] = 1 - q; 346 for (i = 3 ; i <= m ; i++) 347 { 348 if ((i & 1) == 1) 349 mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */ 350 else 351 mpz_mul (t, R[i/2], R[i/2]); 352 mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */ 353 expR[i] = 1 - q; 354 } 355 mpz_set_ui (R[0], 1); 356 mpz_mul_2exp (R[0], R[0], q-1); 357 expR[0] = 1-q; /* R[0]=1 */ 358 mpz_set_ui (rr, 1); 359 expr = 0; /* rr contains r^l/l! */ 360 /* by induction: err(rr) <= 2*l ulps */ 361 362 l = 0; 363 ql = q; /* precision used for current giant step */ 364 do 365 { 366 /* all R[i] must have exponent 1-ql */ 367 if (l != 0) 368 for (i = 0 ; i < m ; i++) 369 expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql); 370 /* the absolute error on R[i]*rr is still 2*i-1 ulps */ 371 expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql); 372 /* err(t) <= 2*m-1 ulps */ 373 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)! 374 using Horner's scheme */ 375 for (i = m-1 ; i-- != 0 ; ) 376 { 377 mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */ 378 mpz_add (t, t, R[i]); 379 } 380 /* now err(t) <= (3m-2) ulps */ 381 382 /* now multiplies t by r^l/l! and adds to s */ 383 mpz_mul (t, t, rr); 384 expt += expr; 385 expt = mpz_normalize2 (t, t, expt, *exps); 386 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */ 387 MPFR_ASSERTD (expt == *exps); 388 mpz_add (s, s, t); /* no error here */ 389 390 /* updates rr, the multiplication of the factors l+i could be done 391 using binary splitting too, but it is not sure it would save much */ 392 mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */ 393 expr += expR[m]; 394 mpz_set_ui (tmp, 1); 395 for (i = 1 ; i <= m ; i++) 396 mpz_mul_ui (tmp, tmp, l + i); 397 mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */ 398 l += m; 399 if (MPFR_UNLIKELY (mpz_sgn (t) == 0)) 400 break; 401 expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */ 402 if (MPFR_UNLIKELY (mpz_sgn (rr) == 0)) 403 rrbit = 1; 404 else 405 MPFR_MPZ_SIZEINBASE2 (rrbit, rr); 406 MPFR_MPZ_SIZEINBASE2 (sbit, s); 407 ql = q - *exps - sbit + expr + rrbit; 408 /* TODO: Wrong cast. I don't want what is right, but this is 409 certainly wrong */ 410 } 411 while ((size_t) expr + rrbit > (size_t) -q); 412 413 for (i = 0 ; i <= m ; i++) 414 mpz_clear (R[i]); 415 MPFR_TMP_FREE(marker); 416 mpz_clear (rr); 417 mpz_clear (t); 418 mpz_clear (tmp); 419 420 return l * (l + 4); 421} 422