1/* mpfr_pow_z -- power function x^z with z a MPZ 2 3Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4Contributed by the Arenaire and Cacao projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#define MPFR_NEED_LONGLONG_H 24#include "mpfr-impl.h" 25 26/* y <- x^|z| with z != 0 27 if cr=1: ensures correct rounding of y 28 if cr=0: does not ensure correct rounding, but avoid spurious overflow 29 or underflow, and uses the precision of y as working precision (warning, 30 y and x might be the same variable). */ 31static int 32mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr) 33{ 34 mpfr_t res; 35 mpfr_prec_t prec, err; 36 int inexact; 37 mpfr_rnd_t rnd1, rnd2; 38 mpz_t absz; 39 mp_size_t size_z; 40 MPFR_ZIV_DECL (loop); 41 MPFR_BLOCK_DECL (flags); 42 43 MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d cr=%d", x, x, rnd, cr), 44 ("y[%#R]=%R inexact=%d", y, y, inexact)); 45 46 MPFR_ASSERTD (mpz_sgn (z) != 0); 47 48 if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0)) 49 return mpfr_set (y, x, rnd); 50 51 absz[0] = z[0]; 52 SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */ 53 MPFR_MPZ_SIZEINBASE2 (size_z, z); 54 55 /* round toward 1 (or -1) to avoid spurious overflow or underflow, 56 i.e. if an overflow or underflow occurs, it is a real exception 57 and is not just due to the rounding error. */ 58 rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ 59 : (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD); 60 rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU; 61 62 if (cr != 0) 63 prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y)); 64 else 65 prec = MPFR_PREC (y); 66 mpfr_init2 (res, prec); 67 68 MPFR_ZIV_INIT (loop, prec); 69 for (;;) 70 { 71 unsigned int inexmul; /* will be non-zero if res may be inexact */ 72 mp_size_t i = size_z; 73 74 /* now 2^(i-1) <= z < 2^i */ 75 /* see below (case z < 0) for the error analysis, which is identical, 76 except if z=n, the maximal relative error is here 2(n-1)2^(-prec) 77 instead of 2(2n-1)2^(-prec) for z<0. */ 78 MPFR_ASSERTD (prec > (mpfr_prec_t) i); 79 err = prec - 1 - (mpfr_prec_t) i; 80 81 MPFR_BLOCK (flags, 82 inexmul = mpfr_mul (res, x, x, rnd2); 83 MPFR_ASSERTD (i >= 2); 84 if (mpz_tstbit (absz, i - 2)) 85 inexmul |= mpfr_mul (res, res, x, rnd1); 86 for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--) 87 { 88 inexmul |= mpfr_mul (res, res, res, rnd2); 89 if (mpz_tstbit (absz, i)) 90 inexmul |= mpfr_mul (res, res, x, rnd1); 91 }); 92 if (MPFR_LIKELY (inexmul == 0 || cr == 0 93 || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags) 94 || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd))) 95 break; 96 /* Can't decide correct rounding, increase the precision */ 97 MPFR_ZIV_NEXT (loop, prec); 98 mpfr_set_prec (res, prec); 99 } 100 MPFR_ZIV_FREE (loop); 101 102 /* Check Overflow */ 103 if (MPFR_OVERFLOW (flags)) 104 { 105 MPFR_LOG_MSG (("overflow\n", 0)); 106 inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ? 107 MPFR_SIGN (x) : MPFR_SIGN_POS); 108 } 109 /* Check Underflow */ 110 else if (MPFR_UNDERFLOW (flags)) 111 { 112 MPFR_LOG_MSG (("underflow\n", 0)); 113 if (rnd == MPFR_RNDN) 114 { 115 mpfr_t y2, zz; 116 117 /* We cannot decide now whether the result should be rounded 118 toward zero or +Inf. So, let's use the general case of 119 mpfr_pow, which can do that. But the problem is that the 120 result can be exact! However, it is sufficient to try to 121 round on 2 bits (the precision does not matter in case of 122 underflow, since MPFR does not have subnormals), in which 123 case, the result cannot be exact due to previous filtering 124 of trivial cases. */ 125 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 126 MPFR_EXP (x) - 1) != 0); 127 mpfr_init2 (y2, 2); 128 mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS); 129 inexact = mpfr_set_z (zz, z, MPFR_RNDN); 130 MPFR_ASSERTN (inexact == 0); 131 inexact = mpfr_pow_general (y2, x, zz, rnd, 1, 132 (mpfr_save_expo_t *) NULL); 133 mpfr_clear (zz); 134 mpfr_set (y, y2, MPFR_RNDN); 135 mpfr_clear (y2); 136 __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW; 137 } 138 else 139 { 140 inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ? 141 MPFR_SIGN (x) : MPFR_SIGN_POS); 142 } 143 } 144 else 145 inexact = mpfr_set (y, res, rnd); 146 147 mpfr_clear (res); 148 return inexact; 149} 150 151/* The computation of y = pow(x,z) is done by 152 * y = set_ui(1) if z = 0 153 * y = pow_ui(x,z) if z > 0 154 * y = pow_ui(1/x,-z) if z < 0 155 * 156 * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in 157 * case MAX < 1/MIN, where MAX is the largest positive value, i.e., 158 * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e., 159 * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas 160 * x^z is representable. 161 */ 162 163int 164mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd) 165{ 166 int inexact; 167 mpz_t tmp; 168 MPFR_SAVE_EXPO_DECL (expo); 169 170 MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d", x, x, rnd), 171 ("y[%#R]=%R inexact=%d", y, y, inexact)); 172 173 /* x^0 = 1 for any x, even a NaN */ 174 if (MPFR_UNLIKELY (mpz_sgn (z) == 0)) 175 return mpfr_set_ui (y, 1, rnd); 176 177 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 178 { 179 if (MPFR_IS_NAN (x)) 180 { 181 MPFR_SET_NAN (y); 182 MPFR_RET_NAN; 183 } 184 else if (MPFR_IS_INF (x)) 185 { 186 /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */ 187 /* Inf ^(-n) = 0, sign = + if x>0 or z even */ 188 if (mpz_sgn (z) > 0) 189 MPFR_SET_INF (y); 190 else 191 MPFR_SET_ZERO (y); 192 if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z))) 193 MPFR_SET_NEG (y); 194 else 195 MPFR_SET_POS (y); 196 MPFR_RET (0); 197 } 198 else /* x is zero */ 199 { 200 MPFR_ASSERTD (MPFR_IS_ZERO(x)); 201 if (mpz_sgn (z) > 0) 202 /* 0^n = +/-0 for any n */ 203 MPFR_SET_ZERO (y); 204 else 205 /* 0^(-n) if +/- INF */ 206 MPFR_SET_INF (y); 207 if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z))) 208 MPFR_SET_POS (y); 209 else 210 MPFR_SET_NEG (y); 211 MPFR_RET(0); 212 } 213 } 214 215 MPFR_SAVE_EXPO_MARK (expo); 216 217 /* detect exact powers: x^-n is exact iff x is a power of 2 218 Do it if n > 0 too as this is faster and this filtering is 219 needed in case of underflow. */ 220 if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 221 MPFR_EXP (x) - 1) == 0)) 222 { 223 mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same 224 variable */ 225 226 MPFR_LOG_MSG (("x^n with x power of two\n", 0)); 227 mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd); 228 MPFR_ASSERTD (MPFR_IS_FP (y)); 229 mpz_init (tmp); 230 mpz_mul_si (tmp, z, expx - 1); 231 MPFR_ASSERTD (MPFR_GET_EXP (y) == 1); 232 mpz_add_ui (tmp, tmp, 1); 233 inexact = 0; 234 if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0)) 235 { 236 MPFR_LOG_MSG (("underflow\n", 0)); 237 /* |y| is a power of two, thus |y| <= 2^(emin-2), and in 238 rounding to nearest, the value must be rounded to 0. */ 239 if (rnd == MPFR_RNDN) 240 rnd = MPFR_RNDZ; 241 inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y)); 242 } 243 else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0)) 244 { 245 MPFR_LOG_MSG (("overflow\n", 0)); 246 inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y)); 247 } 248 else 249 MPFR_SET_EXP (y, mpz_get_si (tmp)); 250 mpz_clear (tmp); 251 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 252 } 253 else if (mpz_sgn (z) > 0) 254 { 255 inexact = mpfr_pow_pos_z (y, x, z, rnd, 1); 256 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 257 } 258 else 259 { 260 /* Declaration of the intermediary variable */ 261 mpfr_t t; 262 mpfr_prec_t Nt; /* Precision of the intermediary variable */ 263 mpfr_rnd_t rnd1; 264 mp_size_t size_z; 265 MPFR_ZIV_DECL (loop); 266 267 MPFR_MPZ_SIZEINBASE2 (size_z, z); 268 269 /* initial working precision */ 270 Nt = MPFR_PREC (y); 271 Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt); 272 /* ensures Nt >= bits(z)+2 */ 273 274 /* initialise of intermediary variable */ 275 mpfr_init2 (t, Nt); 276 277 /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding 278 toward sign(x), to avoid spurious overflow or underflow. */ 279 rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ : 280 (MPFR_SIGN (x) > 0 ? MPFR_RNDU : MPFR_RNDD); 281 282 MPFR_ZIV_INIT (loop, Nt); 283 for (;;) 284 { 285 MPFR_BLOCK_DECL (flags); 286 287 /* compute (1/x)^(-z), -z>0 */ 288 /* As emin = -emax, an underflow cannot occur in the division. 289 And if an overflow occurs, then this means that x^z overflows 290 too (since we have rounded toward 1 or -1). */ 291 MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1)); 292 MPFR_ASSERTD (! MPFR_UNDERFLOW (flags)); 293 /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */ 294 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 295 goto overflow; 296 MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0)); 297 /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt), 298 with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2, 299 which is satisfied as soon as Nt >= bits(z)+2, then we can use 300 Lemma \ref{lemma_graillat} from algorithms.tex, which yields 301 t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the 302 error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */ 303 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 304 { 305 overflow: 306 MPFR_ZIV_FREE (loop); 307 mpfr_clear (t); 308 MPFR_SAVE_EXPO_FREE (expo); 309 MPFR_LOG_MSG (("overflow\n", 0)); 310 return mpfr_overflow (y, rnd, 311 mpz_odd_p (z) ? MPFR_SIGN (x) : 312 MPFR_SIGN_POS); 313 } 314 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) 315 { 316 MPFR_ZIV_FREE (loop); 317 mpfr_clear (t); 318 MPFR_LOG_MSG (("underflow\n", 0)); 319 if (rnd == MPFR_RNDN) 320 { 321 mpfr_t y2, zz; 322 323 /* We cannot decide now whether the result should be 324 rounded toward zero or away from zero. So, like 325 in mpfr_pow_pos_z, let's use the general case of 326 mpfr_pow in precision 2. */ 327 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 328 MPFR_EXP (x) - 1) != 0); 329 mpfr_init2 (y2, 2); 330 mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS); 331 inexact = mpfr_set_z (zz, z, MPFR_RNDN); 332 MPFR_ASSERTN (inexact == 0); 333 inexact = mpfr_pow_general (y2, x, zz, rnd, 1, 334 (mpfr_save_expo_t *) NULL); 335 mpfr_clear (zz); 336 mpfr_set (y, y2, MPFR_RNDN); 337 mpfr_clear (y2); 338 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 339 goto end; 340 } 341 else 342 { 343 MPFR_SAVE_EXPO_FREE (expo); 344 return mpfr_underflow (y, rnd, mpz_odd_p (z) ? 345 MPFR_SIGN (x) : MPFR_SIGN_POS); 346 } 347 } 348 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y), 349 rnd))) 350 break; 351 /* actualisation of the precision */ 352 MPFR_ZIV_NEXT (loop, Nt); 353 mpfr_set_prec (t, Nt); 354 } 355 MPFR_ZIV_FREE (loop); 356 357 inexact = mpfr_set (y, t, rnd); 358 mpfr_clear (t); 359 } 360 361 end: 362 MPFR_SAVE_EXPO_FREE (expo); 363 return mpfr_check_range (y, inexact, rnd); 364} 365