1/* mpfr_atan -- arc-tangent of a floating-point number 2 3Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4Contributed by the Arenaire and Cacao projects, INRIA. 5 6This file is part of the GNU MPFR Library, and was contributed by Mathieu Dutour. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#define MPFR_NEED_LONGLONG_H 24#include "mpfr-impl.h" 25 26/* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms 27 for the series expansion, with an error of at most 1 ulp. 28 Assumes |x| < 1. 29 30 If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ... 31 32 Assume p is non-zero. 33 34 When we sum terms up to x^k/(2k+1), the denominator Q[0] is 35 3*5*7*...*(2k+1) ~ (2k/e)^k. 36*/ 37static void 38mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab) 39{ 40 mpz_t *S, *Q, *ptoj; 41 unsigned long n, i, k, j, l; 42 mpfr_exp_t diff, expo; 43 int im, done; 44 mpfr_prec_t mult, *accu, *log2_nb_terms; 45 mpfr_prec_t precy = MPFR_PREC(y); 46 47 MPFR_ASSERTD(mpz_cmp_ui (p, 0) != 0); 48 49 accu = (mpfr_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mpfr_prec_t)); 50 log2_nb_terms = accu + m + 1; 51 52 /* Set Tables */ 53 S = tab; /* S */ 54 ptoj = S + 1*(m+1); /* p^2^j Precomputed table */ 55 Q = S + 2*(m+1); /* Product of Odd integer table */ 56 57 /* From p to p^2, and r to 2r */ 58 mpz_mul (p, p, p); 59 MPFR_ASSERTD (2 * r > r); 60 r = 2 * r; 61 62 /* Normalize p */ 63 n = mpz_scan1 (p, 0); 64 mpz_tdiv_q_2exp (p, p, n); /* exact */ 65 MPFR_ASSERTD (r > n); 66 r -= n; 67 /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */ 68 69 MPFR_ASSERTD (mpz_sgn (p) > 0); 70 MPFR_ASSERTD (m > 0); 71 72 /* check if p=1 (special case) */ 73 l = 0; 74 /* 75 We compute by binary splitting, with X = x^2 = p/2^r: 76 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise 77 Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise 78 S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise 79 Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough. 80 The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it 81 into account when we compute with Q. 82 */ 83 accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the 84 number of bits of the corresponding term S[j]/Q[j] */ 85 if (mpz_cmp_ui (p, 1) != 0) 86 { 87 /* p <> 1: precompute ptoj table */ 88 mpz_set (ptoj[0], p); 89 for (im = 1 ; im <= m ; im ++) 90 mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]); 91 /* main loop */ 92 n = 1UL << m; 93 /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when 94 p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */ 95 for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++) 96 { 97 /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */ 98 mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */ 99 mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */ 100 mpz_mul_2exp (S[k], Q[k+1], r); 101 mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */ 102 mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */ 103 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */ 104 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --) 105 { 106 /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond 107 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */ 108 MPFR_ASSERTD (k > 0); 109 mpz_mul (S[k], S[k], Q[k-1]); 110 mpz_mul (S[k], S[k], ptoj[l]); 111 mpz_mul (S[k-1], S[k-1], Q[k]); 112 mpz_mul_2exp (S[k-1], S[k-1], r << l); 113 mpz_add (S[k-1], S[k-1], S[k]); 114 mpz_mul (Q[k-1], Q[k-1], Q[k]); 115 log2_nb_terms[k-1] = l + 1; 116 /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */ 117 MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]); 118 /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */ 119 mult = (r << (l + 1)) - mult - 1; 120 accu[k-1] = (k == 1) ? mult : accu[k-2] + mult; 121 if (accu[k-1] > precy) 122 done = 1; 123 } 124 } 125 } 126 else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r, 127 we can stop when r*i > precy i.e. i > precy/r */ 128 { 129 n = 1UL << m; 130 for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++) 131 { 132 mpz_set_ui (Q[k + 1], 2 * i + 3); 133 mpz_mul_2exp (S[k], Q[k+1], r); 134 mpz_sub_ui (S[k], S[k], 1 + 2 * i); 135 mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i); 136 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */ 137 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --) 138 { 139 MPFR_ASSERTD (k > 0); 140 mpz_mul (S[k], S[k], Q[k-1]); 141 mpz_mul (S[k-1], S[k-1], Q[k]); 142 mpz_mul_2exp (S[k-1], S[k-1], r << l); 143 mpz_add (S[k-1], S[k-1], S[k]); 144 mpz_mul (Q[k-1], Q[k-1], Q[k]); 145 log2_nb_terms[k-1] = l + 1; 146 } 147 } 148 } 149 150 /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */ 151 l = 0; /* number of terms accumulated in S[k]/Q[k] */ 152 while (k > 1) 153 { 154 k --; 155 /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */ 156 j = log2_nb_terms[k-1]; 157 mpz_mul (S[k], S[k], Q[k-1]); 158 if (mpz_cmp_ui (p, 1) != 0) 159 mpz_mul (S[k], S[k], ptoj[j]); 160 mpz_mul (S[k-1], S[k-1], Q[k]); 161 l += 1 << log2_nb_terms[k]; 162 mpz_mul_2exp (S[k-1], S[k-1], r * l); 163 mpz_add (S[k-1], S[k-1], S[k]); 164 mpz_mul (Q[k-1], Q[k-1], Q[k]); 165 } 166 (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mpfr_prec_t)); 167 168 MPFR_MPZ_SIZEINBASE2 (diff, S[0]); 169 diff -= 2 * precy; 170 expo = diff; 171 if (diff >= 0) 172 mpz_tdiv_q_2exp (S[0], S[0], diff); 173 else 174 mpz_mul_2exp (S[0], S[0], -diff); 175 176 MPFR_MPZ_SIZEINBASE2 (diff, Q[0]); 177 diff -= precy; 178 expo -= diff; 179 if (diff >= 0) 180 mpz_tdiv_q_2exp (Q[0], Q[0], diff); 181 else 182 mpz_mul_2exp (Q[0], Q[0], -diff); 183 184 mpz_tdiv_q (S[0], S[0], Q[0]); 185 mpfr_set_z (y, S[0], MPFR_RNDD); 186 MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1)); 187} 188 189int 190mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 191{ 192 mpfr_t xp, arctgt, sk, tmp, tmp2; 193 mpz_t ukz; 194 mpz_t *tabz; 195 mpfr_exp_t exptol; 196 mpfr_prec_t prec, realprec, est_lost, lost; 197 unsigned long twopoweri, log2p, red; 198 int comparaison, inexact; 199 int i, n0, oldn0; 200 MPFR_GROUP_DECL (group); 201 MPFR_SAVE_EXPO_DECL (expo); 202 MPFR_ZIV_DECL (loop); 203 204 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode), 205 ("atan[%#R]=%R inexact=%d", atan, atan, inexact)); 206 207 /* Singular cases */ 208 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 209 { 210 if (MPFR_IS_NAN (x)) 211 { 212 MPFR_SET_NAN (atan); 213 MPFR_RET_NAN; 214 } 215 else if (MPFR_IS_INF (x)) 216 { 217 MPFR_SAVE_EXPO_MARK (expo); 218 if (MPFR_IS_POS (x)) /* arctan(+inf) = Pi/2 */ 219 inexact = mpfr_const_pi (atan, rnd_mode); 220 else /* arctan(-inf) = -Pi/2 */ 221 { 222 inexact = -mpfr_const_pi (atan, 223 MPFR_INVERT_RND (rnd_mode)); 224 MPFR_CHANGE_SIGN (atan); 225 } 226 mpfr_div_2ui (atan, atan, 1, rnd_mode); /* exact (no exceptions) */ 227 MPFR_SAVE_EXPO_FREE (expo); 228 return mpfr_check_range (atan, inexact, rnd_mode); 229 } 230 else /* x is necessarily 0 */ 231 { 232 MPFR_ASSERTD (MPFR_IS_ZERO (x)); 233 MPFR_SET_ZERO (atan); 234 MPFR_SET_SAME_SIGN (atan, x); 235 MPFR_RET (0); 236 } 237 } 238 239 /* atan(x) = x - x^3/3 + x^5/5... 240 so the error is < 2^(3*EXP(x)-1) 241 so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */ 242 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0, 243 rnd_mode, {}); 244 245 /* Set x_p=|x| */ 246 MPFR_TMP_INIT_ABS (xp, x); 247 248 MPFR_SAVE_EXPO_MARK (expo); 249 250 /* Other simple case arctan(-+1)=-+pi/4 */ 251 comparaison = mpfr_cmp_ui (xp, 1); 252 if (MPFR_UNLIKELY (comparaison == 0)) 253 { 254 int neg = MPFR_IS_NEG (x); 255 inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode 256 : MPFR_INVERT_RND (rnd_mode)); 257 if (neg) 258 { 259 inexact = -inexact; 260 MPFR_CHANGE_SIGN (atan); 261 } 262 mpfr_div_2ui (atan, atan, 2, rnd_mode); /* exact (no exceptions) */ 263 MPFR_SAVE_EXPO_FREE (expo); 264 return mpfr_check_range (atan, inexact, rnd_mode); 265 } 266 267 realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4; 268 prec = realprec + GMP_NUMB_BITS; 269 270 /* Initialisation */ 271 mpz_init (ukz); 272 MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt); 273 oldn0 = 0; 274 tabz = (mpz_t *) 0; 275 276 MPFR_ZIV_INIT (loop, prec); 277 for (;;) 278 { 279 /* First, if |x| < 1, we need to have more prec to be able to round (sup) 280 n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */ 281 mpfr_prec_t sup; 282 sup = MPFR_GET_EXP (xp) < 0 ? 2 - MPFR_GET_EXP (xp) : 1; /* sup >= 1 */ 283 284 n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3); 285 /* since realprec >= 4, n0 >= ceil(log2(8)) >= 3, thus 3*n0 > 2 */ 286 prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2); 287 288 /* the number of lost bits due to argument reduction is 289 9 - 2 * EXP(sk), which we estimate by 9 + 2*ceil(log2(p)) 290 since we manage that sk < 1/p */ 291 if (MPFR_PREC (atan) > 100) 292 { 293 log2p = MPFR_INT_CEIL_LOG2(prec) / 2 - 3; 294 est_lost = 9 + 2 * log2p; 295 prec += est_lost; 296 } 297 else 298 log2p = est_lost = 0; /* don't reduce the argument */ 299 300 /* Initialisation */ 301 MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt); 302 if (MPFR_LIKELY (oldn0 == 0)) 303 { 304 oldn0 = 3 * (n0 + 1); 305 tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0 * sizeof (mpz_t)); 306 for (i = 0; i < oldn0; i++) 307 mpz_init (tabz[i]); 308 } 309 else if (MPFR_UNLIKELY (oldn0 < 3 * (n0 + 1))) 310 { 311 tabz = (mpz_t *) (*__gmp_reallocate_func) 312 (tabz, oldn0 * sizeof (mpz_t), 3 * (n0 + 1)*sizeof (mpz_t)); 313 for (i = oldn0; i < 3 * (n0 + 1); i++) 314 mpz_init (tabz[i]); 315 oldn0 = 3 * (n0 + 1); 316 } 317 318 /* The mpfr_ui_div below mustn't underflow. This is guaranteed by 319 MPFR_SAVE_EXPO_MARK, but let's check that for maintainability. */ 320 MPFR_ASSERTD (__gmpfr_emax <= 1 - __gmpfr_emin); 321 322 if (comparaison > 0) /* use atan(xp) = Pi/2 - atan(1/xp) */ 323 mpfr_ui_div (sk, 1, xp, MPFR_RNDN); 324 else 325 mpfr_set (sk, xp, MPFR_RNDN); 326 327 /* now 0 < sk <= 1 */ 328 329 /* Argument reduction: atan(x) = 2 atan((sqrt(1+x^2)-1)/x). 330 We want |sk| < k/sqrt(p) where p is the target precision. */ 331 lost = 0; 332 for (red = 0; MPFR_GET_EXP(sk) > - (mpfr_exp_t) log2p; red ++) 333 { 334 lost = 9 - 2 * MPFR_EXP(sk); 335 mpfr_mul (tmp, sk, sk, MPFR_RNDN); 336 mpfr_add_ui (tmp, tmp, 1, MPFR_RNDN); 337 mpfr_sqrt (tmp, tmp, MPFR_RNDN); 338 mpfr_sub_ui (tmp, tmp, 1, MPFR_RNDN); 339 if (red == 0 && comparaison > 0) 340 /* use xp = 1/sk */ 341 mpfr_mul (sk, tmp, xp, MPFR_RNDN); 342 else 343 mpfr_div (sk, tmp, sk, MPFR_RNDN); 344 } 345 346 /* we started from x0 = 1/|x| if |x| > 1, and |x| otherwise, thus 347 we had x0 = min(|x|, 1/|x|) <= 1, and applied 'red' times the 348 argument reduction x -> (sqrt(1+x^2)-1)/x, which keeps 0 < x < 1, 349 thus 0 < sk <= 1, and sk=1 can occur only if red=0 */ 350 351 /* If sk=1, then if |x| < 1, we have 1 - 2^(-prec-1) <= |x| < 1, 352 or if |x| > 1, we have 1 - 2^(-prec-1) <= 1/|x| < 1, thus in all 353 cases ||x| - 1| <= 2^(-prec), from which it follows 354 |atan|x| - Pi/4| <= 2^(-prec), given the Taylor expansion 355 atan(1+x) = Pi/4 + x/2 - x^2/4 + ... 356 Since Pi/4 = 0.785..., the error is at most one ulp. 357 */ 358 if (MPFR_UNLIKELY(mpfr_cmp_ui (sk, 1) == 0)) 359 { 360 mpfr_const_pi (arctgt, MPFR_RNDN); /* 1/2 ulp extra error */ 361 mpfr_div_2ui (arctgt, arctgt, 2, MPFR_RNDN); /* exact */ 362 realprec = prec - 2; 363 goto can_round; 364 } 365 366 /* Assignation */ 367 MPFR_SET_ZERO (arctgt); 368 twopoweri = 1 << 0; 369 MPFR_ASSERTD (n0 >= 4); 370 for (i = 0 ; i < n0; i++) 371 { 372 if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk))) 373 break; 374 /* Calculation of trunc(tmp) --> mpz */ 375 mpfr_mul_2ui (tmp, sk, twopoweri, MPFR_RNDN); 376 mpfr_trunc (tmp, tmp); 377 if (!MPFR_IS_ZERO (tmp)) 378 { 379 /* tmp = ukz*2^exptol */ 380 exptol = mpfr_get_z_2exp (ukz, tmp); 381 /* since the s_k are decreasing (see algorithms.tex), 382 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1, 383 thus exptol < 0 */ 384 MPFR_ASSERTD (exptol < 0); 385 mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol)); 386 /* since tmp is a non-zero integer, and tmp = ukzold*2^exptol, 387 we now have ukz = tmp, thus ukz is non-zero */ 388 /* Calculation of arctan(Ak) */ 389 mpfr_set_z (tmp, ukz, MPFR_RNDN); 390 mpfr_div_2ui (tmp, tmp, twopoweri, MPFR_RNDN); 391 mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz); 392 mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN); 393 /* Addition */ 394 mpfr_add (arctgt, arctgt, tmp2, MPFR_RNDN); 395 /* Next iteration */ 396 mpfr_sub (tmp2, sk, tmp, MPFR_RNDN); 397 mpfr_mul (sk, sk, tmp, MPFR_RNDN); 398 mpfr_add_ui (sk, sk, 1, MPFR_RNDN); 399 mpfr_div (sk, tmp2, sk, MPFR_RNDN); 400 } 401 twopoweri <<= 1; 402 } 403 /* Add last step (Arctan(sk) ~= sk */ 404 mpfr_add (arctgt, arctgt, sk, MPFR_RNDN); 405 406 /* argument reduction */ 407 mpfr_mul_2exp (arctgt, arctgt, red, MPFR_RNDN); 408 409 if (comparaison > 0) 410 { /* atan(x) = Pi/2-atan(1/x) for x > 0 */ 411 mpfr_const_pi (tmp, MPFR_RNDN); 412 mpfr_div_2ui (tmp, tmp, 1, MPFR_RNDN); 413 mpfr_sub (arctgt, tmp, arctgt, MPFR_RNDN); 414 } 415 MPFR_SET_POS (arctgt); 416 417 can_round: 418 if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec + est_lost - lost, 419 MPFR_PREC (atan), rnd_mode))) 420 break; 421 MPFR_ZIV_NEXT (loop, realprec); 422 } 423 MPFR_ZIV_FREE (loop); 424 425 inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x)); 426 427 for (i = 0 ; i < oldn0 ; i++) 428 mpz_clear (tabz[i]); 429 mpz_clear (ukz); 430 (*__gmp_free_func) (tabz, oldn0 * sizeof (mpz_t)); 431 MPFR_GROUP_CLEAR (group); 432 433 MPFR_SAVE_EXPO_FREE (expo); 434 return mpfr_check_range (atan, inexact, rnd_mode); 435} 436