1/* mpfr_atan -- arc-tangent of a floating-point number
2
3Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4Contributed by the Arenaire and Cacao projects, INRIA.
5
6This file is part of the GNU MPFR Library, and was contributed by Mathieu Dutour.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms
27   for the series expansion, with an error of at most 1 ulp.
28   Assumes |x| < 1.
29
30   If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ...
31
32   Assume p is non-zero.
33
34   When we sum terms up to x^k/(2k+1), the denominator Q[0] is
35   3*5*7*...*(2k+1) ~ (2k/e)^k.
36*/
37static void
38mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab)
39{
40  mpz_t *S, *Q, *ptoj;
41  unsigned long n, i, k, j, l;
42  mpfr_exp_t diff, expo;
43  int im, done;
44  mpfr_prec_t mult, *accu, *log2_nb_terms;
45  mpfr_prec_t precy = MPFR_PREC(y);
46
47  MPFR_ASSERTD(mpz_cmp_ui (p, 0) != 0);
48
49  accu = (mpfr_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mpfr_prec_t));
50  log2_nb_terms = accu + m + 1;
51
52  /* Set Tables */
53  S    = tab;           /* S */
54  ptoj = S + 1*(m+1);   /* p^2^j Precomputed table */
55  Q    = S + 2*(m+1);   /* Product of Odd integer  table  */
56
57  /* From p to p^2, and r to 2r */
58  mpz_mul (p, p, p);
59  MPFR_ASSERTD (2 * r > r);
60  r = 2 * r;
61
62  /* Normalize p */
63  n = mpz_scan1 (p, 0);
64  mpz_tdiv_q_2exp (p, p, n); /* exact */
65  MPFR_ASSERTD (r > n);
66  r -= n;
67  /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */
68
69  MPFR_ASSERTD (mpz_sgn (p) > 0);
70  MPFR_ASSERTD (m > 0);
71
72  /* check if p=1 (special case) */
73  l = 0;
74  /*
75    We compute by binary splitting, with X = x^2 = p/2^r:
76    P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
77    Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
78    S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise
79    Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough.
80    The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it
81    into account when we compute with Q.
82  */
83  accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the
84                  number of bits of the corresponding term S[j]/Q[j] */
85  if (mpz_cmp_ui (p, 1) != 0)
86    {
87      /* p <> 1: precompute ptoj table */
88      mpz_set (ptoj[0], p);
89      for (im = 1 ; im <= m ; im ++)
90        mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]);
91      /* main loop */
92      n = 1UL << m;
93      /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when
94         p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */
95      for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++)
96        {
97          /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */
98          mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */
99          mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */
100          mpz_mul_2exp (S[k], Q[k+1], r);
101          mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */
102          mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */
103          log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
104          for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --)
105            {
106              /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond
107                 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */
108              MPFR_ASSERTD (k > 0);
109              mpz_mul (S[k], S[k], Q[k-1]);
110              mpz_mul (S[k], S[k], ptoj[l]);
111              mpz_mul (S[k-1], S[k-1], Q[k]);
112              mpz_mul_2exp (S[k-1], S[k-1], r << l);
113              mpz_add (S[k-1], S[k-1], S[k]);
114              mpz_mul (Q[k-1], Q[k-1], Q[k]);
115              log2_nb_terms[k-1] = l + 1;
116              /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */
117              MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]);
118              /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */
119              mult = (r << (l + 1)) - mult - 1;
120              accu[k-1] = (k == 1) ? mult : accu[k-2] + mult;
121              if (accu[k-1] > precy)
122                done = 1;
123            }
124        }
125    }
126  else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r,
127          we can stop when r*i > precy i.e. i > precy/r */
128    {
129      n = 1UL << m;
130      for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++)
131        {
132          mpz_set_ui (Q[k + 1], 2 * i + 3);
133          mpz_mul_2exp (S[k], Q[k+1], r);
134          mpz_sub_ui (S[k], S[k], 1 + 2 * i);
135          mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i);
136          log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
137          for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --)
138            {
139              MPFR_ASSERTD (k > 0);
140              mpz_mul (S[k], S[k], Q[k-1]);
141              mpz_mul (S[k-1], S[k-1], Q[k]);
142              mpz_mul_2exp (S[k-1], S[k-1], r << l);
143              mpz_add (S[k-1], S[k-1], S[k]);
144              mpz_mul (Q[k-1], Q[k-1], Q[k]);
145              log2_nb_terms[k-1] = l + 1;
146            }
147        }
148    }
149
150  /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */
151  l = 0; /* number of terms accumulated in S[k]/Q[k] */
152  while (k > 1)
153    {
154      k --;
155      /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */
156      j = log2_nb_terms[k-1];
157      mpz_mul (S[k], S[k], Q[k-1]);
158      if (mpz_cmp_ui (p, 1) != 0)
159        mpz_mul (S[k], S[k], ptoj[j]);
160      mpz_mul (S[k-1], S[k-1], Q[k]);
161      l += 1 << log2_nb_terms[k];
162      mpz_mul_2exp (S[k-1], S[k-1], r * l);
163      mpz_add (S[k-1], S[k-1], S[k]);
164      mpz_mul (Q[k-1], Q[k-1], Q[k]);
165    }
166  (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mpfr_prec_t));
167
168  MPFR_MPZ_SIZEINBASE2 (diff, S[0]);
169  diff -= 2 * precy;
170  expo = diff;
171  if (diff >= 0)
172    mpz_tdiv_q_2exp (S[0], S[0], diff);
173  else
174    mpz_mul_2exp (S[0], S[0], -diff);
175
176  MPFR_MPZ_SIZEINBASE2 (diff, Q[0]);
177  diff -= precy;
178  expo -= diff;
179  if (diff >= 0)
180    mpz_tdiv_q_2exp (Q[0], Q[0], diff);
181  else
182    mpz_mul_2exp (Q[0], Q[0], -diff);
183
184  mpz_tdiv_q (S[0], S[0], Q[0]);
185  mpfr_set_z (y, S[0], MPFR_RNDD);
186  MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1));
187}
188
189int
190mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
191{
192  mpfr_t xp, arctgt, sk, tmp, tmp2;
193  mpz_t  ukz;
194  mpz_t *tabz;
195  mpfr_exp_t exptol;
196  mpfr_prec_t prec, realprec, est_lost, lost;
197  unsigned long twopoweri, log2p, red;
198  int comparaison, inexact;
199  int i, n0, oldn0;
200  MPFR_GROUP_DECL (group);
201  MPFR_SAVE_EXPO_DECL (expo);
202  MPFR_ZIV_DECL (loop);
203
204  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
205                 ("atan[%#R]=%R inexact=%d", atan, atan, inexact));
206
207  /* Singular cases */
208  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
209    {
210      if (MPFR_IS_NAN (x))
211        {
212          MPFR_SET_NAN (atan);
213          MPFR_RET_NAN;
214        }
215      else if (MPFR_IS_INF (x))
216        {
217          MPFR_SAVE_EXPO_MARK (expo);
218          if (MPFR_IS_POS (x))  /* arctan(+inf) = Pi/2 */
219            inexact = mpfr_const_pi (atan, rnd_mode);
220          else /* arctan(-inf) = -Pi/2 */
221            {
222              inexact = -mpfr_const_pi (atan,
223                                        MPFR_INVERT_RND (rnd_mode));
224              MPFR_CHANGE_SIGN (atan);
225            }
226          mpfr_div_2ui (atan, atan, 1, rnd_mode);  /* exact (no exceptions) */
227          MPFR_SAVE_EXPO_FREE (expo);
228          return mpfr_check_range (atan, inexact, rnd_mode);
229        }
230      else /* x is necessarily 0 */
231        {
232          MPFR_ASSERTD (MPFR_IS_ZERO (x));
233          MPFR_SET_ZERO (atan);
234          MPFR_SET_SAME_SIGN (atan, x);
235          MPFR_RET (0);
236        }
237    }
238
239  /* atan(x) = x - x^3/3 + x^5/5...
240     so the error is < 2^(3*EXP(x)-1)
241     so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */
242  MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0,
243                                    rnd_mode, {});
244
245  /* Set x_p=|x| */
246  MPFR_TMP_INIT_ABS (xp, x);
247
248  MPFR_SAVE_EXPO_MARK (expo);
249
250  /* Other simple case arctan(-+1)=-+pi/4 */
251  comparaison = mpfr_cmp_ui (xp, 1);
252  if (MPFR_UNLIKELY (comparaison == 0))
253    {
254      int neg = MPFR_IS_NEG (x);
255      inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode
256                               : MPFR_INVERT_RND (rnd_mode));
257      if (neg)
258        {
259          inexact = -inexact;
260          MPFR_CHANGE_SIGN (atan);
261        }
262      mpfr_div_2ui (atan, atan, 2, rnd_mode);  /* exact (no exceptions) */
263      MPFR_SAVE_EXPO_FREE (expo);
264      return mpfr_check_range (atan, inexact, rnd_mode);
265    }
266
267  realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
268  prec = realprec + GMP_NUMB_BITS;
269
270  /* Initialisation */
271  mpz_init (ukz);
272  MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt);
273  oldn0 = 0;
274  tabz = (mpz_t *) 0;
275
276  MPFR_ZIV_INIT (loop, prec);
277  for (;;)
278    {
279      /* First, if |x| < 1, we need to have more prec to be able to round (sup)
280         n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */
281      mpfr_prec_t sup;
282      sup = MPFR_GET_EXP (xp) < 0 ? 2 - MPFR_GET_EXP (xp) : 1; /* sup >= 1 */
283
284      n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3);
285      /* since realprec >= 4, n0 >= ceil(log2(8)) >= 3, thus 3*n0 > 2 */
286      prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2);
287
288      /* the number of lost bits due to argument reduction is
289         9 - 2 * EXP(sk), which we estimate by 9 + 2*ceil(log2(p))
290         since we manage that sk < 1/p */
291      if (MPFR_PREC (atan) > 100)
292        {
293          log2p = MPFR_INT_CEIL_LOG2(prec) / 2 - 3;
294          est_lost = 9 + 2 * log2p;
295          prec += est_lost;
296        }
297      else
298        log2p = est_lost = 0; /* don't reduce the argument */
299
300      /* Initialisation */
301      MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt);
302      if (MPFR_LIKELY (oldn0 == 0))
303        {
304          oldn0 = 3 * (n0 + 1);
305          tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0 * sizeof (mpz_t));
306          for (i = 0; i < oldn0; i++)
307            mpz_init (tabz[i]);
308        }
309      else if (MPFR_UNLIKELY (oldn0 < 3 * (n0 + 1)))
310        {
311          tabz = (mpz_t *) (*__gmp_reallocate_func)
312            (tabz, oldn0 * sizeof (mpz_t), 3 * (n0 + 1)*sizeof (mpz_t));
313          for (i = oldn0; i < 3 * (n0 + 1); i++)
314            mpz_init (tabz[i]);
315          oldn0 = 3 * (n0 + 1);
316        }
317
318      /* The mpfr_ui_div below mustn't underflow. This is guaranteed by
319         MPFR_SAVE_EXPO_MARK, but let's check that for maintainability. */
320      MPFR_ASSERTD (__gmpfr_emax <= 1 - __gmpfr_emin);
321
322      if (comparaison > 0) /* use atan(xp) = Pi/2 - atan(1/xp) */
323        mpfr_ui_div (sk, 1, xp, MPFR_RNDN);
324      else
325        mpfr_set (sk, xp, MPFR_RNDN);
326
327      /* now 0 < sk <= 1 */
328
329      /* Argument reduction: atan(x) = 2 atan((sqrt(1+x^2)-1)/x).
330         We want |sk| < k/sqrt(p) where p is the target precision. */
331      lost = 0;
332      for (red = 0; MPFR_GET_EXP(sk) > - (mpfr_exp_t) log2p; red ++)
333        {
334          lost = 9 - 2 * MPFR_EXP(sk);
335          mpfr_mul (tmp, sk, sk, MPFR_RNDN);
336          mpfr_add_ui (tmp, tmp, 1, MPFR_RNDN);
337          mpfr_sqrt (tmp, tmp, MPFR_RNDN);
338          mpfr_sub_ui (tmp, tmp, 1, MPFR_RNDN);
339          if (red == 0 && comparaison > 0)
340            /* use xp = 1/sk */
341            mpfr_mul (sk, tmp, xp, MPFR_RNDN);
342          else
343            mpfr_div (sk, tmp, sk, MPFR_RNDN);
344        }
345
346      /* we started from x0 = 1/|x| if |x| > 1, and |x| otherwise, thus
347         we had x0 = min(|x|, 1/|x|) <= 1, and applied 'red' times the
348         argument reduction x -> (sqrt(1+x^2)-1)/x, which keeps 0 < x < 1,
349         thus 0 < sk <= 1, and sk=1 can occur only if red=0 */
350
351      /* If sk=1, then if |x| < 1, we have 1 - 2^(-prec-1) <= |x| < 1,
352         or if |x| > 1, we have 1 - 2^(-prec-1) <= 1/|x| < 1, thus in all
353         cases ||x| - 1| <= 2^(-prec), from which it follows
354         |atan|x| - Pi/4| <= 2^(-prec), given the Taylor expansion
355         atan(1+x) = Pi/4 + x/2 - x^2/4 + ...
356         Since Pi/4 = 0.785..., the error is at most one ulp.
357      */
358      if (MPFR_UNLIKELY(mpfr_cmp_ui (sk, 1) == 0))
359        {
360          mpfr_const_pi (arctgt, MPFR_RNDN); /* 1/2 ulp extra error */
361          mpfr_div_2ui (arctgt, arctgt, 2, MPFR_RNDN); /* exact */
362          realprec = prec - 2;
363          goto can_round;
364        }
365
366      /* Assignation  */
367      MPFR_SET_ZERO (arctgt);
368      twopoweri = 1 << 0;
369      MPFR_ASSERTD (n0 >= 4);
370      for (i = 0 ; i < n0; i++)
371        {
372          if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk)))
373            break;
374          /* Calculation of trunc(tmp) --> mpz */
375          mpfr_mul_2ui (tmp, sk, twopoweri, MPFR_RNDN);
376          mpfr_trunc (tmp, tmp);
377          if (!MPFR_IS_ZERO (tmp))
378            {
379              /* tmp = ukz*2^exptol */
380              exptol = mpfr_get_z_2exp (ukz, tmp);
381              /* since the s_k are decreasing (see algorithms.tex),
382                 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1,
383                 thus exptol < 0 */
384              MPFR_ASSERTD (exptol < 0);
385              mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol));
386              /* since tmp is a non-zero integer, and tmp = ukzold*2^exptol,
387                 we now have ukz = tmp, thus ukz is non-zero */
388              /* Calculation of arctan(Ak) */
389              mpfr_set_z (tmp, ukz, MPFR_RNDN);
390              mpfr_div_2ui (tmp, tmp, twopoweri, MPFR_RNDN);
391              mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz);
392              mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN);
393              /* Addition */
394              mpfr_add (arctgt, arctgt, tmp2, MPFR_RNDN);
395              /* Next iteration */
396              mpfr_sub (tmp2, sk, tmp, MPFR_RNDN);
397              mpfr_mul (sk, sk, tmp, MPFR_RNDN);
398              mpfr_add_ui (sk, sk, 1, MPFR_RNDN);
399              mpfr_div (sk, tmp2, sk, MPFR_RNDN);
400            }
401          twopoweri <<= 1;
402        }
403      /* Add last step (Arctan(sk) ~= sk */
404      mpfr_add (arctgt, arctgt, sk, MPFR_RNDN);
405
406      /* argument reduction */
407      mpfr_mul_2exp (arctgt, arctgt, red, MPFR_RNDN);
408
409      if (comparaison > 0)
410        { /* atan(x) = Pi/2-atan(1/x) for x > 0 */
411          mpfr_const_pi (tmp, MPFR_RNDN);
412          mpfr_div_2ui (tmp, tmp, 1, MPFR_RNDN);
413          mpfr_sub (arctgt, tmp, arctgt, MPFR_RNDN);
414        }
415      MPFR_SET_POS (arctgt);
416
417    can_round:
418      if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec + est_lost - lost,
419                                       MPFR_PREC (atan), rnd_mode)))
420        break;
421      MPFR_ZIV_NEXT (loop, realprec);
422    }
423  MPFR_ZIV_FREE (loop);
424
425  inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x));
426
427  for (i = 0 ; i < oldn0 ; i++)
428    mpz_clear (tabz[i]);
429  mpz_clear (ukz);
430  (*__gmp_free_func) (tabz, oldn0 * sizeof (mpz_t));
431  MPFR_GROUP_CLEAR (group);
432
433  MPFR_SAVE_EXPO_FREE (expo);
434  return mpfr_check_range (atan, inexact, rnd_mode);
435}
436