1/* mpfr_lngamma -- lngamma function
2
3Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4Contributed by the Arenaire and Cacao projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* given a precision p, return alpha, such that the argument reduction
27   will use k = alpha*p*log(2).
28
29   Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
30   and the smallest value of alpha multiplied by the smallest working
31   precision should be >= 4.
32*/
33static void
34mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p)
35{
36  if (p <= 100)
37    mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
38  else if (p <= 500)
39    mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
40  else if (p <= 1000)
41    mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
42  else if (p <= 2000)
43    mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
44  else if (p <= 5000)
45    mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
46  else if (p <= 10000)
47    mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
48  else
49    mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
50}
51
52#ifndef IS_GAMMA
53static int
54unit_bit (mpfr_srcptr (x))
55{
56  mpfr_exp_t expo;
57  mpfr_prec_t prec;
58  mp_limb_t x0;
59
60  expo = MPFR_GET_EXP (x);
61  if (expo <= 0)
62    return 0;  /* |x| < 1 */
63
64  prec = MPFR_PREC (x);
65  if (expo > prec)
66    return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
67
68  /* Now, the unit bit is represented. */
69
70  prec = ((prec - 1) / GMP_NUMB_BITS + 1) * GMP_NUMB_BITS - expo;
71  /* number of represented fractional bits (including the trailing 0's) */
72
73  x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
74  /* limb containing the unit bit */
75
76  return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
77}
78#endif
79
80/* lngamma(x) = log(gamma(x)).
81   We use formula [6.1.40] from Abramowitz&Stegun:
82   lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
83              + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
84   According to [6.1.42], if the sum is truncated after m=n, the error
85   R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
86   where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
87   For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
88 */
89#ifdef IS_GAMMA
90#define GAMMA_FUNC mpfr_gamma_aux
91#else
92#define GAMMA_FUNC mpfr_lngamma_aux
93#endif
94
95static int
96GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
97{
98  mpfr_prec_t precy, w; /* working precision */
99  mpfr_t s, t, u, v, z;
100  unsigned long m, k, maxm;
101  mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
102  int inexact, compared;
103  mpfr_exp_t err_s, err_t;
104  unsigned long Bm = 0; /* number of allocated B[] */
105  unsigned long oldBm;
106  double d;
107  MPFR_SAVE_EXPO_DECL (expo);
108
109  compared = mpfr_cmp_ui (z0, 1);
110
111  MPFR_SAVE_EXPO_MARK (expo);
112
113#ifndef IS_GAMMA /* lngamma or lgamma */
114  if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
115    {
116      MPFR_SAVE_EXPO_FREE (expo);
117      return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
118    }
119
120  /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
121     - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
122  if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y))
123    {
124      mpfr_t l, h, g;
125      int ok, inex2;
126      mpfr_prec_t prec = MPFR_PREC(y) + 14;
127      MPFR_ZIV_DECL (loop);
128
129      MPFR_ZIV_INIT (loop, prec);
130      do
131        {
132          mpfr_init2 (l, prec);
133          if (MPFR_IS_POS(z0))
134            {
135              mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
136              mpfr_init2 (h, MPFR_PREC(l));
137            }
138          else
139            {
140              mpfr_init2 (h, MPFR_PREC(z0));
141              mpfr_neg (h, z0, MPFR_RNDN); /* exact */
142              mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
143              mpfr_set_prec (h, MPFR_PREC(l));
144            }
145          mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
146          mpfr_set (h, l, MPFR_RNDD); /* exact */
147          mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
148                                 to mpfr_log */
149          mpfr_init2 (g, MPFR_PREC(l));
150          /* if z0>0, we need an upper approximation of Euler's constant
151             for the left bound */
152          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
153          mpfr_mul (g, g, z0, MPFR_RNDD);
154          mpfr_sub (l, l, g, MPFR_RNDD);
155          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
156          mpfr_mul (g, g, z0, MPFR_RNDU);
157          mpfr_sub (h, h, g, MPFR_RNDD);
158          mpfr_mul (g, z0, z0, MPFR_RNDU);
159          mpfr_add (h, h, g, MPFR_RNDU);
160          inexact = mpfr_prec_round (l, MPFR_PREC(y), rnd);
161          inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
162          /* Caution: we not only need l = h, but both inexact flags should
163             agree. Indeed, one of the inexact flags might be zero. In that
164             case if we assume lngamma(z0) cannot be exact, the other flag
165             should be correct. We are conservative here and request that both
166             inexact flags agree. */
167          ok = SAME_SIGN (inexact, inex2) && mpfr_cmp (l, h) == 0;
168          if (ok)
169            mpfr_set (y, h, rnd); /* exact */
170          mpfr_clear (l);
171          mpfr_clear (h);
172          mpfr_clear (g);
173          if (ok)
174            {
175              MPFR_SAVE_EXPO_FREE (expo);
176              return mpfr_check_range (y, inexact, rnd);
177            }
178          /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
179             if x ~ 2^(-n), then we have a n-bit approximation, thus
180             we can try again with a working precision of n bits,
181             especially when n >> PREC(y).
182             Otherwise we would use the reflection formula evaluating x-1,
183             which would need precision n. */
184          MPFR_ZIV_NEXT (loop, prec);
185        }
186      while (prec <= -MPFR_EXP(z0));
187      MPFR_ZIV_FREE (loop);
188    }
189#endif
190
191  precy = MPFR_PREC(y);
192
193  mpfr_init2 (s, MPFR_PREC_MIN);
194  mpfr_init2 (t, MPFR_PREC_MIN);
195  mpfr_init2 (u, MPFR_PREC_MIN);
196  mpfr_init2 (v, MPFR_PREC_MIN);
197  mpfr_init2 (z, MPFR_PREC_MIN);
198
199  if (compared < 0)
200    {
201      mpfr_exp_t err_u;
202
203      /* use reflection formula:
204         gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
205         thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
206
207      w = precy + MPFR_INT_CEIL_LOG2 (precy);
208      while (1)
209        {
210          w += MPFR_INT_CEIL_LOG2 (w) + 14;
211          MPFR_ASSERTD(w >= 3);
212          mpfr_set_prec (s, w);
213          mpfr_set_prec (t, w);
214          mpfr_set_prec (u, w);
215          mpfr_set_prec (v, w);
216          /* In the following, we write r for a real of absolute value
217             at most 2^(-w). Different instances of r may represent different
218             values. */
219          mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
220          mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
221          mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
222          /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
223             We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
224             Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
225             the error on u is bounded by
226             ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
227             = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
228          d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
229          err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
230          err_u = (err_u >= 0) ? err_u + 1 : 0;
231          /* now the error on u is bounded by 2^err_u ulps */
232
233          mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
234          err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
235          mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
236          /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
237             <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
238             <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
239          err_s += 3 - MPFR_GET_EXP(s);
240          err_s = (err_s >= 0) ? err_s + 1 : 0;
241          /* the error on s is bounded by 2^err_s ulp(s), thus by
242             2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
243             Now n*2^(-w) can always be written |(1+r)^n-1| for some
244             |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
245             |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
246             true value.
247             In fact if ulp(s) <= ulp(S) the same inequality holds for
248             |S| instead of |s| in the right hand side, i.e., we can
249             write s = (1+r)^(2^(err_s+1)) * S.
250             But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
251             to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
252             E = n*2^(-w) we have |s - S| <= E * |s|, thus
253             |s - S| <= E/(1-E) * |S|.
254             Now E/(1-E) is bounded by 2E as long as E<=1/2,
255             and 2E can be written (1+r)^(2n)-1 as above.
256          */
257          err_s += 2; /* exponent of relative error */
258
259          mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
260          mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
261          mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
262          mpfr_abs (v, v, MPFR_RNDN);
263          /* (1+r)^(3+2^err_s+1) */
264          err_s = (err_s <= 1) ? 3 : err_s + 1;
265          /* now (1+r)^M with M <= 2^err_s */
266          mpfr_log (v, v, MPFR_RNDN);
267          /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
268             Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
269             bounded by ulp(v)/2 + 2^(err_s+1-w). */
270          if (err_s + 2 > w)
271            {
272              w += err_s + 2;
273            }
274          else
275            {
276              err_s += 1 - MPFR_GET_EXP(v);
277              err_s = (err_s >= 0) ? err_s + 1 : 0;
278              /* the error on v is bounded by 2^err_s ulps */
279              err_u += MPFR_GET_EXP(u); /* absolute error on u */
280              err_s += MPFR_GET_EXP(v); /* absolute error on v */
281              mpfr_sub (s, v, u, MPFR_RNDN);
282              /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
283                 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
284              err_s = (err_s >= err_u) ? err_s : err_u;
285              err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
286              err_s = (err_s >= 0) ? err_s + 1 : 0;
287              if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy
288                                  + (rnd == MPFR_RNDN)))
289                goto end;
290            }
291        }
292    }
293
294  /* now z0 > 1 */
295
296  MPFR_ASSERTD (compared > 0);
297
298  /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
299     so there is a cancellation of ~log(w) in the argument reconstruction */
300  w = precy + MPFR_INT_CEIL_LOG2 (precy);
301
302  do
303    {
304      w += MPFR_INT_CEIL_LOG2 (w) + 13;
305      MPFR_ASSERTD (w >= 3);
306
307      /* argument reduction: we compute gamma(z0 + k), where the series
308         has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
309         and we need k steps of argument reconstruction. Assuming k is large
310         with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
311         k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
312         However, since the series is more expensive to compute, the optimal
313         value seems to be k ~ 4.5 * w experimentally. */
314      mpfr_set_prec (s, 53);
315      mpfr_gamma_alpha (s, w);
316      mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU);
317      mpfr_mul_ui (s, s, w, MPFR_RNDU);
318      if (mpfr_cmp (z0, s) < 0)
319        {
320          mpfr_sub (s, s, z0, MPFR_RNDU);
321          k = mpfr_get_ui (s, MPFR_RNDU);
322          if (k < 3)
323            k = 3;
324        }
325      else
326        k = 3;
327
328      mpfr_set_prec (s, w);
329      mpfr_set_prec (t, w);
330      mpfr_set_prec (u, w);
331      mpfr_set_prec (v, w);
332      mpfr_set_prec (z, w);
333
334      mpfr_add_ui (z, z0, k, MPFR_RNDN);
335      /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
336
337      /* z >= 4 ensures the relative error on log(z) is small,
338         and also (z-1/2)*log(z)-z >= 0 */
339      MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
340
341      mpfr_log (s, z, MPFR_RNDN); /* log(z) */
342      /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
343         Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
344         = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
345         s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
346      mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
347      mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
348      /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
349         t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
350      mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
351      mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
352      mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
353      /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
354
355      mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
356
357      /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
358      mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
359      mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
360      mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
361
362      mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
363
364      if (Bm == 0)
365        {
366          B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
367          B = mpfr_bernoulli_internal (B, 1);
368          Bm = 2;
369        }
370
371      /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
372      maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
373
374      /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
375
376      for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
377        {
378          mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
379          if (m <= maxm)
380            {
381              mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
382              mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
383              mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
384            }
385          else
386            {
387              mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
388              mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
389              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
390              mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
391              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
392              mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
393            }
394          /* (1+u)^(10m-8) */
395          /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
396          if (Bm <= m)
397            {
398              B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
399              Bm ++;
400            }
401          mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
402          MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
403          mpfr_add (s, s, v, MPFR_RNDN);
404        }
405      /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
406      MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
407
408      /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
409         <= 1.46*u for u <= 2^(-3).
410         We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
411         for z >= 4, thus since the initial s >= 0.85, the different values of
412         s differ by at most one binade, and the total rounding error on s
413         in the for-loop is bounded by 2*(m-1)*ulp(final_s).
414         The error coming from the v's is bounded by
415         1.46*2^(-w) <= 2*ulp(final_s).
416         Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
417         <= (2m+47)*ulp(s).
418         Taking into account the truncation error (which is bounded by the last
419         term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
420      */
421
422      /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
423      mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
424      mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
425      if (k)
426        {
427          unsigned long l;
428          mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
429          for (l = 1; l < k; l++)
430            {
431              mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
432              mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
433            }
434          /* now t: (1+u)^(2k-1) */
435          /* instead of computing log(sqrt(2*Pi)/t), we compute
436             1/2*log(2*Pi/t^2), which trades a square root for a square */
437          mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
438          mpfr_div (v, v, t, MPFR_RNDN);
439          /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
440        }
441#ifdef IS_GAMMA
442      err_s = MPFR_GET_EXP(s);
443      mpfr_exp (s, s, MPFR_RNDN);
444      /* before the exponential, we have s = s0 + h where
445         |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
446         For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
447         |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
448      d = 1.2 * (2.0 * (double) m + 48.0);
449      /* the error on s is bounded by d*2^err_s * 2^(-w) */
450      mpfr_sqrt (t, v, MPFR_RNDN);
451      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
452         thus t = sqrt(v0)*(1+u)^(2k+3/2). */
453      mpfr_mul (s, s, t, MPFR_RNDN);
454      /* the error on input s is bounded by (1+u)^(d*2^err_s),
455         and that on t is (1+u)^(2k+3/2), thus the
456         total error is (1+u)^(d*2^err_s+2k+5/2) */
457      err_s += __gmpfr_ceil_log2 (d);
458      err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
459      err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
460#else
461      mpfr_log (t, v, MPFR_RNDN);
462      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
463         thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
464         for |u| <= 2^(-3), the absolute error on log(v) is bounded by
465         1.07*(4k+1)*u, and the rounding error by ulp(t). */
466      mpfr_div_2ui (t, t, 1, MPFR_RNDN);
467      /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
468         We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
469         since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
470         Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
471      err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
472        __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
473      err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
474        __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
475      mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
476      /* the final error in ulp(s) is
477         <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
478         <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
479         <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
480      err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
481      err_s += 1 - MPFR_GET_EXP(s);
482#endif
483    }
484  while (MPFR_UNLIKELY (!MPFR_CAN_ROUND (s, w - err_s, precy, rnd)));
485
486  oldBm = Bm;
487  while (Bm--)
488    mpz_clear (B[Bm]);
489  (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
490
491 end:
492  inexact = mpfr_set (y, s, rnd);
493
494  mpfr_clear (s);
495  mpfr_clear (t);
496  mpfr_clear (u);
497  mpfr_clear (v);
498  mpfr_clear (z);
499
500  MPFR_SAVE_EXPO_FREE (expo);
501  return mpfr_check_range (y, inexact, rnd);
502}
503
504#ifndef IS_GAMMA
505
506int
507mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
508{
509  int inex;
510
511  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd),
512                 ("lngamma[%#R]=%R inexact=%d", y, y, inex));
513
514  /* special cases */
515  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
516    {
517      if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x))
518        {
519          MPFR_SET_NAN (y);
520          MPFR_RET_NAN;
521        }
522      else /* lngamma(+Inf) = lngamma(+0) = +Inf */
523        {
524          MPFR_SET_INF (y);
525          MPFR_SET_POS (y);
526          MPFR_RET (0);  /* exact */
527        }
528    }
529
530  /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */
531  if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x)))
532    {
533      MPFR_SET_NAN (y);
534      MPFR_RET_NAN;
535    }
536
537  inex = mpfr_lngamma_aux (y, x, rnd);
538  return inex;
539}
540
541int
542mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
543{
544  int inex;
545
546  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd),
547                 ("lgamma[%#R]=%R inexact=%d", y, y, inex));
548
549  *signp = 1;  /* most common case */
550
551  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
552    {
553      if (MPFR_IS_NAN (x))
554        {
555          MPFR_SET_NAN (y);
556          MPFR_RET_NAN;
557        }
558      else
559        {
560          *signp = MPFR_INT_SIGN (x);
561          MPFR_SET_INF (y);
562          MPFR_SET_POS (y);
563          MPFR_RET (0);
564        }
565    }
566
567  if (MPFR_IS_NEG (x))
568    {
569      if (mpfr_integer_p (x))
570        {
571          MPFR_SET_INF (y);
572          MPFR_SET_POS (y);
573          MPFR_RET (0);
574        }
575
576      if (unit_bit (x) == 0)
577        *signp = -1;
578
579      /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
580         thus |gamma(x)| = -1/x + euler + O(x), and
581         log |gamma(x)| = -log(-x) - euler*x + O(x^2).
582         More precisely we have for -0.4 <= x < 0:
583         -log(-x) <= log |gamma(x)| <= -log(-x) - x.
584         Since log(x) is not representable, we may have an instance of the
585         Table Maker Dilemma. The only way to ensure correct rounding is to
586         compute an interval [l,h] such that l <= -log(-x) and
587         -log(-x) - x <= h, and check whether l and h round to the same number
588         for the target precision and rounding modes. */
589      if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
590        /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
591           thus |x| <= 0.25 < 0.4 */
592        {
593          mpfr_t l, h;
594          int ok, inex2;
595          mpfr_prec_t w = MPFR_PREC (y) + 14;
596
597          while (1)
598            {
599              mpfr_init2 (l, w);
600              mpfr_init2 (h, w);
601              /* we want a lower bound on -log(-x), thus an upper bound
602                 on log(-x), thus an upper bound on -x. */
603              mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
604              mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
605              mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
606              mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
607              mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
608              mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
609              mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
610              inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
611              inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
612              /* Caution: we not only need l = h, but both inexact flags
613                 should agree. Indeed, one of the inexact flags might be
614                 zero. In that case if we assume ln|gamma(x)| cannot be
615                 exact, the other flag should be correct. We are conservative
616                 here and request that both inexact flags agree. */
617              ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
618              if (ok)
619                mpfr_set (y, h, rnd); /* exact */
620              mpfr_clear (l);
621              mpfr_clear (h);
622              if (ok)
623                return inex;
624              /* if ulp(log(-x)) <= |x| there is no reason to loop,
625                 since the width of [l, h] will be at least |x| */
626              if (MPFR_EXP(l) < MPFR_EXP(x) + (mpfr_exp_t) w)
627                break;
628              w += MPFR_INT_CEIL_LOG2(w) + 3;
629            }
630        }
631    }
632
633  inex = mpfr_lngamma_aux (y, x, rnd);
634  return inex;
635}
636
637#endif
638