1/* mpfr_lngamma -- lngamma function
2
3Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4Contributed by the AriC and Caramel projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* given a precision p, return alpha, such that the argument reduction
27   will use k = alpha*p*log(2).
28
29   Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
30   and the smallest value of alpha multiplied by the smallest working
31   precision should be >= 4.
32*/
33static void
34mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p)
35{
36  if (p <= 100)
37    mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
38  else if (p <= 500)
39    mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
40  else if (p <= 1000)
41    mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
42  else if (p <= 2000)
43    mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
44  else if (p <= 5000)
45    mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
46  else if (p <= 10000)
47    mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
48  else
49    mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
50}
51
52#ifdef IS_GAMMA
53
54/* This function is called in case of intermediate overflow/underflow.
55   The s1 and s2 arguments are temporary MPFR numbers, having the
56   working precision. If the result could be determined, then the
57   flags are updated via pexpo, y is set to the result, and the
58   (non-zero) ternary value is returned. Otherwise 0 is returned
59   in order to perform the next Ziv iteration. */
60static int
61mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
62                mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
63{
64  mpfr_t t1, t2;
65  int inex1, inex2, sign;
66  MPFR_BLOCK_DECL (flags1);
67  MPFR_BLOCK_DECL (flags2);
68  MPFR_GROUP_DECL (group);
69
70  MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
71  MPFR_ASSERTN (inex1 != 0);
72  /* s1 = RNDD(lngamma(x)), inexact */
73  if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
74    {
75      if (MPFR_SIGN (s1) > 0)
76        {
77          MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
78          return mpfr_overflow (y, rnd, sign);
79        }
80      else
81        {
82          MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
83          return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
84        }
85    }
86
87  mpfr_set (s2, s1, MPFR_RNDN);     /* exact */
88  mpfr_nextabove (s2);              /* v = RNDU(lngamma(z0)) */
89
90  if (sign < 0)
91    rnd = MPFR_INVERT_RND (rnd);  /* since the result with be negated */
92  MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
93  MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
94  MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
95  /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
96     t2 is the rounding with mode 'rnd' of an upper bound, thus if both
97     are equal, so is the wanted result. If t1 and t2 differ or the flags
98     differ, at some point of Ziv's loop they should agree. */
99  if (mpfr_equal_p (t1, t2) && flags1 == flags2)
100    {
101      MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
102      mpfr_set4 (y, t1, MPFR_RNDN, sign);  /* exact */
103      if (sign < 0)
104        inex1 = - inex1;
105      MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
106    }
107  else
108    inex1 = 0;  /* couldn't determine the result */
109  MPFR_GROUP_CLEAR (group);
110
111  return inex1;
112}
113
114#else
115
116static int
117unit_bit (mpfr_srcptr x)
118{
119  mpfr_exp_t expo;
120  mpfr_prec_t prec;
121  mp_limb_t x0;
122
123  expo = MPFR_GET_EXP (x);
124  if (expo <= 0)
125    return 0;  /* |x| < 1 */
126
127  prec = MPFR_PREC (x);
128  if (expo > prec)
129    return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
130
131  /* Now, the unit bit is represented. */
132
133  prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
134  /* number of represented fractional bits (including the trailing 0's) */
135
136  x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
137  /* limb containing the unit bit */
138
139  return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
140}
141
142#endif
143
144/* lngamma(x) = log(gamma(x)).
145   We use formula [6.1.40] from Abramowitz&Stegun:
146   lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
147              + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
148   According to [6.1.42], if the sum is truncated after m=n, the error
149   R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
150   where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
151   For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
152 */
153#ifdef IS_GAMMA
154#define GAMMA_FUNC mpfr_gamma_aux
155#else
156#define GAMMA_FUNC mpfr_lngamma_aux
157#endif
158
159static int
160GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
161{
162  mpfr_prec_t precy, w; /* working precision */
163  mpfr_t s, t, u, v, z;
164  unsigned long m, k, maxm;
165  mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
166  int compared;
167  int inexact = 0;  /* 0 means: result y not set yet */
168  mpfr_exp_t err_s, err_t;
169  unsigned long Bm = 0; /* number of allocated B[] */
170  unsigned long oldBm;
171  double d;
172  MPFR_SAVE_EXPO_DECL (expo);
173  MPFR_ZIV_DECL (loop);
174
175  compared = mpfr_cmp_ui (z0, 1);
176
177  MPFR_SAVE_EXPO_MARK (expo);
178
179#ifndef IS_GAMMA /* lngamma or lgamma */
180  if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
181    {
182      MPFR_SAVE_EXPO_FREE (expo);
183      return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
184    }
185
186  /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
187     - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
188  if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y))
189    {
190      mpfr_t l, h, g;
191      int ok, inex1, inex2;
192      mpfr_prec_t prec = MPFR_PREC(y) + 14;
193      MPFR_ZIV_DECL (loop);
194
195      MPFR_ZIV_INIT (loop, prec);
196      do
197        {
198          mpfr_init2 (l, prec);
199          if (MPFR_IS_POS(z0))
200            {
201              mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
202              mpfr_init2 (h, MPFR_PREC(l));
203            }
204          else
205            {
206              mpfr_init2 (h, MPFR_PREC(z0));
207              mpfr_neg (h, z0, MPFR_RNDN); /* exact */
208              mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
209              mpfr_set_prec (h, MPFR_PREC(l));
210            }
211          mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
212          mpfr_set (h, l, MPFR_RNDD); /* exact */
213          mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
214                                 to mpfr_log */
215          mpfr_init2 (g, MPFR_PREC(l));
216          /* if z0>0, we need an upper approximation of Euler's constant
217             for the left bound */
218          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
219          mpfr_mul (g, g, z0, MPFR_RNDD);
220          mpfr_sub (l, l, g, MPFR_RNDD);
221          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
222          mpfr_mul (g, g, z0, MPFR_RNDU);
223          mpfr_sub (h, h, g, MPFR_RNDD);
224          mpfr_mul (g, z0, z0, MPFR_RNDU);
225          mpfr_add (h, h, g, MPFR_RNDU);
226          inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
227          inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
228          /* Caution: we not only need l = h, but both inexact flags should
229             agree. Indeed, one of the inexact flags might be zero. In that
230             case if we assume lngamma(z0) cannot be exact, the other flag
231             should be correct. We are conservative here and request that both
232             inexact flags agree. */
233          ok = SAME_SIGN (inex1, inex2) && mpfr_cmp (l, h) == 0;
234          if (ok)
235            mpfr_set (y, h, rnd); /* exact */
236          mpfr_clear (l);
237          mpfr_clear (h);
238          mpfr_clear (g);
239          if (ok)
240            {
241              MPFR_ZIV_FREE (loop);
242              MPFR_SAVE_EXPO_FREE (expo);
243              return mpfr_check_range (y, inex1, rnd);
244            }
245          /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
246             if x ~ 2^(-n), then we have a n-bit approximation, thus
247             we can try again with a working precision of n bits,
248             especially when n >> PREC(y).
249             Otherwise we would use the reflection formula evaluating x-1,
250             which would need precision n. */
251          MPFR_ZIV_NEXT (loop, prec);
252        }
253      while (prec <= -MPFR_EXP(z0));
254      MPFR_ZIV_FREE (loop);
255    }
256#endif
257
258  precy = MPFR_PREC(y);
259
260  mpfr_init2 (s, MPFR_PREC_MIN);
261  mpfr_init2 (t, MPFR_PREC_MIN);
262  mpfr_init2 (u, MPFR_PREC_MIN);
263  mpfr_init2 (v, MPFR_PREC_MIN);
264  mpfr_init2 (z, MPFR_PREC_MIN);
265
266  if (compared < 0)
267    {
268      mpfr_exp_t err_u;
269
270      /* use reflection formula:
271         gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
272         thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
273
274      w = precy + MPFR_INT_CEIL_LOG2 (precy);
275      w += MPFR_INT_CEIL_LOG2 (w) + 14;
276      MPFR_ZIV_INIT (loop, w);
277      while (1)
278        {
279          MPFR_ASSERTD(w >= 3);
280          mpfr_set_prec (s, w);
281          mpfr_set_prec (t, w);
282          mpfr_set_prec (u, w);
283          mpfr_set_prec (v, w);
284          /* In the following, we write r for a real of absolute value
285             at most 2^(-w). Different instances of r may represent different
286             values. */
287          mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
288          mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
289          mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
290          /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
291             We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
292             Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
293             the error on u is bounded by
294             ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
295             = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
296          d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
297          err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
298          err_u = (err_u >= 0) ? err_u + 1 : 0;
299          /* now the error on u is bounded by 2^err_u ulps */
300
301          mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
302          err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
303          mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
304          /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
305             <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
306             <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
307          err_s += 3 - MPFR_GET_EXP(s);
308          err_s = (err_s >= 0) ? err_s + 1 : 0;
309          /* the error on s is bounded by 2^err_s ulp(s), thus by
310             2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
311             Now n*2^(-w) can always be written |(1+r)^n-1| for some
312             |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
313             |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
314             true value.
315             In fact if ulp(s) <= ulp(S) the same inequality holds for
316             |S| instead of |s| in the right hand side, i.e., we can
317             write s = (1+r)^(2^(err_s+1)) * S.
318             But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
319             to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
320             E = n*2^(-w) we have |s - S| <= E * |s|, thus
321             |s - S| <= E/(1-E) * |S|.
322             Now E/(1-E) is bounded by 2E as long as E<=1/2,
323             and 2E can be written (1+r)^(2n)-1 as above.
324          */
325          err_s += 2; /* exponent of relative error */
326
327          mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
328          mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
329          mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
330          mpfr_abs (v, v, MPFR_RNDN);
331          /* (1+r)^(3+2^err_s+1) */
332          err_s = (err_s <= 1) ? 3 : err_s + 1;
333          /* now (1+r)^M with M <= 2^err_s */
334          mpfr_log (v, v, MPFR_RNDN);
335          /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
336             Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
337             bounded by ulp(v)/2 + 2^(err_s+1-w). */
338          if (err_s + 2 > w)
339            {
340              w += err_s + 2;
341            }
342          else
343            {
344              err_s += 1 - MPFR_GET_EXP(v);
345              err_s = (err_s >= 0) ? err_s + 1 : 0;
346              /* the error on v is bounded by 2^err_s ulps */
347              err_u += MPFR_GET_EXP(u); /* absolute error on u */
348              err_s += MPFR_GET_EXP(v); /* absolute error on v */
349              mpfr_sub (s, v, u, MPFR_RNDN);
350              /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
351                 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
352              err_s = (err_s >= err_u) ? err_s : err_u;
353              err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
354              err_s = (err_s >= 0) ? err_s + 1 : 0;
355              if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy
356                                  + (rnd == MPFR_RNDN)))
357                goto end;
358            }
359          MPFR_ZIV_NEXT (loop, w);
360        }
361      MPFR_ZIV_FREE (loop);
362    }
363
364  /* now z0 > 1 */
365
366  MPFR_ASSERTD (compared > 0);
367
368  /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
369     so there is a cancellation of ~log(w) in the argument reconstruction */
370  w = precy + MPFR_INT_CEIL_LOG2 (precy);
371  w += MPFR_INT_CEIL_LOG2 (w) + 13;
372  MPFR_ZIV_INIT (loop, w);
373  while (1)
374    {
375      MPFR_ASSERTD (w >= 3);
376
377      /* argument reduction: we compute gamma(z0 + k), where the series
378         has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
379         and we need k steps of argument reconstruction. Assuming k is large
380         with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
381         k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
382         However, since the series is more expensive to compute, the optimal
383         value seems to be k ~ 4.5 * w experimentally. */
384      mpfr_set_prec (s, 53);
385      mpfr_gamma_alpha (s, w);
386      mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU);
387      mpfr_mul_ui (s, s, w, MPFR_RNDU);
388      if (mpfr_cmp (z0, s) < 0)
389        {
390          mpfr_sub (s, s, z0, MPFR_RNDU);
391          k = mpfr_get_ui (s, MPFR_RNDU);
392          if (k < 3)
393            k = 3;
394        }
395      else
396        k = 3;
397
398      mpfr_set_prec (s, w);
399      mpfr_set_prec (t, w);
400      mpfr_set_prec (u, w);
401      mpfr_set_prec (v, w);
402      mpfr_set_prec (z, w);
403
404      mpfr_add_ui (z, z0, k, MPFR_RNDN);
405      /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
406
407      /* z >= 4 ensures the relative error on log(z) is small,
408         and also (z-1/2)*log(z)-z >= 0 */
409      MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
410
411      mpfr_log (s, z, MPFR_RNDN); /* log(z) */
412      /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
413         Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
414         = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
415         s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
416      mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
417      mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
418      /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
419         t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
420      mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
421      mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
422      mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
423      /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
424
425      mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
426
427      /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
428      mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
429      mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
430      mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
431
432      mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
433
434      if (Bm == 0)
435        {
436          B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
437          B = mpfr_bernoulli_internal (B, 1);
438          Bm = 2;
439        }
440
441      /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
442      maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
443
444      /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
445
446      for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
447        {
448          mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
449          if (m <= maxm)
450            {
451              mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
452              mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
453              mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
454            }
455          else
456            {
457              mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
458              mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
459              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
460              mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
461              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
462              mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
463            }
464          /* (1+u)^(10m-8) */
465          /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
466          if (Bm <= m)
467            {
468              B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
469              Bm ++;
470            }
471          mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
472          MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
473          mpfr_add (s, s, v, MPFR_RNDN);
474        }
475      /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
476      MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
477
478      /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
479         <= 1.46*u for u <= 2^(-3).
480         We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
481         for z >= 4, thus since the initial s >= 0.85, the different values of
482         s differ by at most one binade, and the total rounding error on s
483         in the for-loop is bounded by 2*(m-1)*ulp(final_s).
484         The error coming from the v's is bounded by
485         1.46*2^(-w) <= 2*ulp(final_s).
486         Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
487         <= (2m+47)*ulp(s).
488         Taking into account the truncation error (which is bounded by the last
489         term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
490      */
491
492      /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
493      mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
494      mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
495      if (k)
496        {
497          unsigned long l;
498          mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
499          for (l = 1; l < k; l++)
500            {
501              mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
502              mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
503            }
504          /* now t: (1+u)^(2k-1) */
505          /* instead of computing log(sqrt(2*Pi)/t), we compute
506             1/2*log(2*Pi/t^2), which trades a square root for a square */
507          mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
508          mpfr_div (v, v, t, MPFR_RNDN);
509          /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
510        }
511#ifdef IS_GAMMA
512      err_s = MPFR_GET_EXP(s);
513      mpfr_exp (s, s, MPFR_RNDN);
514      /* If s is +Inf, we compute exp(lngamma(z0)). */
515      if (mpfr_inf_p (s))
516        {
517          inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
518          if (inexact)
519            goto end0;
520          else
521            goto ziv_next;
522        }
523      /* before the exponential, we have s = s0 + h where
524         |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
525         For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
526         |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
527      d = 1.2 * (2.0 * (double) m + 48.0);
528      /* the error on s is bounded by d*2^err_s * 2^(-w) */
529      mpfr_sqrt (t, v, MPFR_RNDN);
530      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
531         thus t = sqrt(v0)*(1+u)^(2k+3/2). */
532      mpfr_mul (s, s, t, MPFR_RNDN);
533      /* the error on input s is bounded by (1+u)^(d*2^err_s),
534         and that on t is (1+u)^(2k+3/2), thus the
535         total error is (1+u)^(d*2^err_s+2k+5/2) */
536      err_s += __gmpfr_ceil_log2 (d);
537      err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
538      err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
539#else
540      mpfr_log (t, v, MPFR_RNDN);
541      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
542         thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
543         for |u| <= 2^(-3), the absolute error on log(v) is bounded by
544         1.07*(4k+1)*u, and the rounding error by ulp(t). */
545      mpfr_div_2ui (t, t, 1, MPFR_RNDN);
546      /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
547         We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
548         since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
549         Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
550      err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
551        __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
552      err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
553        __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
554      mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
555      /* the final error in ulp(s) is
556         <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
557         <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
558         <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
559      err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
560      err_s += 1 - MPFR_GET_EXP(s);
561#endif
562      if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
563        break;
564#ifdef IS_GAMMA
565    ziv_next:
566#endif
567      MPFR_ZIV_NEXT (loop, w);
568    }
569
570#ifdef IS_GAMMA
571 end0:
572#endif
573  oldBm = Bm;
574  while (Bm--)
575    mpz_clear (B[Bm]);
576  (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
577
578 end:
579  if (inexact == 0)
580    inexact = mpfr_set (y, s, rnd);
581  MPFR_ZIV_FREE (loop);
582
583  mpfr_clear (s);
584  mpfr_clear (t);
585  mpfr_clear (u);
586  mpfr_clear (v);
587  mpfr_clear (z);
588
589  MPFR_SAVE_EXPO_FREE (expo);
590  return mpfr_check_range (y, inexact, rnd);
591}
592
593#ifndef IS_GAMMA
594
595int
596mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
597{
598  int inex;
599
600  MPFR_LOG_FUNC
601    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
602     ("y[%Pu]=%.*Rg inexact=%d",
603      mpfr_get_prec (y), mpfr_log_prec, y, inex));
604
605  /* special cases */
606  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
607    {
608      if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x))
609        {
610          MPFR_SET_NAN (y);
611          MPFR_RET_NAN;
612        }
613      else /* lngamma(+Inf) = lngamma(+0) = +Inf */
614        {
615          if (MPFR_IS_ZERO (x))
616            mpfr_set_divby0 ();
617          MPFR_SET_INF (y);
618          MPFR_SET_POS (y);
619          MPFR_RET (0);  /* exact */
620        }
621    }
622
623  /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */
624  if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x)))
625    {
626      MPFR_SET_NAN (y);
627      MPFR_RET_NAN;
628    }
629
630  inex = mpfr_lngamma_aux (y, x, rnd);
631  return inex;
632}
633
634int
635mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
636{
637  int inex;
638
639  MPFR_LOG_FUNC
640    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
641     ("y[%Pu]=%.*Rg signp=%d inexact=%d",
642      mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
643
644  *signp = 1;  /* most common case */
645
646  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
647    {
648      if (MPFR_IS_NAN (x))
649        {
650          MPFR_SET_NAN (y);
651          MPFR_RET_NAN;
652        }
653      else
654        {
655          if (MPFR_IS_ZERO (x))
656            mpfr_set_divby0 ();
657          *signp = MPFR_INT_SIGN (x);
658          MPFR_SET_INF (y);
659          MPFR_SET_POS (y);
660          MPFR_RET (0);
661        }
662    }
663
664  if (MPFR_IS_NEG (x))
665    {
666      if (mpfr_integer_p (x))
667        {
668          MPFR_SET_INF (y);
669          MPFR_SET_POS (y);
670          mpfr_set_divby0 ();
671          MPFR_RET (0);
672        }
673
674      if (unit_bit (x) == 0)
675        *signp = -1;
676
677      /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
678         thus |gamma(x)| = -1/x + euler + O(x), and
679         log |gamma(x)| = -log(-x) - euler*x + O(x^2).
680         More precisely we have for -0.4 <= x < 0:
681         -log(-x) <= log |gamma(x)| <= -log(-x) - x.
682         Since log(x) is not representable, we may have an instance of the
683         Table Maker Dilemma. The only way to ensure correct rounding is to
684         compute an interval [l,h] such that l <= -log(-x) and
685         -log(-x) - x <= h, and check whether l and h round to the same number
686         for the target precision and rounding modes. */
687      if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
688        /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
689           thus |x| <= 0.25 < 0.4 */
690        {
691          mpfr_t l, h;
692          int ok, inex2;
693          mpfr_prec_t w = MPFR_PREC (y) + 14;
694          mpfr_exp_t expl;
695
696          while (1)
697            {
698              mpfr_init2 (l, w);
699              mpfr_init2 (h, w);
700              /* we want a lower bound on -log(-x), thus an upper bound
701                 on log(-x), thus an upper bound on -x. */
702              mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
703              mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
704              mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
705              mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
706              mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
707              mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
708              mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
709              inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
710              inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
711              /* Caution: we not only need l = h, but both inexact flags
712                 should agree. Indeed, one of the inexact flags might be
713                 zero. In that case if we assume ln|gamma(x)| cannot be
714                 exact, the other flag should be correct. We are conservative
715                 here and request that both inexact flags agree. */
716              ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
717              if (ok)
718                mpfr_set (y, h, rnd); /* exact */
719              else
720                expl = MPFR_EXP (l);
721              mpfr_clear (l);
722              mpfr_clear (h);
723              if (ok)
724                return inex;
725              /* if ulp(log(-x)) <= |x| there is no reason to loop,
726                 since the width of [l, h] will be at least |x| */
727              if (expl < MPFR_EXP(x) + (mpfr_exp_t) w)
728                break;
729              w += MPFR_INT_CEIL_LOG2(w) + 3;
730            }
731        }
732    }
733
734  inex = mpfr_lngamma_aux (y, x, rnd);
735  return inex;
736}
737
738#endif
739