1/* mpfr_lngamma -- lngamma function
2
3Copyright 2005-2023 Free Software Foundation, Inc.
4Contributed by the AriC and Caramba projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* given a precision p, return alpha, such that the argument reduction
27   will use k = alpha*p*log(2).
28
29   Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
30   and the smallest value of alpha multiplied by the smallest working
31   precision should be >= 4.
32*/
33static void
34mpfr_gamma_alpha (mpfr_ptr s, mpfr_prec_t p)
35{
36  MPFR_LOG_FUNC
37    (("p=%Pu", p),
38     ("s[%Pu]=%.*Rg", mpfr_get_prec (s), mpfr_log_prec, s));
39
40  if (p <= 100)
41    mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
42  else if (p <= 500)
43    mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
44  else if (p <= 1000)
45    mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
46  else if (p <= 2000)
47    mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
48  else if (p <= 5000)
49    mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
50  else if (p <= 10000)
51    mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
52  else
53    mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
54}
55
56#ifdef IS_GAMMA
57
58/* This function is called in case of intermediate overflow/underflow.
59   The s1 and s2 arguments are temporary MPFR numbers, having the
60   working precision. If the result could be determined, then the
61   flags are updated via pexpo, y is set to the result, and the
62   (non-zero) ternary value is returned. Otherwise 0 is returned
63   in order to perform the next Ziv iteration. */
64static int
65mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
66                mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
67{
68  mpfr_t t1, t2;
69  int inex1, inex2, sign;
70  MPFR_BLOCK_DECL (flags1);
71  MPFR_BLOCK_DECL (flags2);
72  MPFR_GROUP_DECL (group);
73
74  MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
75  MPFR_ASSERTN (inex1 != 0);
76  /* s1 = RNDD(lngamma(x)), inexact */
77  if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
78    {
79      if (MPFR_IS_POS (s1))
80        {
81          MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
82          return mpfr_overflow (y, rnd, sign);
83        }
84      else
85        {
86          MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
87          return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
88        }
89    }
90
91  mpfr_set (s2, s1, MPFR_RNDN);     /* exact */
92  mpfr_nextabove (s2);              /* v = RNDU(lngamma(z0)) */
93
94  if (sign < 0)
95    rnd = MPFR_INVERT_RND (rnd);  /* since the result with be negated */
96  MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
97  MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
98  MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
99  /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
100     t2 is the rounding with mode 'rnd' of an upper bound, thus if both
101     are equal, so is the wanted result. If t1 and t2 differ or the flags
102     differ, at some point of Ziv's loop they should agree. */
103  if (mpfr_equal_p (t1, t2) && flags1 == flags2)
104    {
105      MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
106      mpfr_set4 (y, t1, MPFR_RNDN, sign);  /* exact */
107      if (sign < 0)
108        inex1 = - inex1;
109      MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
110    }
111  else
112    inex1 = 0;  /* couldn't determine the result */
113  MPFR_GROUP_CLEAR (group);
114
115  return inex1;
116}
117
118#else
119
120static int
121unit_bit (mpfr_srcptr x)
122{
123  mpfr_exp_t expo;
124  mpfr_prec_t prec;
125  mp_limb_t x0;
126
127  expo = MPFR_GET_EXP (x);
128  if (expo <= 0)
129    return 0;  /* |x| < 1 */
130
131  prec = MPFR_PREC (x);
132  if (expo > prec)
133    return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
134
135  /* Now, the unit bit is represented. */
136
137  prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
138  /* number of represented fractional bits (including the trailing 0's) */
139
140  x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
141  /* limb containing the unit bit */
142
143  return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
144}
145
146#endif
147
148/* FIXME: There is an internal overflow when z is very large.
149   Simple overflow detection with possible false negatives?
150   For the particular cases near the overflow boundary,
151   scaling by a power of two?
152*/
153
154/* lngamma(x) = log(gamma(x)).
155   We use formula [6.1.40] from Abramowitz&Stegun:
156   lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
157              + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
158   According to [6.1.42], if the sum is truncated after m=n, the error
159   R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
160   where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
161   For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
162 */
163#ifdef IS_GAMMA
164#define GAMMA_FUNC mpfr_gamma_aux
165#else
166#define GAMMA_FUNC mpfr_lngamma_aux
167#endif
168
169static int
170GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
171{
172  mpfr_prec_t precy, w; /* working precision */
173  mpfr_t s, t, u, v, z;
174  unsigned long m, k, maxm, l;
175  int compared, inexact;
176  mpfr_exp_t err_s, err_t;
177  double d;
178  MPFR_SAVE_EXPO_DECL (expo);
179  MPFR_ZIV_DECL (loop);
180
181  MPFR_LOG_FUNC
182    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (z0), mpfr_log_prec, z0, rnd),
183     ("y[%Pu]=%.*Rg inexact=%d",
184      mpfr_get_prec (y), mpfr_log_prec, y, inexact));
185
186  compared = mpfr_cmp_ui (z0, 1);
187
188  MPFR_SAVE_EXPO_MARK (expo);
189
190#ifndef IS_GAMMA /* lngamma or lgamma */
191  if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
192    {
193      MPFR_SAVE_EXPO_FREE (expo);
194      return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
195    }
196
197  /* Deal with very large inputs: according to [6.1.42], if we denote
198     R_n(z) = lngamma(z) - (z-1/2)*log(z) + z - 1/2*log(2*Pi), we have
199     |R_n(z)| <= B_2/2/z, thus for z >= 2 we have
200     |lngamma(z) - [z*(log(z) - 1)]| < 1/2*log(z) + 1. */
201  if (compared > 0 && MPFR_GET_EXP (z0) >= (mpfr_exp_t) MPFR_PREC(y) + 2)
202    {
203      /* Since PREC(y) >= 2, this ensures EXP(z0) >= 4, thus |z0| >= 8,
204         thus 1/2*log(z0) + 1 < log(z0).
205         Since the largest possible z is < 2^(2^62) on a 64-bit machine,
206         the largest value of log(z) is 2^62*log(2.) < 3.2e18 < 2^62,
207         thus if we use at least 62 bits of precision, then log(t)-1 will
208         be exact */
209      mpfr_init2 (t, MPFR_PREC(y) >= 52 ? MPFR_PREC(y) + 10 : 62);
210      mpfr_log (t, z0, MPFR_RNDU); /* error < 1 ulp */
211      inexact = mpfr_sub_ui (t, t, 1, MPFR_RNDU); /* err < 2 ulps, since the
212                                                     exponent of t might have
213                                                     decreased */
214      MPFR_ASSERTD(inexact == 0);
215      mpfr_mul (t, z0, t, MPFR_RNDU); /* err < 1+2*2=5 ulps according to
216                                        "Generic error on multiplication"
217                                        in algorithms.tex */
218      if (MPFR_IS_INF(t))
219        {
220          mpfr_clear (t);
221          MPFR_SAVE_EXPO_FREE (expo);
222          inexact = mpfr_overflow (y, rnd, 1);
223          return inexact;
224        }
225      if (MPFR_GET_EXP(t) - MPFR_PREC(t) >= 62)
226        {
227          /* then ulp(t) >= 2^62 > log(z0) thus the total error is bounded
228             by 6 ulp(t) */
229          if (MPFR_CAN_ROUND (t, MPFR_PREC(t) - 3, MPFR_PREC(y), rnd))
230            {
231              inexact = mpfr_set (y, t, rnd);
232              mpfr_clear (t);
233              MPFR_SAVE_EXPO_FREE (expo);
234              return mpfr_check_range (y, inexact, rnd);
235            }
236        }
237      mpfr_clear (t);
238    }
239
240  /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
241     - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
242  if (MPFR_GET_EXP (z0) <= - (mpfr_exp_t) MPFR_PREC(y))
243    {
244      mpfr_t l, h, g;
245      int ok, inex1, inex2;
246      mpfr_prec_t prec = MPFR_PREC(y) + 14;
247      MPFR_ZIV_DECL (loop);
248
249      MPFR_ZIV_INIT (loop, prec);
250      do
251        {
252          mpfr_init2 (l, prec);
253          if (MPFR_IS_POS(z0))
254            {
255              mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
256              mpfr_init2 (h, MPFR_PREC(l));
257            }
258          else
259            {
260              mpfr_init2 (h, MPFR_PREC(z0));
261              mpfr_neg (h, z0, MPFR_RNDN); /* exact */
262              mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
263              mpfr_set_prec (h, MPFR_PREC(l));
264            }
265          mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
266          mpfr_set (h, l, MPFR_RNDD); /* exact */
267          mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
268                                 to mpfr_log */
269          mpfr_init2 (g, MPFR_PREC(l));
270          /* if z0>0, we need an upper approximation of Euler's constant
271             for the left bound */
272          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
273          mpfr_mul (g, g, z0, MPFR_RNDD);
274          mpfr_sub (l, l, g, MPFR_RNDD);
275          mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
276          mpfr_mul (g, g, z0, MPFR_RNDU);
277          mpfr_sub (h, h, g, MPFR_RNDD);
278          mpfr_sqr (g, z0, MPFR_RNDU);
279          mpfr_add (h, h, g, MPFR_RNDU);
280          inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
281          inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
282          /* Caution: we not only need l = h, but both inexact flags should
283             agree. Indeed, one of the inexact flags might be zero. In that
284             case if we assume lngamma(z0) cannot be exact, the other flag
285             should be correct. We are conservative here and request that both
286             inexact flags agree. */
287          ok = SAME_SIGN (inex1, inex2) && mpfr_equal_p (l, h);
288          if (ok)
289            mpfr_set (y, h, rnd); /* exact */
290          mpfr_clear (l);
291          mpfr_clear (h);
292          mpfr_clear (g);
293          if (ok)
294            {
295              MPFR_ZIV_FREE (loop);
296              MPFR_SAVE_EXPO_FREE (expo);
297              return mpfr_check_range (y, inex1, rnd);
298            }
299          /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
300             if x ~ 2^(-n), then we have a n-bit approximation, thus
301             we can try again with a working precision of n bits,
302             especially when n >> PREC(y).
303             Otherwise we would use the reflection formula evaluating x-1,
304             which would need precision n. */
305          MPFR_ZIV_NEXT (loop, prec);
306        }
307      while (prec <= - MPFR_GET_EXP (z0));
308      MPFR_ZIV_FREE (loop);
309    }
310#endif
311
312  precy = MPFR_PREC(y);
313
314  mpfr_init2 (s, MPFR_PREC_MIN);
315  mpfr_init2 (t, MPFR_PREC_MIN);
316  mpfr_init2 (u, MPFR_PREC_MIN);
317  mpfr_init2 (v, MPFR_PREC_MIN);
318  mpfr_init2 (z, MPFR_PREC_MIN);
319
320  inexact = 0; /* 0 means: result y not set yet */
321
322  if (compared < 0)
323    {
324      mpfr_exp_t err_u;
325
326      /* use reflection formula:
327         gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
328         thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
329
330      w = precy + MPFR_INT_CEIL_LOG2 (precy);
331      w += MPFR_INT_CEIL_LOG2 (w) + 14;
332      MPFR_ZIV_INIT (loop, w);
333      while (1)
334        {
335          MPFR_ASSERTD(w >= 3);
336          mpfr_set_prec (s, w);
337          mpfr_set_prec (t, w);
338          mpfr_set_prec (u, w);
339          mpfr_set_prec (v, w);
340          /* In the following, we write r for a real of absolute value
341             at most 2^(-w). Different instances of r may represent different
342             values. */
343          mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
344          mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
345          mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
346          /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
347             We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
348             Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
349             the error on u is bounded by
350             ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
351             = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
352          d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
353          if (MPFR_IS_ZERO(u)) /* in that case the error on u is zero */
354            err_u = 0;
355          else
356            err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
357          err_u = (err_u >= 0) ? err_u + 1 : 0;
358          /* now the error on u is bounded by 2^err_u ulps */
359
360          mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
361          err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
362          mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
363          /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
364             <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
365             <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
366          err_s += 3 - MPFR_GET_EXP(s);
367          err_s = (err_s >= 0) ? err_s + 1 : 0;
368          /* the error on s is bounded by 2^err_s ulp(s), thus by
369             2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
370             Now n*2^(-w) can always be written |(1+r)^n-1| for some
371             |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
372             |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
373             true value.
374             In fact if ulp(s) <= ulp(S) the same inequality holds for
375             |S| instead of |s| in the right hand side, i.e., we can
376             write s = (1+r)^(2^(err_s+1)) * S.
377             But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
378             to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
379             E = n*2^(-w) we have |s - S| <= E * |s|, thus
380             |s - S| <= E/(1-E) * |S|.
381             Now E/(1-E) is bounded by 2E as long as E<=1/2,
382             and 2E can be written (1+r)^(2n)-1 as above.
383          */
384          err_s += 2; /* exponent of relative error */
385
386          mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
387          mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
388          mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
389          mpfr_abs (v, v, MPFR_RNDN);
390          /* (1+r)^(3+2^err_s+1) */
391          err_s = (err_s <= 1) ? 3 : err_s + 1;
392          /* now (1+r)^M with M <= 2^err_s */
393          mpfr_log (v, v, MPFR_RNDN);
394          /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
395             Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
396             bounded by ulp(v)/2 + 2^(err_s+1-w). */
397          if (err_s + 2 > w)
398            {
399              w += err_s + 2;
400            }
401          else
402            {
403              /* if v = 0 here, it was 1 before the call to mpfr_log,
404                 thus the error on v was zero */
405              if (!MPFR_IS_ZERO(v))
406                err_s += 1 - MPFR_GET_EXP(v);
407              err_s = (err_s >= 0) ? err_s + 1 : 0;
408              /* the error on v is bounded by 2^err_s ulps */
409              err_u += MPFR_GET_EXP(u); /* absolute error on u */
410              if (!MPFR_IS_ZERO(v)) /* same as above */
411                err_s += MPFR_GET_EXP(v); /* absolute error on v */
412              mpfr_sub (s, v, u, MPFR_RNDN);
413              /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
414                 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
415              err_s = (err_s >= err_u) ? err_s : err_u;
416              err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
417              err_s = (err_s >= 0) ? err_s + 1 : 0;
418              if (MPFR_CAN_ROUND (s, w - err_s, precy, rnd))
419                goto end;
420            }
421          MPFR_ZIV_NEXT (loop, w);
422        }
423      MPFR_ZIV_FREE (loop);
424    }
425
426  /* now z0 > 1 */
427
428  MPFR_ASSERTD (compared > 0);
429
430  /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
431     so there is a cancellation of ~log(w) in the argument reconstruction */
432  w = precy + MPFR_INT_CEIL_LOG2 (precy);
433  w += MPFR_INT_CEIL_LOG2 (w) + 13;
434  MPFR_ZIV_INIT (loop, w);
435  while (1)
436    {
437      MPFR_ASSERTD (w >= 3);
438
439      /* argument reduction: we compute gamma(z0 + k), where the series
440         has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
441         and we need k steps of argument reconstruction. Assuming k is large
442         with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
443         k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
444         However, since the series is slightly more expensive to compute,
445         the optimal value seems to be k ~ 0.25 * w experimentally (with
446         caching of Bernoulli numbers).
447         For only one computation of gamma with large precision, it is better
448         to set k to a larger value, say k ~ w. */
449      mpfr_set_prec (s, 53);
450      mpfr_gamma_alpha (s, w);
451      mpfr_set_ui_2exp (s, 4, -4, MPFR_RNDU);
452      mpfr_mul_ui (s, s, w, MPFR_RNDU);
453      if (mpfr_cmp (z0, s) < 0)
454        {
455          mpfr_sub (s, s, z0, MPFR_RNDU);
456          k = mpfr_get_ui (s, MPFR_RNDU);
457          if (k < 3)
458            k = 3;
459        }
460      else
461        k = 3;
462
463      mpfr_set_prec (s, w);
464      mpfr_set_prec (t, w);
465      mpfr_set_prec (u, w);
466      mpfr_set_prec (v, w);
467      mpfr_set_prec (z, w);
468
469      mpfr_add_ui (z, z0, k, MPFR_RNDN);
470      /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
471
472      /* z >= 4 ensures the relative error on log(z) is small,
473         and also (z-1/2)*log(z)-z >= 0 */
474      MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
475
476      mpfr_log (s, z, MPFR_RNDN); /* log(z) */
477      /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
478         Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
479         = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
480         s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
481      mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
482      mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
483      /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
484         t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
485      mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
486      mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
487      mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
488      /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
489
490      mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
491
492      /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
493      mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
494      mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
495      mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
496
497      mpfr_sqr (u, u, MPFR_RNDN);        /* 1/z^2 * (1+u)^3 */
498
499      /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
500      maxm = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2 - 1);
501
502      /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
503
504      for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
505        {
506          mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
507          if (m <= maxm)
508            {
509              mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
510              mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
511              mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
512            }
513          else
514            {
515              mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
516              mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
517              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
518              mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
519              mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
520              mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
521            }
522          /* (1+u)^(10m-8) */
523          /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
524          mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
525          MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
526          mpfr_add (s, s, v, MPFR_RNDN);
527        }
528      /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
529      MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
530
531      /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
532         <= 1.46*u for u <= 2^(-3).
533         We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
534         for z >= 4, thus since the initial s >= 0.85, the different values of
535         s differ by at most one binade, and the total rounding error on s
536         in the for-loop is bounded by 2*(m-1)*ulp(final_s).
537         The error coming from the v's is bounded by
538         1.46*2^(-w) <= 2*ulp(final_s).
539         Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
540         <= (2m+47)*ulp(s).
541         Taking into account the truncation error (which is bounded by the last
542         term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
543      */
544
545      /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
546      mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
547      mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
548      /* k >= 3 */
549      mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
550      l = 1;
551
552/* replace #if 1 by #if 0 for the naive argument reconstruction */
553#if 1
554
555      /* We multiply by (z0+1)*(z0+2)*...*(z0+k-1) by blocks of j consecutive
556         terms where j ~ sqrt(k).
557         If we multiply naively by z0+1, then by z0+2, ..., then by z0+j,
558         the multiplicative term for the rounding error is (1+u)^(2j).
559         The multiplicative term is not larger when we multiply by
560         Z[j] + c[j-1]*Z[j-1] + ... + c[2]*Z[2] + c[1]*z0 + c[0]
561         with c[p] integers, and Z[p] = z0^p * (1+u)^(p-1).
562         Note that all terms are positive.
563         Indeed, since c[1] is exact, c[1]*z0 corresponds to (1+u),
564         then c[1]*z0 + c[0] corresponds to (1+u)^2,
565         c[2]*Z[2] + c[1]*z0 + c[0] to (1+u)^3, ...,
566         c[j-1]*Z[j-1] + ... + c[0] to (1+u)^j,
567         and Z[j] + c[j-1]*Z[j-1] + ... + c[1]*z0 + c[0] to (1+u)^(j+1).
568         With the accumulation in t, we get (1+u)^(j+2) and j+2 <= 2j. */
569      {
570        unsigned long j, i, p;
571        mpfr_t *Z;
572        mpz_t *c;
573        for (j = 2; (j + 1) * (j + 1) < k; j++);
574        /* Z[i] stores z0^i for i <= j */
575        Z = (mpfr_t *) mpfr_allocate_func ((j + 1) * sizeof (mpfr_t));
576        for (i = 2; i <= j; i++)
577          mpfr_init2 (Z[i], w);
578        mpfr_sqr (Z[2], z0, MPFR_RNDN);
579        for (i = 3; i <= j; i++)
580          if ((i & 1) == 0)
581            mpfr_sqr (Z[i], Z[i >> 1], MPFR_RNDN);
582          else
583            mpfr_mul (Z[i], Z[i-1], z0, MPFR_RNDN);
584        c = (mpz_t *) mpfr_allocate_func ((j + 1) * sizeof (mpz_t));
585        for (i = 0; i <= j; i++)
586          mpz_init (c[i]);
587        for (; l + j <= k; l += j)
588          {
589            /* c[i] is the coefficient of x^i in (x+l)*...*(x+l+j-1) */
590            mpz_set_ui (c[0], 1);
591            for (i = 0; i < j; i++)
592              /* multiply (x+l)*(x+l+1)*...*(x+l+i-1) by x+l+i:
593                 (b[i]*x^i + b[i-1]*x^(i-1) + ... + b[0])*(x+l+i) =
594                 b[i]*x^(i+1) + (b[i-1]+(l+i)*b[i])*x^i + ...
595                 + (b[0]+(l+i)*b[1])*x + i*b[0] */
596              {
597                mpz_set (c[i+1], c[i]); /* b[i]*x^(i+1) */
598                for (p = i; p > 0; p--)
599                  {
600                    mpz_mul_ui (c[p], c[p], l + i);
601                    mpz_add (c[p], c[p], c[p-1]); /* b[p-1]+(l+i)*b[p] */
602                  }
603                mpz_mul_ui (c[0], c[0], l+i); /* i*b[0] */
604              }
605            /* now compute z0^j + c[j-1]*z0^(j-1) + ... + c[1]*z0 + c[0] */
606            mpfr_set_z (u, c[0], MPFR_RNDN);
607            for (i = 0; i < j; i++)
608              {
609                mpfr_mul_z (z, (i == 0) ? z0 : Z[i+1], c[i+1], MPFR_RNDN);
610                mpfr_add (u, u, z, MPFR_RNDN);
611              }
612            mpfr_mul (t, t, u, MPFR_RNDN);
613          }
614        for (i = 0; i <= j; i++)
615          mpz_clear (c[i]);
616        mpfr_free_func (c, (j + 1) * sizeof (mpz_t));
617        for (i = 2; i <= j; i++)
618          mpfr_clear (Z[i]);
619        mpfr_free_func (Z, (j + 1) * sizeof (mpfr_t));
620      }
621#endif /* end of fast argument reconstruction */
622
623      for (; l < k; l++)
624        {
625          mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
626          mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
627        }
628      /* now t: (1+u)^(2k-1) */
629      /* instead of computing log(sqrt(2*Pi)/t), we compute
630         1/2*log(2*Pi/t^2), which trades a square root for a square */
631      mpfr_sqr (t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
632      mpfr_div (v, v, t, MPFR_RNDN);
633      /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
634#ifdef IS_GAMMA
635      err_s = MPFR_GET_EXP(s);
636      mpfr_exp (s, s, MPFR_RNDN);
637      /* If s is +Inf, we compute exp(lngamma(z0)). */
638      if (mpfr_inf_p (s))
639        {
640          inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
641          if (inexact)
642            goto end0;
643          else
644            goto ziv_next;
645        }
646      /* before the exponential, we have s = s0 + h where
647         |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
648         For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
649         |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
650      /* d = 1.2 * (2.0 * (double) m + 48.0); */
651      /* the error on s is bounded by d*2^err_s * 2^(-w) */
652      mpfr_sqrt (t, v, MPFR_RNDN);
653      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
654         thus t = sqrt(v0)*(1+u)^(2k+3/2). */
655      mpfr_mul (s, s, t, MPFR_RNDN);
656      /* the error on input s is bounded by (1+u)^(d*2^err_s),
657         and that on t is (1+u)^(2k+3/2), thus the
658         total error is (1+u)^(d*2^err_s+2k+5/2) */
659      /* err_s += __gmpfr_ceil_log2 (d); */
660      /* since d = 1.2 * (2m+48), ceil(log2(d)) = 2 + ceil(log2(0.6*m+14.4))
661         <= 2 + ceil(log2(0.6*m+15)) */
662      {
663        unsigned long mm = (1 + m / 5) * 3; /* 0.6*m <= mm */
664        err_s += 2 + __gmpfr_int_ceil_log2 (mm + 15);
665      }
666      err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
667      err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
668#else
669      mpfr_log (t, v, MPFR_RNDN);
670      /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
671         thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
672         for |u| <= 2^(-3), the absolute error on log(v) is bounded by
673         1.07*(4k+1)*u, and the rounding error by ulp(t). */
674      mpfr_div_2ui (t, t, 1, MPFR_RNDN);
675      /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
676         We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
677         since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
678         Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
679      err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
680        __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
681      err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
682        __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
683      mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
684      /* the final error in ulp(s) is
685         <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
686         <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
687         <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
688      err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
689      err_s += 1 - MPFR_GET_EXP(s);
690#endif
691      if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
692        break;
693#ifdef IS_GAMMA
694    ziv_next:
695#endif
696      MPFR_ZIV_NEXT (loop, w);
697    }
698
699#ifdef IS_GAMMA
700 end0:
701#endif
702
703 end:
704  if (inexact == 0)
705    inexact = mpfr_set (y, s, rnd);
706  MPFR_ZIV_FREE (loop);
707
708  mpfr_clear (s);
709  mpfr_clear (t);
710  mpfr_clear (u);
711  mpfr_clear (v);
712  mpfr_clear (z);
713
714  MPFR_SAVE_EXPO_FREE (expo);
715  return mpfr_check_range (y, inexact, rnd);
716}
717
718#ifndef IS_GAMMA
719
720int
721mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
722{
723  int inex;
724
725  MPFR_LOG_FUNC
726    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
727     ("y[%Pu]=%.*Rg inexact=%d",
728      mpfr_get_prec (y), mpfr_log_prec, y, inex));
729
730  /* special cases */
731  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x) ||
732                     (MPFR_IS_NEG (x) && mpfr_integer_p (x))))
733    {
734      if (MPFR_IS_NAN (x))
735        {
736          MPFR_SET_NAN (y);
737          MPFR_RET_NAN;
738        }
739      else /* lngamma(+/-Inf) = lngamma(non-positive integer) = +Inf */
740        {
741          if (!MPFR_IS_INF (x))
742            MPFR_SET_DIVBY0 ();
743          MPFR_SET_INF (y);
744          MPFR_SET_POS (y);
745          MPFR_RET (0);  /* exact */
746        }
747    }
748
749  /* if -2k-1 < x < -2k <= 0, then lngamma(x) = NaN */
750  if (MPFR_IS_NEG (x) && unit_bit (x) == 0)
751    {
752      MPFR_SET_NAN (y);
753      MPFR_RET_NAN;
754    }
755
756  inex = mpfr_lngamma_aux (y, x, rnd);
757  return inex;
758}
759
760int
761mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
762{
763  int inex;
764
765  MPFR_LOG_FUNC
766    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
767     ("y[%Pu]=%.*Rg signp=%d inexact=%d",
768      mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
769
770  *signp = 1;  /* most common case */
771
772  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
773    {
774      if (MPFR_IS_NAN (x))
775        {
776          MPFR_SET_NAN (y);
777          MPFR_RET_NAN;
778        }
779      else
780        {
781          if (MPFR_IS_ZERO (x))
782            MPFR_SET_DIVBY0 ();
783          *signp = MPFR_INT_SIGN (x);
784          MPFR_SET_INF (y);
785          MPFR_SET_POS (y);
786          MPFR_RET (0);
787        }
788    }
789
790  if (MPFR_IS_NEG (x))
791    {
792      if (mpfr_integer_p (x))
793        {
794          MPFR_SET_INF (y);
795          MPFR_SET_POS (y);
796          MPFR_SET_DIVBY0 ();
797          MPFR_RET (0);
798        }
799
800      if (unit_bit (x) == 0)
801        *signp = -1;
802
803      /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
804         thus |gamma(x)| = -1/x + euler + O(x), and
805         log |gamma(x)| = -log(-x) - euler*x + O(x^2).
806         More precisely we have for -0.4 <= x < 0:
807         -log(-x) <= log |gamma(x)| <= -log(-x) - x.
808         Since log(x) is not representable, we may have an instance of the
809         Table Maker Dilemma. The only way to ensure correct rounding is to
810         compute an interval [l,h] such that l <= -log(-x) and
811         -log(-x) - x <= h, and check whether l and h round to the same number
812         for the target precision and rounding modes. */
813      if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
814        /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
815           thus |x| <= 0.25 < 0.4 */
816        {
817          mpfr_t l, h;
818          int ok, inex2;
819          mpfr_prec_t w = MPFR_PREC (y) + 14;
820          mpfr_exp_t expl;
821          MPFR_SAVE_EXPO_DECL (expo);
822
823          MPFR_SAVE_EXPO_MARK (expo);
824
825          while (1)
826            {
827              mpfr_init2 (l, w);
828              mpfr_init2 (h, w);
829              /* we want a lower bound on -log(-x), thus an upper bound
830                 on log(-x), thus an upper bound on -x. */
831              mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
832              mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
833              mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
834              mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
835              mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
836              mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
837              mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
838              inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
839              inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
840              /* Caution: we not only need l = h, but both inexact flags
841                 should agree. Indeed, one of the inexact flags might be
842                 zero. In that case if we assume ln|gamma(x)| cannot be
843                 exact, the other flag should be correct. We are conservative
844                 here and request that both inexact flags agree. */
845              ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
846              if (ok)
847                mpfr_set (y, h, rnd); /* exact */
848              else
849                expl = MPFR_EXP (l);
850              mpfr_clear (l);
851              mpfr_clear (h);
852              if (ok)
853                {
854                  MPFR_SAVE_EXPO_FREE (expo);
855                  return mpfr_check_range (y, inex, rnd);
856                }
857              /* if ulp(log(-x)) <= |x| there is no reason to loop,
858                 since the width of [l, h] will be at least |x| */
859              if (expl < MPFR_EXP (x) + w)
860                break;
861              w += MPFR_INT_CEIL_LOG2(w) + 3;
862            }
863
864          MPFR_SAVE_EXPO_FREE (expo);
865        }
866    }
867
868  inex = mpfr_lngamma_aux (y, x, rnd);
869  return inex;
870}
871
872#endif
873