1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/* Long double expansions are
13  Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14  and are incorporated herein by permission of the author.  The author
15  reserves the right to distribute this material elsewhere under different
16  copying permissions.  These modifications are distributed here under
17  the following terms:
18
19    This library is free software; you can redistribute it and/or
20    modify it under the terms of the GNU Lesser General Public
21    License as published by the Free Software Foundation; either
22    version 2.1 of the License, or (at your option) any later version.
23
24    This library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with this library; if not, write to the Free Software
31    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
32
33/* __ieee754_lgammal_r(x, signgamp)
34 * Reentrant version of the logarithm of the Gamma function
35 * with user provide pointer for the sign of Gamma(x).
36 *
37 * Method:
38 *   1. Argument Reduction for 0 < x <= 8
39 *	Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
40 *	reduce x to a number in [1.5,2.5] by
41 *		lgamma(1+s) = log(s) + lgamma(s)
42 *	for example,
43 *		lgamma(7.3) = log(6.3) + lgamma(6.3)
44 *			    = log(6.3*5.3) + lgamma(5.3)
45 *			    = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
46 *   2. Polynomial approximation of lgamma around its
47 *	minimun ymin=1.461632144968362245 to maintain monotonicity.
48 *	On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
49 *		Let z = x-ymin;
50 *		lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
51 *   2. Rational approximation in the primary interval [2,3]
52 *	We use the following approximation:
53 *		s = x-2.0;
54 *		lgamma(x) = 0.5*s + s*P(s)/Q(s)
55 *	Our algorithms are based on the following observation
56 *
57 *                             zeta(2)-1    2    zeta(3)-1    3
58 * lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
59 *                                 2                 3
60 *
61 *	where Euler = 0.5771... is the Euler constant, which is very
62 *	close to 0.5.
63 *
64 *   3. For x>=8, we have
65 *	lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
66 *	(better formula:
67 *	   lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
68 *	Let z = 1/x, then we approximation
69 *		f(z) = lgamma(x) - (x-0.5)(log(x)-1)
70 *	by
71 *				    3       5             11
72 *		w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
73 *
74 *   4. For negative x, since (G is gamma function)
75 *		-x*G(-x)*G(x) = pi/sin(pi*x),
76 *	we have
77 *		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
78 *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
79 *	Hence, for x<0, signgam = sign(sin(pi*x)) and
80 *		lgamma(x) = log(|Gamma(x)|)
81 *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
82 *	Note: one should avoid compute pi*(-x) directly in the
83 *	      computation of sin(pi*(-x)).
84 *
85 *   5. Special Cases
86 *		lgamma(2+s) ~ s*(1-Euler) for tiny s
87 *		lgamma(1)=lgamma(2)=0
88 *		lgamma(x) ~ -log(x) for tiny x
89 *		lgamma(0) = lgamma(inf) = inf
90 *		lgamma(-integer) = +-inf
91 *
92 */
93
94#include "math.h"
95#include "math_private.h"
96
97#ifdef __STDC__
98static const long double
99#else
100static long double
101#endif
102  half = 0.5L,
103  one = 1.0L,
104  pi = 3.14159265358979323846264L,
105  two63 = 9.223372036854775808e18L,
106
107  /* lgam(1+x) = 0.5 x + x a(x)/b(x)
108     -0.268402099609375 <= x <= 0
109     peak relative error 6.6e-22 */
110  a0 = -6.343246574721079391729402781192128239938E2L,
111  a1 =  1.856560238672465796768677717168371401378E3L,
112  a2 =  2.404733102163746263689288466865843408429E3L,
113  a3 =  8.804188795790383497379532868917517596322E2L,
114  a4 =  1.135361354097447729740103745999661157426E2L,
115  a5 =  3.766956539107615557608581581190400021285E0L,
116
117  b0 =  8.214973713960928795704317259806842490498E3L,
118  b1 =  1.026343508841367384879065363925870888012E4L,
119  b2 =  4.553337477045763320522762343132210919277E3L,
120  b3 =  8.506975785032585797446253359230031874803E2L,
121  b4 =  6.042447899703295436820744186992189445813E1L,
122  /* b5 =  1.000000000000000000000000000000000000000E0 */
123
124
125  tc =  1.4616321449683623412626595423257213284682E0L,
126  tf = -1.2148629053584961146050602565082954242826E-1,/* double precision */
127/* tt = (tail of tf), i.e. tf + tt has extended precision. */
128  tt = 3.3649914684731379602768989080467587736363E-18L,
129  /* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
130-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */
131
132  /* lgam (x + tc) = tf + tt + x g(x)/h(x)
133     - 0.230003726999612341262659542325721328468 <= x
134     <= 0.2699962730003876587373404576742786715318
135     peak relative error 2.1e-21 */
136  g0 = 3.645529916721223331888305293534095553827E-18L,
137  g1 = 5.126654642791082497002594216163574795690E3L,
138  g2 = 8.828603575854624811911631336122070070327E3L,
139  g3 = 5.464186426932117031234820886525701595203E3L,
140  g4 = 1.455427403530884193180776558102868592293E3L,
141  g5 = 1.541735456969245924860307497029155838446E2L,
142  g6 = 4.335498275274822298341872707453445815118E0L,
143
144  h0 = 1.059584930106085509696730443974495979641E4L,
145  h1 =  2.147921653490043010629481226937850618860E4L,
146  h2 = 1.643014770044524804175197151958100656728E4L,
147  h3 =  5.869021995186925517228323497501767586078E3L,
148  h4 =  9.764244777714344488787381271643502742293E2L,
149  h5 =  6.442485441570592541741092969581997002349E1L,
150  /* h6 = 1.000000000000000000000000000000000000000E0 */
151
152
153  /* lgam (x+1) = -0.5 x + x u(x)/v(x)
154     -0.100006103515625 <= x <= 0.231639862060546875
155     peak relative error 1.3e-21 */
156  u0 = -8.886217500092090678492242071879342025627E1L,
157  u1 =  6.840109978129177639438792958320783599310E2L,
158  u2 =  2.042626104514127267855588786511809932433E3L,
159  u3 =  1.911723903442667422201651063009856064275E3L,
160  u4 =  7.447065275665887457628865263491667767695E2L,
161  u5 =  1.132256494121790736268471016493103952637E2L,
162  u6 =  4.484398885516614191003094714505960972894E0L,
163
164  v0 =  1.150830924194461522996462401210374632929E3L,
165  v1 =  3.399692260848747447377972081399737098610E3L,
166  v2 =  3.786631705644460255229513563657226008015E3L,
167  v3 =  1.966450123004478374557778781564114347876E3L,
168  v4 =  4.741359068914069299837355438370682773122E2L,
169  v5 =  4.508989649747184050907206782117647852364E1L,
170  /* v6 =  1.000000000000000000000000000000000000000E0 */
171
172
173  /* lgam (x+2) = .5 x + x s(x)/r(x)
174     0 <= x <= 1
175     peak relative error 7.2e-22 */
176  s0 =  1.454726263410661942989109455292824853344E6L,
177  s1 = -3.901428390086348447890408306153378922752E6L,
178  s2 = -6.573568698209374121847873064292963089438E6L,
179  s3 = -3.319055881485044417245964508099095984643E6L,
180  s4 = -7.094891568758439227560184618114707107977E5L,
181  s5 = -6.263426646464505837422314539808112478303E4L,
182  s6 = -1.684926520999477529949915657519454051529E3L,
183
184  r0 = -1.883978160734303518163008696712983134698E7L,
185  r1 = -2.815206082812062064902202753264922306830E7L,
186  r2 = -1.600245495251915899081846093343626358398E7L,
187  r3 = -4.310526301881305003489257052083370058799E6L,
188  r4 = -5.563807682263923279438235987186184968542E5L,
189  r5 = -3.027734654434169996032905158145259713083E4L,
190  r6 = -4.501995652861105629217250715790764371267E2L,
191  /* r6 =  1.000000000000000000000000000000000000000E0 */
192
193
194/* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
195   x >= 8
196   Peak relative error 1.51e-21
197   w0 = LS2PI - 0.5 */
198  w0 =  4.189385332046727417803e-1L,
199  w1 =  8.333333333333331447505E-2L,
200  w2 = -2.777777777750349603440E-3L,
201  w3 =  7.936507795855070755671E-4L,
202  w4 = -5.952345851765688514613E-4L,
203  w5 =  8.412723297322498080632E-4L,
204  w6 = -1.880801938119376907179E-3L,
205  w7 =  4.885026142432270781165E-3L;
206
207#ifdef __STDC__
208static const long double zero = 0.0L;
209#else
210static long double zero = 0.0L;
211#endif
212
213#ifdef __STDC__
214static long double
215sin_pi (long double x)
216#else
217static long double
218sin_pi (x)
219     long double x;
220#endif
221{
222  long double y, z;
223  int n, ix;
224  u_int32_t se, i0, i1;
225
226  GET_LDOUBLE_WORDS (se, i0, i1, x);
227  ix = se & 0x7fff;
228  ix = (ix << 16) | (i0 >> 16);
229  if (ix < 0x3ffd8000) /* 0.25 */
230    return __sinl (pi * x);
231  y = -x;			/* x is assume negative */
232
233  /*
234   * argument reduction, make sure inexact flag not raised if input
235   * is an integer
236   */
237  z = __floorl (y);
238  if (z != y)
239    {				/* inexact anyway */
240      y  *= 0.5;
241      y = 2.0*(y - __floorl(y));		/* y = |x| mod 2.0 */
242      n = (int) (y*4.0);
243    }
244  else
245    {
246      if (ix >= 0x403f8000)  /* 2^64 */
247	{
248	  y = zero; n = 0;                 /* y must be even */
249	}
250      else
251	{
252	if (ix < 0x403e8000)  /* 2^63 */
253	  z = y + two63;	/* exact */
254	GET_LDOUBLE_WORDS (se, i0, i1, z);
255	n = i1 & 1;
256	y  = n;
257	n <<= 2;
258      }
259    }
260
261  switch (n)
262    {
263    case 0:
264      y = __sinl (pi * y);
265      break;
266    case 1:
267    case 2:
268      y = __cosl (pi * (half - y));
269      break;
270    case 3:
271    case 4:
272      y = __sinl (pi * (one - y));
273      break;
274    case 5:
275    case 6:
276      y = -__cosl (pi * (y - 1.5));
277      break;
278    default:
279      y = __sinl (pi * (y - 2.0));
280      break;
281    }
282  return -y;
283}
284
285
286#ifdef __STDC__
287long double
288__ieee754_lgammal_r (long double x, int *signgamp)
289#else
290long double
291__ieee754_lgammal_r (x, signgamp)
292     long double x;
293     int *signgamp;
294#endif
295{
296  long double t, y, z, nadj, p, p1, p2, q, r, w;
297  int i, ix;
298  u_int32_t se, i0, i1;
299
300  *signgamp = 1;
301  GET_LDOUBLE_WORDS (se, i0, i1, x);
302  ix = se & 0x7fff;
303
304  if ((ix | i0 | i1) == 0)
305    return one / fabsl (x);
306
307  ix = (ix << 16) | (i0 >> 16);
308
309  /* purge off +-inf, NaN, +-0, and negative arguments */
310  if (ix >= 0x7fff0000)
311    return x * x;
312
313  if (ix < 0x3fc08000) /* 2^-63 */
314    {				/* |x|<2**-63, return -log(|x|) */
315      if (se & 0x8000)
316	{
317	  *signgamp = -1;
318	  return -__ieee754_logl (-x);
319	}
320      else
321	return -__ieee754_logl (x);
322    }
323  if (se & 0x8000)
324    {
325      t = sin_pi (x);
326      if (t == zero)
327	return one / fabsl (t);	/* -integer */
328      nadj = __ieee754_logl (pi / fabsl (t * x));
329      if (t < zero)
330	*signgamp = -1;
331      x = -x;
332    }
333
334  /* purge off 1 and 2 */
335  if ((((ix - 0x3fff8000) | i0 | i1) == 0)
336      || (((ix - 0x40008000) | i0 | i1) == 0))
337    r = 0;
338  else if (ix < 0x40008000) /* 2.0 */
339    {
340      /* x < 2.0 */
341      if (ix <= 0x3ffee666) /* 8.99993896484375e-1 */
342	{
343	  /* lgamma(x) = lgamma(x+1) - log(x) */
344	  r = -__ieee754_logl (x);
345	  if (ix >= 0x3ffebb4a) /* 7.31597900390625e-1 */
346	    {
347	      y = x - one;
348	      i = 0;
349	    }
350	  else if (ix >= 0x3ffced33)/* 2.31639862060546875e-1 */
351	    {
352	      y = x - (tc - one);
353	      i = 1;
354	    }
355	  else
356	    {
357	      /* x < 0.23 */
358	      y = x;
359	      i = 2;
360	    }
361	}
362      else
363	{
364	  r = zero;
365	  if (ix >= 0x3fffdda6) /* 1.73162841796875 */
366	    {
367	      /* [1.7316,2] */
368	      y = x - 2.0;
369	      i = 0;
370	    }
371	  else if (ix >= 0x3fff9da6)/* 1.23162841796875 */
372	    {
373	      /* [1.23,1.73] */
374	      y = x - tc;
375	      i = 1;
376	    }
377	  else
378	    {
379	      /* [0.9, 1.23] */
380	      y = x - one;
381	      i = 2;
382	    }
383	}
384      switch (i)
385	{
386	case 0:
387	  p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
388	  p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
389	  r += half * y + y * p1/p2;
390	  break;
391	case 1:
392    p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
393    p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
394    p = tt + y * p1/p2;
395	  r += (tf + p);
396	  break;
397	case 2:
398 p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
399      p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
400	  r += (-half * y + p1 / p2);
401	}
402    }
403  else if (ix < 0x40028000) /* 8.0 */
404    {
405      /* x < 8.0 */
406      i = (int) x;
407      t = zero;
408      y = x - (double) i;
409  p = y *
410     (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
411  q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
412      r = half * y + p / q;
413      z = one;			/* lgamma(1+s) = log(s) + lgamma(s) */
414      switch (i)
415	{
416	case 7:
417	  z *= (y + 6.0);	/* FALLTHRU */
418	case 6:
419	  z *= (y + 5.0);	/* FALLTHRU */
420	case 5:
421	  z *= (y + 4.0);	/* FALLTHRU */
422	case 4:
423	  z *= (y + 3.0);	/* FALLTHRU */
424	case 3:
425	  z *= (y + 2.0);	/* FALLTHRU */
426	  r += __ieee754_logl (z);
427	  break;
428	}
429    }
430  else if (ix < 0x40418000) /* 2^66 */
431    {
432      /* 8.0 <= x < 2**66 */
433      t = __ieee754_logl (x);
434      z = one / x;
435      y = z * z;
436      w = w0 + z * (w1
437          + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
438      r = (x - half) * (t - one) + w;
439    }
440  else
441    /* 2**66 <= x <= inf */
442    r = x * (__ieee754_logl (x) - one);
443  if (se & 0x8000)
444    r = nadj - r;
445  return r;
446}
447