1/* @(#)e_lgamma_r.c 1.3 95/01/18 */
2/*
3 * ====================================================
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 *
6 * Developed at SunSoft, a Sun Microsystems, Inc. business.
7 * Permission to use, copy, modify, and distribute this
8 * software is freely granted, provided that this notice
9 * is preserved.
10 * ====================================================
11 */
12
13#include <sys/cdefs.h>
14__FBSDID("$FreeBSD: stable/11/lib/msun/ld80/e_lgammal_r.c 324006 2017-09-26 09:01:56Z dim $");
15
16/*
17 * See e_lgamma_r.c for complete comments.
18 *
19 * Converted to long double by Steven G. Kargl.
20 */
21
22#ifdef __i386__
23#include <ieeefp.h>
24#endif
25
26#include "fpmath.h"
27#include "math.h"
28#include "math_private.h"
29
30static const volatile double vzero = 0;
31
32static const double
33zero=  0,
34half=  0.5,
35one =  1;
36
37static const union IEEEl2bits
38piu = LD80C(0xc90fdaa22168c235, 1,  3.14159265358979323851e+00L);
39#define	pi	(piu.e)
40/*
41 * Domain y in [0x1p-70, 0.27], range ~[-4.5264e-22, 4.5264e-22]:
42 * |(lgamma(2 - y) + y / 2) / y - a(y)| < 2**-70.9
43 */
44static const union IEEEl2bits
45a0u = LD80C(0x9e233f1bed863d26, -4,  7.72156649015328606028e-02L),
46a1u = LD80C(0xa51a6625307d3249, -2,  3.22467033424113218889e-01L),
47a2u = LD80C(0x89f000d2abafda8c, -4,  6.73523010531979398946e-02L),
48a3u = LD80C(0xa8991563eca75f26, -6,  2.05808084277991211934e-02L),
49a4u = LD80C(0xf2027e10634ce6b6, -8,  7.38555102796070454026e-03L),
50a5u = LD80C(0xbd6eb76dd22187f4, -9,  2.89051035162703932972e-03L),
51a6u = LD80C(0x9c562ab05e0458ed, -10,  1.19275351624639999297e-03L),
52a7u = LD80C(0x859baed93ee48e46, -11,  5.09674593842117925320e-04L),
53a8u = LD80C(0xe9f28a4432949af2, -13,  2.23109648015769155122e-04L),
54a9u = LD80C(0xd12ad0d9b93c6bb0, -14,  9.97387167479808509830e-05L),
55a10u= LD80C(0xb7522643c78a219b, -15,  4.37071076331030136818e-05L),
56a11u= LD80C(0xca024dcdece2cb79, -16,  2.40813493372040143061e-05L),
57a12u= LD80C(0xbb90fb6968ebdbf9, -19,  2.79495621083634031729e-06L),
58a13u= LD80C(0xba1c9ffeeae07b37, -17,  1.10931287015513924136e-05L);
59#define	a0	(a0u.e)
60#define	a1	(a1u.e)
61#define	a2	(a2u.e)
62#define	a3	(a3u.e)
63#define	a4	(a4u.e)
64#define	a5	(a5u.e)
65#define	a6	(a6u.e)
66#define	a7	(a7u.e)
67#define	a8	(a8u.e)
68#define	a9	(a9u.e)
69#define	a10	(a10u.e)
70#define	a11	(a11u.e)
71#define	a12	(a12u.e)
72#define	a13	(a13u.e)
73/*
74 * Domain x in [tc-0.24, tc+0.28], range ~[-6.1205e-22, 6.1205e-22]:
75 * |(lgamma(x) - tf) -  t(x - tc)| < 2**-70.5
76 */
77static const union IEEEl2bits
78tcu  = LD80C(0xbb16c31ab5f1fb71, 0,  1.46163214496836234128e+00L),
79tfu  = LD80C(0xf8cdcde61c520e0f, -4, -1.21486290535849608093e-01L),
80ttu  = LD80C(0xd46ee54b27d4de99, -69, -2.81152980996018785880e-21L),
81t0u  = LD80C(0x80b9406556a62a6b, -68,  3.40728634996055147231e-21L),
82t1u  = LD80C(0xc7e9c6f6df3f8c39, -67, -1.05833162742737073665e-20L),
83t2u  = LD80C(0xf7b95e4771c55d51, -2,  4.83836122723810583532e-01L),
84t3u  = LD80C(0x97213c6e35e119ff, -3, -1.47587722994530691476e-01L),
85t4u  = LD80C(0x845a14a6a81dc94b, -4,  6.46249402389135358063e-02L),
86t5u  = LD80C(0x864d46fa89997796, -5, -3.27885410884846056084e-02L),
87t6u  = LD80C(0x93373cbd00297438, -6,  1.79706751150707171293e-02L),
88t7u  = LD80C(0xa8fcfca7eddc8d1d, -7, -1.03142230361450732547e-02L),
89t8u  = LD80C(0xc7e7015ff4bc45af, -8,  6.10053603296546099193e-03L),
90t9u  = LD80C(0xf178d2247adc5093, -9, -3.68456964904901200152e-03L),
91t10u = LD80C(0x94188d58f12e5e9f, -9,  2.25976420273774583089e-03L),
92t11u = LD80C(0xb7cbaef14e1406f1, -10, -1.40224943666225639823e-03L),
93t12u = LD80C(0xe63a671e6704ea4d, -11,  8.78250640744776944887e-04L),
94t13u = LD80C(0x914b6c9cae61783e, -11, -5.54255012657716808811e-04L),
95t14u = LD80C(0xb858f5bdb79276fe, -12,  3.51614951536825927370e-04L),
96t15u = LD80C(0xea73e744c34b9591, -13, -2.23591563824520112236e-04L),
97t16u = LD80C(0x99aeabb0d67ba835, -13,  1.46562869351659194136e-04L),
98t17u = LD80C(0xd7c6938325db2024, -14, -1.02889866046435680588e-04L),
99t18u = LD80C(0xe24cb1e3b0474775, -15,  5.39540265505221957652e-05L);
100#define	tc	(tcu.e)
101#define	tf	(tfu.e)
102#define	tt	(ttu.e)
103#define	t0	(t0u.e)
104#define	t1	(t1u.e)
105#define	t2	(t2u.e)
106#define	t3	(t3u.e)
107#define	t4	(t4u.e)
108#define	t5	(t5u.e)
109#define	t6	(t6u.e)
110#define	t7	(t7u.e)
111#define	t8	(t8u.e)
112#define	t9	(t9u.e)
113#define	t10	(t10u.e)
114#define	t11	(t11u.e)
115#define	t12	(t12u.e)
116#define	t13	(t13u.e)
117#define	t14	(t14u.e)
118#define	t15	(t15u.e)
119#define	t16	(t16u.e)
120#define	t17	(t17u.e)
121#define	t18	(t18u.e)
122/*
123 * Domain y in [-0.1, 0.232], range ~[-8.1938e-22, 8.3815e-22]:
124 * |(lgamma(1 + y) + 0.5 * y) / y - u(y) / v(y)| < 2**-71.2
125 */
126static const union IEEEl2bits
127u0u = LD80C(0x9e233f1bed863d27, -4, -7.72156649015328606095e-02L),
128u1u = LD80C(0x98280ee45e4ddd3d, -1,  5.94361239198682739769e-01L),
129u2u = LD80C(0xe330c8ead4130733, 0,  1.77492629495841234275e+00L),
130u3u = LD80C(0xd4a213f1a002ec52, 0,  1.66119622514818078064e+00L),
131u4u = LD80C(0xa5a9ca6f5bc62163, -1,  6.47122051417476492989e-01L),
132u5u = LD80C(0xc980e49cd5b019e6, -4,  9.83903751718671509455e-02L),
133u6u = LD80C(0xff636a8bdce7025b, -9,  3.89691687802305743450e-03L),
134v1u = LD80C(0xbd109c533a19fbf5, 1,  2.95413883330948556544e+00L),
135v2u = LD80C(0xd295cbf96f31f099, 1,  3.29039286955665403176e+00L),
136v3u = LD80C(0xdab8bcfee40496cb, 0,  1.70876276441416471410e+00L),
137v4u = LD80C(0xd2f2dc3638567e9f, -2,  4.12009126299534668571e-01L),
138v5u = LD80C(0xa07d9b0851070f41, -5,  3.91822868305682491442e-02L),
139v6u = LD80C(0xe3cd8318f7adb2c4, -11,  8.68998648222144351114e-04L);
140#define	u0	(u0u.e)
141#define	u1	(u1u.e)
142#define	u2	(u2u.e)
143#define	u3	(u3u.e)
144#define	u4	(u4u.e)
145#define	u5	(u5u.e)
146#define	u6	(u6u.e)
147#define	v1	(v1u.e)
148#define	v2	(v2u.e)
149#define	v3	(v3u.e)
150#define	v4	(v4u.e)
151#define	v5	(v5u.e)
152#define	v6	(v6u.e)
153/*
154 * Domain x in (2, 3], range ~[-3.3648e-22, 3.4416e-22]:
155 * |(lgamma(y+2) - 0.5 * y) / y - s(y)/r(y)| < 2**-72.3
156 * with y = x - 2.
157 */
158static const union IEEEl2bits
159s0u = LD80C(0x9e233f1bed863d27, -4, -7.72156649015328606095e-02L),
160s1u = LD80C(0xd3ff0dcc7fa91f94, -3,  2.07027640921219389860e-01L),
161s2u = LD80C(0xb2bb62782478ef31, -2,  3.49085881391362090549e-01L),
162s3u = LD80C(0xb49f7438c4611a74, -3,  1.76389518704213357954e-01L),
163s4u = LD80C(0x9a957008fa27ecf9, -5,  3.77401710862930008071e-02L),
164s5u = LD80C(0xda9b389a6ca7a7ac, -9,  3.33566791452943399399e-03L),
165s6u = LD80C(0xbc7a2263faf59c14, -14,  8.98728786745638844395e-05L),
166r1u = LD80C(0xbf5cff5b11477d4d, 0,  1.49502555796294337722e+00L),
167r2u = LD80C(0xd9aec89de08e3da6, -1,  8.50323236984473285866e-01L),
168r3u = LD80C(0xeab7ae5057c443f9, -3,  2.29216312078225806131e-01L),
169r4u = LD80C(0xf29707d9bd2b1e37, -6,  2.96130326586640089145e-02L),
170r5u = LD80C(0xd376c2f09736c5a3, -10,  1.61334161411590662495e-03L),
171r6u = LD80C(0xc985983d0cd34e3d, -16,  2.40232770710953450636e-05L),
172r7u = LD80C(0xe5c7a4f7fc2ef13d, -25, -5.34997929289167573510e-08L);
173#define	s0	(s0u.e)
174#define	s1	(s1u.e)
175#define	s2	(s2u.e)
176#define	s3	(s3u.e)
177#define	s4	(s4u.e)
178#define	s5	(s5u.e)
179#define	s6	(s6u.e)
180#define	r1	(r1u.e)
181#define	r2	(r2u.e)
182#define	r3	(r3u.e)
183#define	r4	(r4u.e)
184#define	r5	(r5u.e)
185#define	r6	(r6u.e)
186#define	r7	(r7u.e)
187/*
188 * Domain z in [8, 0x1p70], range ~[-3.0235e-22, 3.0563e-22]:
189 * |lgamma(x) - (x - 0.5) * (log(x) - 1) - w(1/x)| < 2**-71.7
190 */
191static const union IEEEl2bits
192w0u = LD80C(0xd67f1c864beb4a69, -2,  4.18938533204672741776e-01L),
193w1u = LD80C(0xaaaaaaaaaaaaaaa1, -4,  8.33333333333333332678e-02L),
194w2u = LD80C(0xb60b60b60b5491c9, -9, -2.77777777777760927870e-03L),
195w3u = LD80C(0xd00d00cf58aede4c, -11,  7.93650793490637233668e-04L),
196w4u = LD80C(0x9c09bf626783d4a5, -11, -5.95238023926039051268e-04L),
197w5u = LD80C(0xdca7cadc5baa517b, -11,  8.41733700408000822962e-04L),
198w6u = LD80C(0xfb060e361e1ffd07, -10, -1.91515849570245136604e-03L),
199w7u = LD80C(0xcbd5101bb58d1f2b, -8,  6.22046743903262649294e-03L),
200w8u = LD80C(0xad27a668d32c821b, -6, -2.11370706734662081843e-02L);
201#define	w0	(w0u.e)
202#define	w1	(w1u.e)
203#define	w2	(w2u.e)
204#define	w3	(w3u.e)
205#define	w4	(w4u.e)
206#define	w5	(w5u.e)
207#define	w6	(w6u.e)
208#define	w7	(w7u.e)
209#define	w8	(w8u.e)
210
211static long double
212sin_pil(long double x)
213{
214	volatile long double vz;
215	long double y,z;
216	uint64_t n;
217	uint16_t hx;
218
219	y = -x;
220
221	vz = y+0x1p63;
222	z = vz-0x1p63;
223	if (z == y)
224	    return zero;
225
226	vz = y+0x1p61;
227	EXTRACT_LDBL80_WORDS(hx,n,vz);
228	z = vz-0x1p61;
229	if (z > y) {
230	    z -= 0.25;			/* adjust to round down */
231	    n--;
232	}
233	n &= 7;				/* octant of y mod 2 */
234	y = y - z + n * 0.25;		/* y mod 2 */
235
236	switch (n) {
237	    case 0:   y =  __kernel_sinl(pi*y,zero,0); break;
238	    case 1:
239	    case 2:   y =  __kernel_cosl(pi*(0.5-y),zero); break;
240	    case 3:
241	    case 4:   y =  __kernel_sinl(pi*(one-y),zero,0); break;
242	    case 5:
243	    case 6:   y = -__kernel_cosl(pi*(y-1.5),zero); break;
244	    default:  y =  __kernel_sinl(pi*(y-2.0),zero,0); break;
245	    }
246	return -y;
247}
248
249long double
250lgammal_r(long double x, int *signgamp)
251{
252	long double nadj,p,p1,p2,q,r,t,w,y,z;
253	uint64_t lx;
254	int i;
255	uint16_t hx,ix;
256
257	EXTRACT_LDBL80_WORDS(hx,lx,x);
258
259    /* purge +-Inf and NaNs */
260	*signgamp = 1;
261	ix = hx&0x7fff;
262	if(ix==0x7fff) return x*x;
263
264	ENTERI();
265
266    /* purge +-0 and tiny arguments */
267	*signgamp = 1-2*(hx>>15);
268	if(ix<0x3fff-67) {		/* |x|<2**-(p+3), return -log(|x|) */
269	    if((ix|lx)==0)
270		RETURNI(one/vzero);
271	    RETURNI(-logl(fabsl(x)));
272	}
273
274    /* purge negative integers and start evaluation for other x < 0 */
275	if(hx&0x8000) {
276	    *signgamp = 1;
277	    if(ix>=0x3fff+63) 		/* |x|>=2**(p-1), must be -integer */
278		RETURNI(one/vzero);
279	    t = sin_pil(x);
280	    if(t==zero) RETURNI(one/vzero); /* -integer */
281	    nadj = logl(pi/fabsl(t*x));
282	    if(t<zero) *signgamp = -1;
283	    x = -x;
284	}
285
286    /* purge 1 and 2 */
287	if((ix==0x3fff || ix==0x4000) && lx==0x8000000000000000ULL) r = 0;
288    /* for x < 2.0 */
289	else if(ix<0x4000) {
290    /*
291     * XXX Supposedly, one can use the following information to replace the
292     * XXX FP rational expressions.  A similar approach is appropriate
293     * XXX for ld128, but one (may need?) needs to consider llx, too.
294     *
295     * 8.9999961853027344e-01 3ffe e666600000000000
296     * 7.3159980773925781e-01 3ffe bb4a200000000000
297     * 2.3163998126983643e-01 3ffc ed33080000000000
298     * 1.7316312789916992e+00 3fff dda6180000000000
299     * 1.2316322326660156e+00 3fff 9da6200000000000
300     */
301	    if(x<8.9999961853027344e-01) {
302		r = -logl(x);
303		if(x>=7.3159980773925781e-01) {y = 1-x; i= 0;}
304		else if(x>=2.3163998126983643e-01) {y= x-(tc-1); i=1;}
305		else {y = x; i=2;}
306	    } else {
307		r = 0;
308		if(x>=1.7316312789916992e+00) {y=2-x;i=0;}
309		else if(x>=1.2316322326660156e+00) {y=x-tc;i=1;}
310		else {y=x-1;i=2;}
311	    }
312	    switch(i) {
313	      case 0:
314		z = y*y;
315		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*(a10+z*a12)))));
316		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*(a11+z*a13))))));
317		p  = y*p1+p2;
318		r  += p-y/2; break;
319	      case 1:
320		p = t0+y*t1+tt+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*(t7+y*(t8+
321		    y*(t9+y*(t10+y*(t11+y*(t12+y*(t13+y*(t14+y*(t15+y*(t16+
322		    y*(t17+y*t18))))))))))))))));
323		r += tf + p; break;
324	      case 2:
325		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*(u5+y*u6))))));
326		p2 = 1+y*(v1+y*(v2+y*(v3+y*(v4+y*(v5+y*v6)))));
327		r += p1/p2-y/2;
328	    }
329	}
330    /* x < 8.0 */
331	else if(ix<0x4002) {
332	    i = x;
333	    y = x-i;
334	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
335	    q = 1+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*(r6+y*r7))))));
336	    r = y/2+p/q;
337	    z = 1;	/* lgamma(1+s) = log(s) + lgamma(s) */
338	    switch(i) {
339	    case 7: z *= (y+6);		/* FALLTHRU */
340	    case 6: z *= (y+5);		/* FALLTHRU */
341	    case 5: z *= (y+4);		/* FALLTHRU */
342	    case 4: z *= (y+3);		/* FALLTHRU */
343	    case 3: z *= (y+2);		/* FALLTHRU */
344		    r += logl(z); break;
345	    }
346    /* 8.0 <= x < 2**(p+3) */
347	} else if (ix<0x3fff+67) {
348	    t = logl(x);
349	    z = one/x;
350	    y = z*z;
351	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*(w6+y*(w7+y*w8)))))));
352	    r = (x-half)*(t-one)+w;
353    /* 2**(p+3) <= x <= inf */
354	} else
355	    r =  x*(logl(x)-1);
356	if(hx&0x8000) r = nadj - r;
357	RETURNI(r);
358}
359