1/* @(#)s_erf.c 5.1 93/09/24 */
2/*
3 * ====================================================
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 *
6 * Developed at SunPro, a Sun Microsystems, Inc. business.
7 * Permission to use, copy, modify, and distribute this
8 * software is freely granted, provided that this notice
9 * is preserved.
10 * ====================================================
11 */
12
13#include <sys/cdefs.h>
14__FBSDID("$FreeBSD$");
15
16/*
17 * See s_erf.c for complete comments.
18 *
19 * Converted to long double by Steven G. Kargl.
20 */
21#include <float.h>
22#ifdef __i386__
23#include <ieeefp.h>
24#endif
25
26#include "fpmath.h"
27#include "math.h"
28#include "math_private.h"
29
30/* XXX Prevent compilers from erroneously constant folding: */
31static const volatile long double tiny = 0x1p-10000L;
32
33static const double
34half= 0.5,
35one = 1,
36two = 2;
37/*
38 * In the domain [0, 2**-34], only the first term in the power series
39 * expansion of erf(x) is used.  The magnitude of the first neglected
40 * terms is less than 2**-102.
41 */
42static const union IEEEl2bits
43efxu  = LD80C(0x8375d410a6db446c, -3,  1.28379167095512573902e-1L),
44efx8u = LD80C(0x8375d410a6db446c,  0,  1.02703333676410059122e+0L),
45/*
46 * Domain [0, 0.84375], range ~[-1.423e-22, 1.423e-22]:
47 * |(erf(x) - x)/x - pp(x)/qq(x)| < 2**-72.573
48 */
49pp0u  = LD80C(0x8375d410a6db446c, -3,   1.28379167095512573902e-1L),
50pp1u  = LD80C(0xa46c7d09ec3d0cec, -2,  -3.21140201054840180596e-1L),
51pp2u  = LD80C(0x9b31e66325576f86, -5,  -3.78893851760347812082e-2L),
52pp3u  = LD80C(0x804ac72c9a0b97dd, -7,  -7.83032847030604679616e-3L),
53pp4u  = LD80C(0x9f42bcbc3d5a601d, -12, -3.03765663857082048459e-4L),
54pp5u  = LD80C(0x9ec4ad6193470693, -16, -1.89266527398167917502e-5L),
55qq1u  = LD80C(0xdb4b8eb713188d6b, -2,   4.28310832832310510579e-1L),
56qq2u  = LD80C(0xa5750835b2459bd1, -4,   8.07896272074540216658e-2L),
57qq3u  = LD80C(0x8b85d6bd6a90b51c, -7,   8.51579638189385354266e-3L),
58qq4u  = LD80C(0x87332f82cff4ff96, -11,  5.15746855583604912827e-4L),
59qq5u  = LD80C(0x83466cb6bf9dca00, -16,  1.56492109706256700009e-5L),
60qq6u  = LD80C(0xf5bf98c2f996bf63, -24,  1.14435527803073879724e-7L);
61#define	efx	(efxu.e)
62#define	efx8	(efx8u.e)
63#define	pp0	(pp0u.e)
64#define	pp1	(pp1u.e)
65#define	pp2	(pp2u.e)
66#define	pp3	(pp3u.e)
67#define	pp4	(pp4u.e)
68#define	pp5	(pp5u.e)
69#define	qq1	(qq1u.e)
70#define	qq2	(qq2u.e)
71#define	qq3	(qq3u.e)
72#define	qq4	(qq4u.e)
73#define	qq5	(qq5u.e)
74#define	qq6	(qq6u.e)
75static const union IEEEl2bits
76erxu  = LD80C(0xd7bb3d0000000000, -1,  8.42700779438018798828e-1L),
77/*
78 * Domain [0.84375, 1.25], range ~[-8.132e-22, 8.113e-22]:
79 * |(erf(x) - erx) - pa(x)/qa(x)| < 2**-71.762
80 */
81pa0u  = LD80C(0xe8211158da02c692, -27,  1.35116960705131296711e-8L),
82pa1u  = LD80C(0xd488f89f36988618, -2,   4.15107507167065612570e-1L),
83pa2u  = LD80C(0xece74f8c63fa3942, -4,  -1.15675565215949226989e-1L),
84pa3u  = LD80C(0xc8d31e020727c006, -4,   9.80589241379624665791e-2L),
85pa4u  = LD80C(0x985d5d5fafb0551f, -5,   3.71984145558422368847e-2L),
86pa5u  = LD80C(0xa5b6c4854d2f5452, -8,  -5.05718799340957673661e-3L),
87pa6u  = LD80C(0x85c8d58fe3993a47, -8,   4.08277919612202243721e-3L),
88pa7u  = LD80C(0xddbfbc23677b35cf, -13,  2.11476292145347530794e-4L),
89qa1u  = LD80C(0xb8a977896f5eff3f, -1,   7.21335860303380361298e-1L),
90qa2u  = LD80C(0x9fcd662c3d4eac86, -1,   6.24227891731886593333e-1L),
91qa3u  = LD80C(0x9d0b618eac67ba07, -2,   3.06727455774491855801e-1L),
92qa4u  = LD80C(0x881a4293f6d6c92d, -3,   1.32912674218195890535e-1L),
93qa5u  = LD80C(0xbab144f07dea45bf, -5,   4.55792134233613027584e-2L),
94qa6u  = LD80C(0xa6c34ba438bdc900, -7,   1.01783980070527682680e-2L),
95qa7u  = LD80C(0x8fa866dc20717a91, -9,   2.19204436518951438183e-3L);
96#define erx	(erxu.e)
97#define pa0	(pa0u.e)
98#define pa1	(pa1u.e)
99#define pa2	(pa2u.e)
100#define pa3	(pa3u.e)
101#define pa4	(pa4u.e)
102#define pa5	(pa5u.e)
103#define pa6	(pa6u.e)
104#define pa7	(pa7u.e)
105#define qa1	(qa1u.e)
106#define qa2	(qa2u.e)
107#define qa3	(qa3u.e)
108#define qa4	(qa4u.e)
109#define qa5	(qa5u.e)
110#define qa6	(qa6u.e)
111#define qa7	(qa7u.e)
112static const union IEEEl2bits
113/*
114 * Domain [1.25,2.85715], range ~[-2.334e-22,2.334e-22]:
115 * |log(x*erfc(x)) + x**2 + 0.5625 - ra(x)/sa(x)| < 2**-71.860
116 */
117ra0u  = LD80C(0xa1a091e0fb4f335a, -7, -9.86494298915814308249e-3L),
118ra1u  = LD80C(0xc2b0d045ae37df6b, -1, -7.60510460864878271275e-1L),
119ra2u  = LD80C(0xf2cec3ee7da636c5, 3,  -1.51754798236892278250e+1L),
120ra3u  = LD80C(0x813cc205395adc7d, 7,  -1.29237335516455333420e+2L),
121ra4u  = LD80C(0x8737c8b7b4062c2f, 9,  -5.40871625829510494776e+2L),
122ra5u  = LD80C(0x8ffe5383c08d4943, 10, -1.15194769466026108551e+3L),
123ra6u  = LD80C(0x983573e64d5015a9, 10, -1.21767039790249025544e+3L),
124ra7u  = LD80C(0x92a794e763a6d4db, 9,  -5.86618463370624636688e+2L),
125ra8u  = LD80C(0xd5ad1fae77c3d9a3, 6,  -1.06838132335777049840e+2L),
126ra9u  = LD80C(0x934c1a247807bb9c, 2,  -4.60303980944467334806e+0L),
127sa1u  = LD80C(0xd342f90012bb1189, 4,   2.64077014928547064865e+1L),
128sa2u  = LD80C(0x839be13d9d5da883, 8,   2.63217811300123973067e+2L),
129sa3u  = LD80C(0x9f8cba6d1ae1b24b, 10,  1.27639775710344617587e+3L),
130sa4u  = LD80C(0xcaa83f403713e33e, 11,  3.24251544209971162003e+3L),
131sa5u  = LD80C(0x8796aff2f3c47968, 12,  4.33883591261332837874e+3L),
132sa6u  = LD80C(0xb6ef97f9c753157b, 11,  2.92697460344182158454e+3L),
133sa7u  = LD80C(0xe02aee5f83773d1c, 9,   8.96670799139389559818e+2L),
134sa8u  = LD80C(0xc82b83855b88e07e, 6,   1.00084987800048510018e+2L),
135sa9u  = LD80C(0x92f030aefadf28ad, 1,   2.29591004455459083843e+0L);
136#define ra0	(ra0u.e)
137#define ra1	(ra1u.e)
138#define ra2	(ra2u.e)
139#define ra3	(ra3u.e)
140#define ra4	(ra4u.e)
141#define ra5	(ra5u.e)
142#define ra6	(ra6u.e)
143#define ra7	(ra7u.e)
144#define ra8	(ra8u.e)
145#define ra9	(ra9u.e)
146#define sa1	(sa1u.e)
147#define sa2	(sa2u.e)
148#define sa3	(sa3u.e)
149#define sa4	(sa4u.e)
150#define sa5	(sa5u.e)
151#define sa6	(sa6u.e)
152#define sa7	(sa7u.e)
153#define sa8	(sa8u.e)
154#define sa9	(sa9u.e)
155/*
156 * Domain [2.85715,7], range ~[-8.323e-22,8.390e-22]:
157 * |log(x*erfc(x)) + x**2 + 0.5625 - rb(x)/sb(x)| < 2**-70.326
158 */
159static const union IEEEl2bits
160rb0u = LD80C(0xa1a091cf43abcd26, -7, -9.86494292470284646962e-3L),
161rb1u = LD80C(0xd19d2df1cbb8da0a, -1, -8.18804618389296662837e-1L),
162rb2u = LD80C(0x9a4dd1383e5daf5b, 4,  -1.92879967111618594779e+1L),
163rb3u = LD80C(0xbff0ae9fc0751de6, 7,  -1.91940164551245394969e+2L),
164rb4u = LD80C(0xdde08465310b472b, 9,  -8.87508080766577324539e+2L),
165rb5u = LD80C(0xe796e1d38c8c70a9, 10, -1.85271506669474503781e+3L),
166rb6u = LD80C(0xbaf655a76e0ab3b5, 10, -1.49569795581333675349e+3L),
167rb7u = LD80C(0x95d21e3e75503c21, 8,  -2.99641547972948019157e+2L),
168sb1u = LD80C(0x814487ed823c8cbd, 5,   3.23169247732868256569e+1L),
169sb2u = LD80C(0xbe4bfbb1301304be, 8,   3.80593618534539961773e+2L),
170sb3u = LD80C(0x809c4ade46b927c7, 11,  2.05776827838541292848e+3L),
171sb4u = LD80C(0xa55284359f3395a8, 12,  5.29031455540062116327e+3L),
172sb5u = LD80C(0xbcfa72da9b820874, 12,  6.04730608102312640462e+3L),
173sb6u = LD80C(0x9d09a35988934631, 11,  2.51260238030767176221e+3L),
174sb7u = LD80C(0xd675bbe542c159fa, 7,   2.14459898308561015684e+2L);
175#define rb0	(rb0u.e)
176#define rb1	(rb1u.e)
177#define rb2	(rb2u.e)
178#define rb3	(rb3u.e)
179#define rb4	(rb4u.e)
180#define rb5	(rb5u.e)
181#define rb6	(rb6u.e)
182#define rb7	(rb7u.e)
183#define sb1	(sb1u.e)
184#define sb2	(sb2u.e)
185#define sb3	(sb3u.e)
186#define sb4	(sb4u.e)
187#define sb5	(sb5u.e)
188#define sb6	(sb6u.e)
189#define sb7	(sb7u.e)
190/*
191 * Domain [7,108], range ~[-4.422e-22,4.422e-22]:
192 * |log(x*erfc(x)) + x**2 + 0.5625 - rc(x)/sc(x)| < 2**-70.938
193 */
194static const union IEEEl2bits
195/* err = -4.422092275318925082e-22 -70.937689 */
196rc0u = LD80C(0xa1a091cf437a17ad, -7, -9.86494292470008707260e-3L),
197rc1u = LD80C(0xbe79c5a978122b00, -1, -7.44045595049165939261e-1L),
198rc2u = LD80C(0xdb26f9bbe31a2794, 3,  -1.36970155085888424425e+1L),
199rc3u = LD80C(0xb5f69a38f5747ac8, 6,  -9.09816453742625888546e+1L),
200rc4u = LD80C(0xd79676d970d0a21a, 7,  -2.15587750997584074147e+2L),
201rc5u = LD80C(0xfe528153c45ec97c, 6,  -1.27161142938347796666e+2L),
202sc1u = LD80C(0xc5e8cd46d5604a96, 4,   2.47386727842204312937e+1L),
203sc2u = LD80C(0xc5f0f5a5484520eb, 7,   1.97941248254913378865e+2L),
204sc3u = LD80C(0x964e3c7b34db9170, 9,   6.01222441484087787522e+2L),
205sc4u = LD80C(0x99be1b89faa0596a, 9,   6.14970430845978077827e+2L),
206sc5u = LD80C(0xf80dfcbf37ffc5ea, 6,   1.24027318931184605891e+2L);
207#define rc0	(rc0u.e)
208#define rc1	(rc1u.e)
209#define rc2	(rc2u.e)
210#define rc3	(rc3u.e)
211#define rc4	(rc4u.e)
212#define rc5	(rc5u.e)
213#define sc1	(sc1u.e)
214#define sc2	(sc2u.e)
215#define sc3	(sc3u.e)
216#define sc4	(sc4u.e)
217#define sc5	(sc5u.e)
218
219long double
220erfl(long double x)
221{
222	long double ax,R,S,P,Q,s,y,z,r;
223	uint64_t lx;
224	int32_t i;
225	uint16_t hx;
226
227	EXTRACT_LDBL80_WORDS(hx, lx, x);
228
229	if((hx & 0x7fff) == 0x7fff) {	/* erfl(nan)=nan */
230		i = (hx>>15)<<1;
231		return (1-i)+one/x;	/* erfl(+-inf)=+-1 */
232	}
233
234	ENTERI();
235
236	ax = fabsl(x);
237	if(ax < 0.84375) {
238	    if(ax < 0x1p-34L) {
239	        if(ax < 0x1p-16373L)
240		    RETURNI((8*x+efx8*x)/8);	/* avoid spurious underflow */
241		RETURNI(x + efx*x);
242	    }
243	    z = x*x;
244	    r = pp0+z*(pp1+z*(pp2+z*(pp3+z*(pp4+z*pp5))));
245	    s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*(qq5+z*qq6)))));
246	    y = r/s;
247	    RETURNI(x + x*y);
248	}
249	if(ax < 1.25) {
250	    s = ax-one;
251	    P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*(pa6+s*pa7))))));
252	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*(qa6+s*qa7))))));
253	    if(x>=0) RETURNI(erx + P/Q); else RETURNI(-erx - P/Q);
254	}
255	if(ax >= 7) {			/* inf>|x|>= 7 */
256	    if(x>=0) RETURNI(one-tiny); else RETURNI(tiny-one);
257	}
258	s = one/(ax*ax);
259	if(ax < 2.85715) {	/* |x| < 2.85715 */
260	    R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*(ra7+
261		s*(ra8+s*ra9))))))));
262	    S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
263		s*(sa8+s*sa9))))))));
264	} else {	/* |x| >= 2.85715 */
265	    R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*(rb6+s*rb7))))));
266	    S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
267	}
268	z=(float)ax;
269	r=expl(-z*z-0.5625)*expl((z-ax)*(z+ax)+R/S);
270	if(x>=0) RETURNI(one-r/ax); else RETURNI(r/ax-one);
271}
272
273long double
274erfcl(long double x)
275{
276	long double ax,R,S,P,Q,s,y,z,r;
277	uint64_t lx;
278	uint16_t hx;
279
280	EXTRACT_LDBL80_WORDS(hx, lx, x);
281
282	if((hx & 0x7fff) == 0x7fff) {	/* erfcl(nan)=nan */
283					/* erfcl(+-inf)=0,2 */
284	    return ((hx>>15)<<1)+one/x;
285	}
286
287	ENTERI();
288
289	ax = fabsl(x);
290	if(ax < 0.84375L) {
291	    if(ax < 0x1p-34L)
292		RETURNI(one-x);
293	    z = x*x;
294	    r = pp0+z*(pp1+z*(pp2+z*(pp3+z*(pp4+z*pp5))));
295	    s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*(qq5+z*qq6)))));
296	    y = r/s;
297	    if(ax < 0.25L) {  	/* x<1/4 */
298		RETURNI(one-(x+x*y));
299	    } else {
300		r = x*y;
301		r += (x-half);
302	       RETURNI(half - r);
303	    }
304	}
305	if(ax < 1.25L) {
306	    s = ax-one;
307	    P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*(pa6+s*pa7))))));
308	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*(qa6+s*qa7))))));
309	    if(x>=0) {
310	        z  = one-erx; RETURNI(z - P/Q);
311	    } else {
312		z = (erx+P/Q); RETURNI(one+z);
313	    }
314	}
315
316	if(ax < 108) {			/* |x| < 108 */
317 	    s = one/(ax*ax);
318	    if(ax < 2.85715) {		/* |x| < 2.85715 */
319		R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*(ra7+
320		    s*(ra8+s*ra9))))))));
321		S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
322		    s*(sa8+s*sa9))))))));
323	    } else if(ax < 7) {		/* | |x| < 7 */
324		R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*(rb6+s*rb7))))));
325		S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
326	    } else {
327		if(x < -7) RETURNI(two-tiny);/* x < -7 */
328		R=rc0+s*(rc1+s*(rc2+s*(rc3+s*(rc4+s*rc5))));
329		S=one+s*(sc1+s*(sc2+s*(sc3+s*(sc4+s*sc5))));
330	    }
331	    z = (float)ax;
332	    r = expl(-z*z-0.5625)*expl((z-ax)*(z+ax)+R/S);
333	    if(x>0) RETURNI(r/ax); else RETURNI(two-r/ax);
334	} else {
335	    if(x>0) RETURNI(tiny*tiny); else RETURNI(two-tiny);
336	}
337}
338