1221234Skargl/* From: @(#)e_rem_pio2.c 1.4 95/01/18 */
22116Sjkh/*
32116Sjkh * ====================================================
42116Sjkh * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5221234Skargl * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
62116Sjkh *
7141296Sdas * Developed at SunSoft, a Sun Microsystems, Inc. business.
82116Sjkh * Permission to use, copy, modify, and distribute this
9141296Sdas * software is freely granted, provided that this notice
102116Sjkh * is preserved.
112116Sjkh * ====================================================
12141296Sdas *
13176476Sbde * Optimized by Bruce D. Evans.
142116Sjkh */
152116Sjkh
16176385Sbde#include <sys/cdefs.h>
17176385Sbde__FBSDID("$FreeBSD$");
182116Sjkh
19221234Skargl/* ld128 version of __ieee754_rem_pio2l(x,y)
20141296Sdas *
21141296Sdas * return the remainder of x rem pi/2 in y[0]+y[1]
222116Sjkh * use __kernel_rem_pio2()
232116Sjkh */
242116Sjkh
25176465Sbde#include <float.h>
26176465Sbde
272116Sjkh#include "math.h"
282116Sjkh#include "math_private.h"
29221234Skargl#include "fpmath.h"
302116Sjkh
31221234Skargl#define	BIAS	(LDBL_MAX_EXP - 1)
32221234Skargl
332116Sjkh/*
34221234Skargl * XXX need to verify that nonzero integer multiples of pi/2 within the
35221234Skargl * range get no closer to a long double than 2**-140, or that
36221234Skargl * ilogb(x) + ilogb(min_delta) < 45 - -140.
37221234Skargl */
38221234Skargl/*
39221234Skargl * invpio2:  113 bits of 2/pi
40221234Skargl * pio2_1:   first  68 bits of pi/2
412116Sjkh * pio2_1t:  pi/2 - pio2_1
42221234Skargl * pio2_2:   second 68 bits of pi/2
432116Sjkh * pio2_2t:  pi/2 - (pio2_1+pio2_2)
44221234Skargl * pio2_3:   third  68 bits of pi/2
452116Sjkh * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
462116Sjkh */
472116Sjkh
488870Srgrimesstatic const double
492116Sjkhzero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
50221234Skargltwo24 =  1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
512116Sjkh
52221234Skarglstatic const long double
53221234Skarglinvpio2 =  6.3661977236758134307553505349005747e-01L,	/*  0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
54221234Skarglpio2_1  =  1.5707963267948966192292994253909555e+00L,	/*  0x1921fb54442d18469800000000000.0p-112 */
55221234Skarglpio2_1t =  2.0222662487959507323996846200947577e-21L,	/*  0x13198a2e03707344a4093822299f3.0p-181 */
56221234Skarglpio2_2  =  2.0222662487959507323994779168837751e-21L,	/*  0x13198a2e03707344a400000000000.0p-181 */
57221234Skarglpio2_2t =  2.0670321098263988236496903051604844e-43L,	/*  0x127044533e63a0105df531d89cd91.0p-254 */
58221234Skarglpio2_3  =  2.0670321098263988236499468110329591e-43L,	/*  0x127044533e63a0105e00000000000.0p-254 */
59221234Skarglpio2_3t = -2.5650587247459238361625433492959285e-65L;	/* -0x159c4ec64ddaeb5f78671cbfb2210.0p-327 */
60221234Skargl
61222508Skarglstatic inline __always_inline int
62221234Skargl__ieee754_rem_pio2l(long double x, long double *y)
632116Sjkh{
64221234Skargl	union IEEEl2bits u,u1;
65221234Skargl	long double z,w,t,r,fn;
66221234Skargl	double tx[5],ty[3];
67221234Skargl	int64_t n;
68221234Skargl	int e0,ex,i,j,nx;
69221234Skargl	int16_t expsign;
702116Sjkh
71221234Skargl	u.e = x;
72221234Skargl	expsign = u.xbits.expsign;
73221234Skargl	ex = expsign & 0x7fff;
74221234Skargl	if (ex < BIAS + 45 || ex == BIAS + 45 &&
75221234Skargl	    u.bits.manh < 0x921fb54442d1LL) {
76221234Skargl	    /* |x| ~< 2^45*(pi/2), medium size */
77176465Sbde	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
78221234Skargl	    fn = x*invpio2+0x1.8p112;
79221234Skargl	    fn = fn-0x1.8p112;
80221234Skargl#ifdef HAVE_EFFICIENT_I64RINT
81221234Skargl	    n  = i64rint(fn);
82176465Sbde#else
83221234Skargl	    n  = fn;
84176465Sbde#endif
85176476Sbde	    r  = x-fn*pio2_1;
86221234Skargl	    w  = fn*pio2_1t;	/* 1st round good to 180 bit */
87176466Sbde	    {
88221234Skargl		union IEEEl2bits u2;
89221234Skargl	        int ex1;
90221234Skargl	        j  = ex;
91141296Sdas	        y[0] = r-w;
92221234Skargl		u2.e = y[0];
93221234Skargl		ex1 = u2.xbits.expsign & 0x7fff;
94221234Skargl	        i = j-ex1;
95221234Skargl	        if(i>51) {  /* 2nd iteration needed, good to 248 */
962116Sjkh		    t  = r;
97141296Sdas		    w  = fn*pio2_2;
982116Sjkh		    r  = t-w;
99141296Sdas		    w  = fn*pio2_2t-((t-r)-w);
1002116Sjkh		    y[0] = r-w;
101221234Skargl		    u2.e = y[0];
102221234Skargl		    ex1 = u2.xbits.expsign & 0x7fff;
103221234Skargl		    i = j-ex1;
104221234Skargl		    if(i>119) {	/* 3rd iteration need, 316 bits acc */
1052116Sjkh		    	t  = r;	/* will cover all possible cases */
106141296Sdas		    	w  = fn*pio2_3;
1072116Sjkh		    	r  = t-w;
108141296Sdas		    	w  = fn*pio2_3t-((t-r)-w);
1092116Sjkh		    	y[0] = r-w;
1102116Sjkh		    }
1112116Sjkh		}
1122116Sjkh	    }
1132116Sjkh	    y[1] = (r-y[0])-w;
114176476Sbde	    return n;
1152116Sjkh	}
116141296Sdas    /*
1172116Sjkh     * all other (large) arguments
1182116Sjkh     */
119221234Skargl	if(ex==0x7fff) {		/* x is inf or NaN */
1202116Sjkh	    y[0]=y[1]=x-x; return 0;
1212116Sjkh	}
1222116Sjkh    /* set z = scalbn(|x|,ilogb(x)-23) */
123221234Skargl	u1.e = x;
124221234Skargl	e0 = ex - BIAS - 23;		/* e0 = ilogb(|x|)-23; */
125221234Skargl	u1.xbits.expsign = ex - e0;
126221234Skargl	z = u1.e;
127221234Skargl	for(i=0;i<4;i++) {
1282116Sjkh		tx[i] = (double)((int32_t)(z));
1292116Sjkh		z     = (z-tx[i])*two24;
1302116Sjkh	}
131221234Skargl	tx[4] = z;
132221234Skargl	nx = 5;
1332116Sjkh	while(tx[nx-1]==zero) nx--;	/* skip zero term */
134221234Skargl	n  =  __kernel_rem_pio2(tx,ty,e0,nx,3);
135221234Skargl	t = (long double)ty[2] + ty[1];
136221234Skargl	r = t + ty[0];
137221234Skargl	w = ty[0] - (r - t);
138221234Skargl	if(expsign<0) {y[0] = -r; y[1] = -w; return -n;}
139221234Skargl	y[0] = r; y[1] = w; return n;
1402116Sjkh}
141