1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include "quad.h"
30
31static const double C[] = {
32	0.0,
33	0.5,
34	1.0,
35	68719476736.0,
36	536870912.0,
37	48.0,
38	16.0,
39	1.52587890625000000000e-05,
40	2.86102294921875000000e-06,
41	5.96046447753906250000e-08,
42	3.72529029846191406250e-09,
43	1.70530256582424044609e-13,
44	7.10542735760100185871e-15,
45	8.67361737988403547206e-19,
46	2.16840434497100886801e-19,
47	1.27054942088145050860e-21,
48	1.21169035041947413311e-27,
49	9.62964972193617926528e-35,
50	4.70197740328915003187e-38
51};
52
53#define	zero		C[0]
54#define	half		C[1]
55#define	one		C[2]
56#define	two36		C[3]
57#define	two29		C[4]
58#define	three2p4	C[5]
59#define	two4		C[6]
60#define	twom16		C[7]
61#define	three2m20	C[8]
62#define	twom24		C[9]
63#define	twom28		C[10]
64#define	three2m44	C[11]
65#define	twom47		C[12]
66#define	twom60		C[13]
67#define	twom62		C[14]
68#define	three2m71	C[15]
69#define	three2m91	C[16]
70#define	twom113		C[17]
71#define	twom124		C[18]
72
73static const unsigned
74	fsr_re = 0x00000000u,
75	fsr_rn = 0xc0000000u;
76
77#ifdef __sparcv9
78
79/*
80 * _Qp_sqrt(pz, x) sets *pz = sqrt(*x).
81 */
82void
83_Qp_sqrt(union longdouble *pz, const union longdouble *x)
84
85#else
86
87/*
88 * _Q_sqrt(x) returns sqrt(*x).
89 */
90union longdouble
91_Q_sqrt(const union longdouble *x)
92
93#endif	/* __sparcv9 */
94
95{
96	union longdouble	z;
97	union xdouble		u;
98	double			c, d, rr, r[2], tt[3], xx[4], zz[5];
99	unsigned int		xm, fsr, lx, wx[3];
100	unsigned int		msw, frac2, frac3, frac4, rm;
101	int			ex, ez;
102
103	if (QUAD_ISZERO(*x)) {
104		Z = *x;
105		QUAD_RETURN(Z);
106	}
107
108	xm = x->l.msw;
109
110	__quad_getfsrp(&fsr);
111
112	/* handle nan and inf cases */
113	if ((xm & 0x7fffffff) >= 0x7fff0000) {
114		if ((x->l.msw & 0xffff) | x->l.frac2 | x->l.frac3 |
115		    x->l.frac4) {
116			if (!(x->l.msw & 0x8000)) {
117				/* snan, signal invalid */
118				if (fsr & FSR_NVM) {
119					__quad_fsqrtq(x, &Z);
120				} else {
121					Z = *x;
122					Z.l.msw |= 0x8000;
123					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
124					    FSR_NVC;
125					__quad_setfsrp(&fsr);
126				}
127				QUAD_RETURN(Z);
128			}
129			Z = *x;
130			QUAD_RETURN(Z);
131		}
132		if (x->l.msw & 0x80000000) {
133			/* sqrt(-inf), signal invalid */
134			if (fsr & FSR_NVM) {
135				__quad_fsqrtq(x, &Z);
136			} else {
137				Z.l.msw = 0x7fffffff;
138				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
139				fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
140				__quad_setfsrp(&fsr);
141			}
142			QUAD_RETURN(Z);
143		}
144		/* sqrt(inf), return inf */
145		Z = *x;
146		QUAD_RETURN(Z);
147	}
148
149	/* handle negative numbers */
150	if (xm & 0x80000000) {
151		if (fsr & FSR_NVM) {
152			__quad_fsqrtq(x, &Z);
153		} else {
154			Z.l.msw = 0x7fffffff;
155			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
156			fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
157			__quad_setfsrp(&fsr);
158		}
159		QUAD_RETURN(Z);
160	}
161
162	/* now x is finite, positive */
163	__quad_setfsrp((unsigned *)&fsr_re);
164
165	/* get the normalized significand and exponent */
166	ex = (int)(xm >> 16);
167	lx = xm & 0xffff;
168	if (ex) {
169		lx |= 0x10000;
170		wx[0] = x->l.frac2;
171		wx[1] = x->l.frac3;
172		wx[2] = x->l.frac4;
173	} else {
174		if (lx | (x->l.frac2 & 0xfffe0000)) {
175			wx[0] = x->l.frac2;
176			wx[1] = x->l.frac3;
177			wx[2] = x->l.frac4;
178			ex = 1;
179		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
180			lx = x->l.frac2;
181			wx[0] = x->l.frac3;
182			wx[1] = x->l.frac4;
183			wx[2] = 0;
184			ex = -31;
185		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
186			lx = x->l.frac3;
187			wx[0] = x->l.frac4;
188			wx[1] = wx[2] = 0;
189			ex = -63;
190		} else {
191			lx = x->l.frac4;
192			wx[0] = wx[1] = wx[2] = 0;
193			ex = -95;
194		}
195		while ((lx & 0x10000) == 0) {
196			lx = (lx << 1) | (wx[0] >> 31);
197			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
198			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
199			wx[2] <<= 1;
200			ex--;
201		}
202	}
203	ez = ex - 0x3fff;
204	if (ez & 1) {
205		/* make exponent even */
206		lx = (lx << 1) | (wx[0] >> 31);
207		wx[0] = (wx[0] << 1) | (wx[1] >> 31);
208		wx[1] = (wx[1] << 1) | (wx[2] >> 31);
209		wx[2] <<= 1;
210		ez--;
211	}
212
213	/* extract the significands into doubles */
214	c = twom16;
215	xx[0] = (double)((int)lx) * c;
216
217	c *= twom24;
218	xx[0] += (double)((int)(wx[0] >> 8)) * c;
219
220	c *= twom24;
221	xx[1] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
222	    0xffffff)) * c;
223
224	c *= twom24;
225	xx[2] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
226	    0xffffff)) * c;
227
228	c *= twom24;
229	xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
230
231	/* approximate the divisor for the Newton iteration */
232	c = xx[0] + xx[1];
233	c = __quad_dp_sqrt(&c);
234	rr = half / c;
235
236	/* compute the first five "digits" of the square root */
237	zz[0] = (c + two29) - two29;
238	tt[0] = zz[0] + zz[0];
239	r[0] = (xx[0] - zz[0] * zz[0]) + xx[1];
240
241	zz[1] = (rr * (r[0] + xx[2]) + three2p4) - three2p4;
242	tt[1] = zz[1] + zz[1];
243	r[0] -= tt[0] * zz[1];
244	r[1] = xx[2] - zz[1] * zz[1];
245	c = (r[1] + three2m20) - three2m20;
246	r[0] += c;
247	r[1] = (r[1] - c) + xx[3];
248
249	zz[2] = (rr * (r[0] + r[1]) + three2m20) - three2m20;
250	tt[2] = zz[2] + zz[2];
251	r[0] -= tt[0] * zz[2];
252	r[1] -= tt[1] * zz[2];
253	c = (r[1] + three2m44) - three2m44;
254	r[0] += c;
255	r[1] = (r[1] - c) - zz[2] * zz[2];
256
257	zz[3] = (rr * (r[0] + r[1]) + three2m44) - three2m44;
258	r[0] = ((r[0] - tt[0] * zz[3]) + r[1]) - tt[1] * zz[3];
259	r[1] = -tt[2] * zz[3];
260	c = (r[1] + three2m91) - three2m91;
261	r[0] += c;
262	r[1] = (r[1] - c) - zz[3] * zz[3];
263
264	zz[4] = (rr * (r[0] + r[1]) + three2m71) - three2m71;
265
266	/* reduce to three doubles, making sure zz[1] is positive */
267	zz[0] += zz[1] - twom47;
268	zz[1] = twom47 + zz[2] + zz[3];
269	zz[2] = zz[4];
270
271	/* if the third term might lie on a rounding boundary, perturb it */
272	if (zz[2] == (twom62 + zz[2]) - twom62) {
273		/* here we just need to get the sign of the remainder */
274		c = (((((r[0] - tt[0] * zz[4]) - tt[1] * zz[4]) + r[1])
275		    - tt[2] * zz[4]) - (zz[3] + zz[3]) * zz[4]) - zz[4] * zz[4];
276		if (c < zero)
277			zz[2] -= twom124;
278		else if (c > zero)
279			zz[2] += twom124;
280	}
281
282	/*
283	 * propagate carries/borrows, using round-to-negative-infinity mode
284	 * to make all terms nonnegative (note that we can't encounter a
285	 * borrow so large that the roundoff is unrepresentable because
286	 * we took care to make zz[1] positive above)
287	 */
288	__quad_setfsrp(&fsr_rn);
289	c = zz[1] + zz[2];
290	zz[2] += (zz[1] - c);
291	zz[1] = c;
292	c = zz[0] + zz[1];
293	zz[1] += (zz[0] - c);
294	zz[0] = c;
295
296	/* adjust exponent and strip off integer bit */
297	ez = (ez >> 1) + 0x3fff;
298	zz[0] -= one;
299
300	/* the first 48 bits of fraction come from zz[0] */
301	u.d = d = two36 + zz[0];
302	msw = u.l.lo;
303	zz[0] -= (d - two36);
304
305	u.d = d = two4 + zz[0];
306	frac2 = u.l.lo;
307	zz[0] -= (d - two4);
308
309	/* the next 32 come from zz[0] and zz[1] */
310	u.d = d = twom28 + (zz[0] + zz[1]);
311	frac3 = u.l.lo;
312	zz[0] -= (d - twom28);
313
314	/* condense the remaining fraction; errors here won't matter */
315	c = zz[0] + zz[1];
316	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
317	zz[0] = c;
318
319	/* get the last word of fraction */
320	u.d = d = twom60 + (zz[0] + zz[1]);
321	frac4 = u.l.lo;
322	zz[0] -= (d - twom60);
323
324	/* keep track of what's left for rounding; note that the error */
325	/* in computing c will be non-negative due to rounding mode */
326	c = zz[0] + zz[1];
327
328	/* get the rounding mode */
329	rm = fsr >> 30;
330
331	/* round and raise exceptions */
332	fsr &= ~FSR_CEXC;
333	if (c != zero) {
334		fsr |= FSR_NXC;
335
336		/* decide whether to round the fraction up */
337		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
338		    (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
339			/* round up and renormalize if necessary */
340			if (++frac4 == 0)
341				if (++frac3 == 0)
342					if (++frac2 == 0)
343						if (++msw == 0x10000) {
344							msw = 0;
345							ez++;
346						}
347		}
348	}
349
350	/* stow the result */
351	z.l.msw = (ez << 16) | msw;
352	z.l.frac2 = frac2;
353	z.l.frac3 = frac3;
354	z.l.frac4 = frac4;
355
356	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
357		__quad_setfsrp(&fsr);
358		__quad_fsqrtq(x, &Z);
359	} else {
360		Z = z;
361		fsr |= (fsr & 0x1f) << 5;
362		__quad_setfsrp(&fsr);
363	}
364	QUAD_RETURN(Z);
365}
366