1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/* Expansions and modifications for 128-bit long double are
13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14   and are incorporated herein by permission of the author.  The author
15   reserves the right to distribute this material elsewhere under different
16   copying permissions.  These modifications are distributed here under
17   the following terms:
18
19    This library is free software; you can redistribute it and/or
20    modify it under the terms of the GNU Lesser General Public
21    License as published by the Free Software Foundation; either
22    version 2.1 of the License, or (at your option) any later version.
23
24    This library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with this library; if not, write to the Free Software
31    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
32
33/* powq(x,y) return x**y
34 *
35 *		      n
36 * Method:  Let x =  2   * (1+f)
37 *	1. Compute and return log2(x) in two pieces:
38 *		log2(x) = w1 + w2,
39 *	   where w1 has 113-53 = 60 bit trailing zeros.
40 *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41 *	   arithmetic, where |y'|<=0.5.
42 *	3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 *	1.  (anything) ** 0  is 1
46 *	2.  (anything) ** 1  is itself
47 *	3.  (anything) ** NAN is NAN
48 *	4.  NAN ** (anything except 0) is NAN
49 *	5.  +-(|x| > 1) **  +INF is +INF
50 *	6.  +-(|x| > 1) **  -INF is +0
51 *	7.  +-(|x| < 1) **  +INF is +0
52 *	8.  +-(|x| < 1) **  -INF is +INF
53 *	9.  +-1         ** +-INF is NAN
54 *	10. +0 ** (+anything except 0, NAN)               is +0
55 *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56 *	12. +0 ** (-anything except 0, NAN)               is +INF
57 *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58 *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 *	15. +INF ** (+anything except 0,NAN) is +INF
60 *	16. +INF ** (-anything except 0,NAN) is +0
61 *	17. -INF ** (anything)  = -0 ** (-anything)
62 *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67#include "quadmath-imp.h"
68
69static const __float128 bp[] = {
70  1.0Q,
71  1.5Q,
72};
73
74/* log_2(1.5) */
75static const __float128 dp_h[] = {
76  0.0,
77  5.8496250072115607565592654282227158546448E-1Q
78};
79
80/* Low part of log_2(1.5) */
81static const __float128 dp_l[] = {
82  0.0,
83  1.0579781240112554492329533686862998106046E-16Q
84};
85
86static const __float128 zero = 0.0Q,
87  one = 1.0Q,
88  two = 2.0Q,
89  two113 = 1.0384593717069655257060992658440192E34Q,
90  huge = 1.0e3000Q,
91  tiny = 1.0e-3000Q;
92
93/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
94   z = (x-1)/(x+1)
95   1 <= x <= 1.25
96   Peak relative error 2.3e-37 */
97static const __float128 LN[] =
98{
99 -3.0779177200290054398792536829702930623200E1Q,
100  6.5135778082209159921251824580292116201640E1Q,
101 -4.6312921812152436921591152809994014413540E1Q,
102  1.2510208195629420304615674658258363295208E1Q,
103 -9.9266909031921425609179910128531667336670E-1Q
104};
105static const __float128 LD[] =
106{
107 -5.129862866715009066465422805058933131960E1Q,
108  1.452015077564081884387441590064272782044E2Q,
109 -1.524043275549860505277434040464085593165E2Q,
110  7.236063513651544224319663428634139768808E1Q,
111 -1.494198912340228235853027849917095580053E1Q
112  /* 1.0E0 */
113};
114
115/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
116   0 <= x <= 0.5
117   Peak relative error 5.7e-38  */
118static const __float128 PN[] =
119{
120  5.081801691915377692446852383385968225675E8Q,
121  9.360895299872484512023336636427675327355E6Q,
122  4.213701282274196030811629773097579432957E4Q,
123  5.201006511142748908655720086041570288182E1Q,
124  9.088368420359444263703202925095675982530E-3Q,
125};
126static const __float128 PD[] =
127{
128  3.049081015149226615468111430031590411682E9Q,
129  1.069833887183886839966085436512368982758E8Q,
130  8.259257717868875207333991924545445705394E5Q,
131  1.872583833284143212651746812884298360922E3Q,
132  /* 1.0E0 */
133};
134
135static const __float128
136  /* ln 2 */
137  lg2 = 6.9314718055994530941723212145817656807550E-1Q,
138  lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
139  lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
140  ovt = 8.0085662595372944372e-0017Q,
141  /* 2/(3*log(2)) */
142  cp = 9.6179669392597560490661645400126142495110E-1Q,
143  cp_h = 9.6179669392597555432899980587535537779331E-1Q,
144  cp_l = 5.0577616648125906047157785230014751039424E-17Q;
145
146__float128
147powq (__float128 x, __float128 y)
148{
149  __float128 z, ax, z_h, z_l, p_h, p_l;
150  __float128 y1, t1, t2, r, s, t, u, v, w;
151  __float128 s2, s_h, s_l, t_h, t_l, ay;
152  int32_t i, j, k, yisint, n;
153  uint32_t ix, iy;
154  int32_t hx, hy;
155  ieee854_float128 o, p, q;
156
157  p.value = x;
158  hx = p.words32.w0;
159  ix = hx & 0x7fffffff;
160
161  q.value = y;
162  hy = q.words32.w0;
163  iy = hy & 0x7fffffff;
164
165
166  /* y==zero: x**0 = 1 */
167  if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
168    return one;
169
170  /* 1.0**y = 1; -1.0**+-Inf = 1 */
171  if (x == one)
172    return one;
173  if (x == -1.0Q && iy == 0x7fff0000
174      && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
175    return one;
176
177  /* +-NaN return x+y */
178  if ((ix > 0x7fff0000)
179      || ((ix == 0x7fff0000)
180	  && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
181      || (iy > 0x7fff0000)
182      || ((iy == 0x7fff0000)
183	  && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
184    return x + y;
185
186  /* determine if y is an odd int when x < 0
187   * yisint = 0       ... y is not an integer
188   * yisint = 1       ... y is an odd int
189   * yisint = 2       ... y is an even int
190   */
191  yisint = 0;
192  if (hx < 0)
193    {
194      if (iy >= 0x40700000)	/* 2^113 */
195	yisint = 2;		/* even integer y */
196      else if (iy >= 0x3fff0000)	/* 1.0 */
197	{
198	  if (floorq (y) == y)
199	    {
200	      z = 0.5 * y;
201	      if (floorq (z) == z)
202		yisint = 2;
203	      else
204		yisint = 1;
205	    }
206	}
207    }
208
209  /* special value of y */
210  if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
211    {
212      if (iy == 0x7fff0000)	/* y is +-inf */
213	{
214	  if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
215	      == 0)
216	    return y - y;	/* +-1**inf is NaN */
217	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
218	    return (hy >= 0) ? y : zero;
219	  else			/* (|x|<1)**-,+inf = inf,0 */
220	    return (hy < 0) ? -y : zero;
221	}
222      if (iy == 0x3fff0000)
223	{			/* y is  +-1 */
224	  if (hy < 0)
225	    return one / x;
226	  else
227	    return x;
228	}
229      if (hy == 0x40000000)
230	return x * x;		/* y is  2 */
231      if (hy == 0x3ffe0000)
232	{			/* y is  0.5 */
233	  if (hx >= 0)		/* x >= +0 */
234	    return sqrtq (x);
235	}
236    }
237
238  ax = fabsq (x);
239  /* special value of x */
240  if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
241    {
242      if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
243	{
244	  z = ax;		/*x is +-0,+-inf,+-1 */
245	  if (hy < 0)
246	    z = one / z;	/* z = (1/|x|) */
247	  if (hx < 0)
248	    {
249	      if (((ix - 0x3fff0000) | yisint) == 0)
250		{
251		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
252		}
253	      else if (yisint == 1)
254		z = -z;		/* (x<0)**odd = -(|x|**odd) */
255	    }
256	  return z;
257	}
258    }
259
260  /* (x<0)**(non-int) is NaN */
261  if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
262    return (x - x) / (x - x);
263
264  /* |y| is huge.
265     2^-16495 = 1/2 of smallest representable value.
266     If (1 - 1/131072)^y underflows, y > 1.4986e9 */
267  if (iy > 0x401d654b)
268    {
269      /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
270      if (iy > 0x407d654b)
271	{
272	  if (ix <= 0x3ffeffff)
273	    return (hy < 0) ? huge * huge : tiny * tiny;
274	  if (ix >= 0x3fff0000)
275	    return (hy > 0) ? huge * huge : tiny * tiny;
276	}
277      /* over/underflow if x is not close to one */
278      if (ix < 0x3ffeffff)
279	return (hy < 0) ? huge * huge : tiny * tiny;
280      if (ix > 0x3fff0000)
281	return (hy > 0) ? huge * huge : tiny * tiny;
282    }
283
284  ay = y > 0 ? y : -y;
285  if (ay < 0x1p-128)
286    y = y < 0 ? -0x1p-128 : 0x1p-128;
287
288  n = 0;
289  /* take care subnormal number */
290  if (ix < 0x00010000)
291    {
292      ax *= two113;
293      n -= 113;
294      o.value = ax;
295      ix = o.words32.w0;
296    }
297  n += ((ix) >> 16) - 0x3fff;
298  j = ix & 0x0000ffff;
299  /* determine interval */
300  ix = j | 0x3fff0000;		/* normalize ix */
301  if (j <= 0x3988)
302    k = 0;			/* |x|<sqrt(3/2) */
303  else if (j < 0xbb67)
304    k = 1;			/* |x|<sqrt(3)   */
305  else
306    {
307      k = 0;
308      n += 1;
309      ix -= 0x00010000;
310    }
311
312  o.value = ax;
313  o.words32.w0 = ix;
314  ax = o.value;
315
316  /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
317  u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
318  v = one / (ax + bp[k]);
319  s = u * v;
320  s_h = s;
321
322  o.value = s_h;
323  o.words32.w3 = 0;
324  o.words32.w2 &= 0xf8000000;
325  s_h = o.value;
326  /* t_h=ax+bp[k] High */
327  t_h = ax + bp[k];
328  o.value = t_h;
329  o.words32.w3 = 0;
330  o.words32.w2 &= 0xf8000000;
331  t_h = o.value;
332  t_l = ax - (t_h - bp[k]);
333  s_l = v * ((u - s_h * t_h) - s_h * t_l);
334  /* compute log(ax) */
335  s2 = s * s;
336  u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
337  v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
338  r = s2 * s2 * u / v;
339  r += s_l * (s_h + s);
340  s2 = s_h * s_h;
341  t_h = 3.0 + s2 + r;
342  o.value = t_h;
343  o.words32.w3 = 0;
344  o.words32.w2 &= 0xf8000000;
345  t_h = o.value;
346  t_l = r - ((t_h - 3.0) - s2);
347  /* u+v = s*(1+...) */
348  u = s_h * t_h;
349  v = s_l * t_h + t_l * s;
350  /* 2/(3log2)*(s+...) */
351  p_h = u + v;
352  o.value = p_h;
353  o.words32.w3 = 0;
354  o.words32.w2 &= 0xf8000000;
355  p_h = o.value;
356  p_l = v - (p_h - u);
357  z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
358  z_l = cp_l * p_h + p_l * cp + dp_l[k];
359  /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
360  t = (__float128) n;
361  t1 = (((z_h + z_l) + dp_h[k]) + t);
362  o.value = t1;
363  o.words32.w3 = 0;
364  o.words32.w2 &= 0xf8000000;
365  t1 = o.value;
366  t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
367
368  /* s (sign of result -ve**odd) = -1 else = 1 */
369  s = one;
370  if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
371    s = -one;			/* (-ve)**(odd int) */
372
373  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
374  y1 = y;
375  o.value = y1;
376  o.words32.w3 = 0;
377  o.words32.w2 &= 0xf8000000;
378  y1 = o.value;
379  p_l = (y - y1) * t1 + y * t2;
380  p_h = y1 * t1;
381  z = p_l + p_h;
382  o.value = z;
383  j = o.words32.w0;
384  if (j >= 0x400d0000) /* z >= 16384 */
385    {
386      /* if z > 16384 */
387      if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
388	return s * huge * huge;	/* overflow */
389      else
390	{
391	  if (p_l + ovt > z - p_h)
392	    return s * huge * huge;	/* overflow */
393	}
394    }
395  else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
396    {
397      /* z < -16495 */
398      if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
399	  != 0)
400	return s * tiny * tiny;	/* underflow */
401      else
402	{
403	  if (p_l <= z - p_h)
404	    return s * tiny * tiny;	/* underflow */
405	}
406    }
407  /* compute 2**(p_h+p_l) */
408  i = j & 0x7fffffff;
409  k = (i >> 16) - 0x3fff;
410  n = 0;
411  if (i > 0x3ffe0000)
412    {				/* if |z| > 0.5, set n = [z+0.5] */
413      n = floorq (z + 0.5Q);
414      t = n;
415      p_h -= t;
416    }
417  t = p_l + p_h;
418  o.value = t;
419  o.words32.w3 = 0;
420  o.words32.w2 &= 0xf8000000;
421  t = o.value;
422  u = t * lg2_h;
423  v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
424  z = u + v;
425  w = v - (z - u);
426  /*  exp(z) */
427  t = z * z;
428  u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
429  v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
430  t1 = z - t * u / v;
431  r = (z * t1) / (t1 - two) - (w + z * w);
432  z = one - (r - z);
433  o.value = z;
434  j = o.words32.w0;
435  j += (n << 16);
436  if ((j >> 16) <= 0)
437    z = scalbnq (z, n);	/* subnormal output */
438  else
439    {
440      o.words32.w0 = j;
441      z = o.value;
442    }
443  return s * z;
444}
445