1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/* Expansions and modifications for 128-bit long double are
13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14   and are incorporated herein by permission of the author.  The author
15   reserves the right to distribute this material elsewhere under different
16   copying permissions.  These modifications are distributed here under
17   the following terms:
18
19    This library is free software; you can redistribute it and/or
20    modify it under the terms of the GNU Lesser General Public
21    License as published by the Free Software Foundation; either
22    version 2.1 of the License, or (at your option) any later version.
23
24    This library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with this library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33/* powq(x,y) return x**y
34 *
35 *		      n
36 * Method:  Let x =  2   * (1+f)
37 *	1. Compute and return log2(x) in two pieces:
38 *		log2(x) = w1 + w2,
39 *	   where w1 has 113-53 = 60 bit trailing zeros.
40 *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41 *	   arithmetic, where |y'|<=0.5.
42 *	3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 *	1.  (anything) ** 0  is 1
46 *	2.  (anything) ** 1  is itself
47 *	3.  (anything) ** NAN is NAN
48 *	4.  NAN ** (anything except 0) is NAN
49 *	5.  +-(|x| > 1) **  +INF is +INF
50 *	6.  +-(|x| > 1) **  -INF is +0
51 *	7.  +-(|x| < 1) **  +INF is +0
52 *	8.  +-(|x| < 1) **  -INF is +INF
53 *	9.  +-1         ** +-INF is NAN
54 *	10. +0 ** (+anything except 0, NAN)               is +0
55 *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56 *	12. +0 ** (-anything except 0, NAN)               is +INF
57 *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58 *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 *	15. +INF ** (+anything except 0,NAN) is +INF
60 *	16. +INF ** (-anything except 0,NAN) is +0
61 *	17. -INF ** (anything)  = -0 ** (-anything)
62 *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67#include "quadmath-imp.h"
68
69static const __float128 bp[] = {
70  1,
71  1.5Q,
72};
73
74/* log_2(1.5) */
75static const __float128 dp_h[] = {
76  0.0,
77  5.8496250072115607565592654282227158546448E-1Q
78};
79
80/* Low part of log_2(1.5) */
81static const __float128 dp_l[] = {
82  0.0,
83  1.0579781240112554492329533686862998106046E-16Q
84};
85
86static const __float128 zero = 0,
87  one = 1,
88  two = 2,
89  two113 = 1.0384593717069655257060992658440192E34Q,
90  huge = 1.0e3000Q,
91  tiny = 1.0e-3000Q;
92
93/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
94   z = (x-1)/(x+1)
95   1 <= x <= 1.25
96   Peak relative error 2.3e-37 */
97static const __float128 LN[] =
98{
99 -3.0779177200290054398792536829702930623200E1Q,
100  6.5135778082209159921251824580292116201640E1Q,
101 -4.6312921812152436921591152809994014413540E1Q,
102  1.2510208195629420304615674658258363295208E1Q,
103 -9.9266909031921425609179910128531667336670E-1Q
104};
105static const __float128 LD[] =
106{
107 -5.129862866715009066465422805058933131960E1Q,
108  1.452015077564081884387441590064272782044E2Q,
109 -1.524043275549860505277434040464085593165E2Q,
110  7.236063513651544224319663428634139768808E1Q,
111 -1.494198912340228235853027849917095580053E1Q
112  /* 1.0E0 */
113};
114
115/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
116   0 <= x <= 0.5
117   Peak relative error 5.7e-38  */
118static const __float128 PN[] =
119{
120  5.081801691915377692446852383385968225675E8Q,
121  9.360895299872484512023336636427675327355E6Q,
122  4.213701282274196030811629773097579432957E4Q,
123  5.201006511142748908655720086041570288182E1Q,
124  9.088368420359444263703202925095675982530E-3Q,
125};
126static const __float128 PD[] =
127{
128  3.049081015149226615468111430031590411682E9Q,
129  1.069833887183886839966085436512368982758E8Q,
130  8.259257717868875207333991924545445705394E5Q,
131  1.872583833284143212651746812884298360922E3Q,
132  /* 1.0E0 */
133};
134
135static const __float128
136  /* ln 2 */
137  lg2 = 6.9314718055994530941723212145817656807550E-1Q,
138  lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
139  lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
140  ovt = 8.0085662595372944372e-0017Q,
141  /* 2/(3*log(2)) */
142  cp = 9.6179669392597560490661645400126142495110E-1Q,
143  cp_h = 9.6179669392597555432899980587535537779331E-1Q,
144  cp_l = 5.0577616648125906047157785230014751039424E-17Q;
145
146__float128
147powq (__float128 x, __float128 y)
148{
149  __float128 z, ax, z_h, z_l, p_h, p_l;
150  __float128 y1, t1, t2, r, s, sgn, t, u, v, w;
151  __float128 s2, s_h, s_l, t_h, t_l, ay;
152  int32_t i, j, k, yisint, n;
153  uint32_t ix, iy;
154  int32_t hx, hy;
155  ieee854_float128 o, p, q;
156
157  p.value = x;
158  hx = p.words32.w0;
159  ix = hx & 0x7fffffff;
160
161  q.value = y;
162  hy = q.words32.w0;
163  iy = hy & 0x7fffffff;
164
165
166  /* y==zero: x**0 = 1 */
167  if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0
168      && !issignalingq (x))
169    return one;
170
171  /* 1.0**y = 1; -1.0**+-Inf = 1 */
172  if (x == one && !issignalingq (y))
173    return one;
174  if (x == -1 && iy == 0x7fff0000
175      && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
176    return one;
177
178  /* +-NaN return x+y */
179  if ((ix > 0x7fff0000)
180      || ((ix == 0x7fff0000)
181	  && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
182      || (iy > 0x7fff0000)
183      || ((iy == 0x7fff0000)
184	  && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
185    return x + y;
186
187  /* determine if y is an odd int when x < 0
188   * yisint = 0       ... y is not an integer
189   * yisint = 1       ... y is an odd int
190   * yisint = 2       ... y is an even int
191   */
192  yisint = 0;
193  if (hx < 0)
194    {
195      if (iy >= 0x40700000)	/* 2^113 */
196	yisint = 2;		/* even integer y */
197      else if (iy >= 0x3fff0000)	/* 1.0 */
198	{
199	  if (floorq (y) == y)
200	    {
201	      z = 0.5 * y;
202	      if (floorq (z) == z)
203		yisint = 2;
204	      else
205		yisint = 1;
206	    }
207	}
208    }
209
210  /* special value of y */
211  if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
212    {
213      if (iy == 0x7fff0000)	/* y is +-inf */
214	{
215	  if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
216	      == 0)
217	    return y - y;	/* +-1**inf is NaN */
218	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
219	    return (hy >= 0) ? y : zero;
220	  else			/* (|x|<1)**-,+inf = inf,0 */
221	    return (hy < 0) ? -y : zero;
222	}
223      if (iy == 0x3fff0000)
224	{			/* y is  +-1 */
225	  if (hy < 0)
226	    return one / x;
227	  else
228	    return x;
229	}
230      if (hy == 0x40000000)
231	return x * x;		/* y is  2 */
232      if (hy == 0x3ffe0000)
233	{			/* y is  0.5 */
234	  if (hx >= 0)		/* x >= +0 */
235	    return sqrtq (x);
236	}
237    }
238
239  ax = fabsq (x);
240  /* special value of x */
241  if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
242    {
243      if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
244	{
245	  z = ax;		/*x is +-0,+-inf,+-1 */
246	  if (hy < 0)
247	    z = one / z;	/* z = (1/|x|) */
248	  if (hx < 0)
249	    {
250	      if (((ix - 0x3fff0000) | yisint) == 0)
251		{
252		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
253		}
254	      else if (yisint == 1)
255		z = -z;		/* (x<0)**odd = -(|x|**odd) */
256	    }
257	  return z;
258	}
259    }
260
261  /* (x<0)**(non-int) is NaN */
262  if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
263    return (x - x) / (x - x);
264
265  /* sgn (sign of result -ve**odd) = -1 else = 1 */
266  sgn = one;
267  if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
268    sgn = -one;			/* (-ve)**(odd int) */
269
270  /* |y| is huge.
271     2^-16495 = 1/2 of smallest representable value.
272     If (1 - 1/131072)^y underflows, y > 1.4986e9 */
273  if (iy > 0x401d654b)
274    {
275      /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
276      if (iy > 0x407d654b)
277	{
278	  if (ix <= 0x3ffeffff)
279	    return (hy < 0) ? huge * huge : tiny * tiny;
280	  if (ix >= 0x3fff0000)
281	    return (hy > 0) ? huge * huge : tiny * tiny;
282	}
283      /* over/underflow if x is not close to one */
284      if (ix < 0x3ffeffff)
285	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
286      if (ix > 0x3fff0000)
287	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
288    }
289
290  ay = y > 0 ? y : -y;
291  if (ay < 0x1p-128)
292    y = y < 0 ? -0x1p-128 : 0x1p-128;
293
294  n = 0;
295  /* take care subnormal number */
296  if (ix < 0x00010000)
297    {
298      ax *= two113;
299      n -= 113;
300      o.value = ax;
301      ix = o.words32.w0;
302    }
303  n += ((ix) >> 16) - 0x3fff;
304  j = ix & 0x0000ffff;
305  /* determine interval */
306  ix = j | 0x3fff0000;		/* normalize ix */
307  if (j <= 0x3988)
308    k = 0;			/* |x|<sqrt(3/2) */
309  else if (j < 0xbb67)
310    k = 1;			/* |x|<sqrt(3)   */
311  else
312    {
313      k = 0;
314      n += 1;
315      ix -= 0x00010000;
316    }
317
318  o.value = ax;
319  o.words32.w0 = ix;
320  ax = o.value;
321
322  /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
323  u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
324  v = one / (ax + bp[k]);
325  s = u * v;
326  s_h = s;
327
328  o.value = s_h;
329  o.words32.w3 = 0;
330  o.words32.w2 &= 0xf8000000;
331  s_h = o.value;
332  /* t_h=ax+bp[k] High */
333  t_h = ax + bp[k];
334  o.value = t_h;
335  o.words32.w3 = 0;
336  o.words32.w2 &= 0xf8000000;
337  t_h = o.value;
338  t_l = ax - (t_h - bp[k]);
339  s_l = v * ((u - s_h * t_h) - s_h * t_l);
340  /* compute log(ax) */
341  s2 = s * s;
342  u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
343  v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
344  r = s2 * s2 * u / v;
345  r += s_l * (s_h + s);
346  s2 = s_h * s_h;
347  t_h = 3.0 + s2 + r;
348  o.value = t_h;
349  o.words32.w3 = 0;
350  o.words32.w2 &= 0xf8000000;
351  t_h = o.value;
352  t_l = r - ((t_h - 3.0) - s2);
353  /* u+v = s*(1+...) */
354  u = s_h * t_h;
355  v = s_l * t_h + t_l * s;
356  /* 2/(3log2)*(s+...) */
357  p_h = u + v;
358  o.value = p_h;
359  o.words32.w3 = 0;
360  o.words32.w2 &= 0xf8000000;
361  p_h = o.value;
362  p_l = v - (p_h - u);
363  z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
364  z_l = cp_l * p_h + p_l * cp + dp_l[k];
365  /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
366  t = (__float128) n;
367  t1 = (((z_h + z_l) + dp_h[k]) + t);
368  o.value = t1;
369  o.words32.w3 = 0;
370  o.words32.w2 &= 0xf8000000;
371  t1 = o.value;
372  t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
373
374  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
375  y1 = y;
376  o.value = y1;
377  o.words32.w3 = 0;
378  o.words32.w2 &= 0xf8000000;
379  y1 = o.value;
380  p_l = (y - y1) * t1 + y * t2;
381  p_h = y1 * t1;
382  z = p_l + p_h;
383  o.value = z;
384  j = o.words32.w0;
385  if (j >= 0x400d0000) /* z >= 16384 */
386    {
387      /* if z > 16384 */
388      if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
389	return sgn * huge * huge;	/* overflow */
390      else
391	{
392	  if (p_l + ovt > z - p_h)
393	    return sgn * huge * huge;	/* overflow */
394	}
395    }
396  else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
397    {
398      /* z < -16495 */
399      if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
400	  != 0)
401	return sgn * tiny * tiny;	/* underflow */
402      else
403	{
404	  if (p_l <= z - p_h)
405	    return sgn * tiny * tiny;	/* underflow */
406	}
407    }
408  /* compute 2**(p_h+p_l) */
409  i = j & 0x7fffffff;
410  k = (i >> 16) - 0x3fff;
411  n = 0;
412  if (i > 0x3ffe0000)
413    {				/* if |z| > 0.5, set n = [z+0.5] */
414      n = floorq (z + 0.5Q);
415      t = n;
416      p_h -= t;
417    }
418  t = p_l + p_h;
419  o.value = t;
420  o.words32.w3 = 0;
421  o.words32.w2 &= 0xf8000000;
422  t = o.value;
423  u = t * lg2_h;
424  v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
425  z = u + v;
426  w = v - (z - u);
427  /*  exp(z) */
428  t = z * z;
429  u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
430  v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
431  t1 = z - t * u / v;
432  r = (z * t1) / (t1 - two) - (w + z * w);
433  z = one - (r - z);
434  o.value = z;
435  j = o.words32.w0;
436  j += (n << 16);
437  if ((j >> 16) <= 0)
438    {
439      z = scalbnq (z, n);	/* subnormal output */
440      __float128 force_underflow = z * z;
441      math_force_eval (force_underflow);
442    }
443  else
444    {
445      o.words32.w0 = j;
446      z = o.value;
447    }
448  return sgn * z;
449}
450