1/*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "prims/jni.h"
27#include "runtime/interfaceSupport.hpp"
28#include "runtime/sharedRuntime.hpp"
29#include "runtime/sharedRuntimeMath.hpp"
30
31// This file contains copies of the fdlibm routines used by
32// StrictMath. It turns out that it is almost always required to use
33// these runtime routines; the Intel CPU doesn't meet the Java
34// specification for sin/cos outside a certain limited argument range,
35// and the SPARC CPU doesn't appear to have sin/cos instructions. It
36// also turns out that avoiding the indirect call through function
37// pointer out to libjava.so in SharedRuntime speeds these routines up
38// by roughly 15% on both Win32/x86 and Solaris/SPARC.
39
40/*
41 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
42 * double x[],y[]; int e0,nx,prec; int ipio2[];
43 *
44 * __kernel_rem_pio2 return the last three digits of N with
45 *              y = x - N*pi/2
46 * so that |y| < pi/2.
47 *
48 * The method is to compute the integer (mod 8) and fraction parts of
49 * (2/pi)*x without doing the full multiplication. In general we
50 * skip the part of the product that are known to be a huge integer (
51 * more accurately, = 0 mod 8 ). Thus the number of operations are
52 * independent of the exponent of the input.
53 *
54 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
55 *
56 * Input parameters:
57 *      x[]     The input value (must be positive) is broken into nx
58 *              pieces of 24-bit integers in double precision format.
59 *              x[i] will be the i-th 24 bit of x. The scaled exponent
60 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
61 *              match x's up to 24 bits.
62 *
63 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
64 *                      e0 = ilogb(z)-23
65 *                      z  = scalbn(z,-e0)
66 *              for i = 0,1,2
67 *                      x[i] = floor(z)
68 *                      z    = (z-x[i])*2**24
69 *
70 *
71 *      y[]     ouput result in an array of double precision numbers.
72 *              The dimension of y[] is:
73 *                      24-bit  precision       1
74 *                      53-bit  precision       2
75 *                      64-bit  precision       2
76 *                      113-bit precision       3
77 *              The actual value is the sum of them. Thus for 113-bit
78 *              precsion, one may have to do something like:
79 *
80 *              long double t,w,r_head, r_tail;
81 *              t = (long double)y[2] + (long double)y[1];
82 *              w = (long double)y[0];
83 *              r_head = t+w;
84 *              r_tail = w - (r_head - t);
85 *
86 *      e0      The exponent of x[0]
87 *
88 *      nx      dimension of x[]
89 *
90 *      prec    an interger indicating the precision:
91 *                      0       24  bits (single)
92 *                      1       53  bits (double)
93 *                      2       64  bits (extended)
94 *                      3       113 bits (quad)
95 *
96 *      ipio2[]
97 *              integer array, contains the (24*i)-th to (24*i+23)-th
98 *              bit of 2/pi after binary point. The corresponding
99 *              floating value is
100 *
101 *                      ipio2[i] * 2^(-24(i+1)).
102 *
103 * External function:
104 *      double scalbn(), floor();
105 *
106 *
107 * Here is the description of some local variables:
108 *
109 *      jk      jk+1 is the initial number of terms of ipio2[] needed
110 *              in the computation. The recommended value is 2,3,4,
111 *              6 for single, double, extended,and quad.
112 *
113 *      jz      local integer variable indicating the number of
114 *              terms of ipio2[] used.
115 *
116 *      jx      nx - 1
117 *
118 *      jv      index for pointing to the suitable ipio2[] for the
119 *              computation. In general, we want
120 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
121 *              is an integer. Thus
122 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
123 *              Hence jv = max(0,(e0-3)/24).
124 *
125 *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
126 *
127 *      q[]     double array with integral value, representing the
128 *              24-bits chunk of the product of x and 2/pi.
129 *
130 *      q0      the corresponding exponent of q[0]. Note that the
131 *              exponent for q[i] would be q0-24*i.
132 *
133 *      PIo2[]  double precision array, obtained by cutting pi/2
134 *              into 24 bits chunks.
135 *
136 *      f[]     ipio2[] in floating point
137 *
138 *      iq[]    integer array by breaking up q[] in 24-bits chunk.
139 *
140 *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
141 *
142 *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
143 *              it also indicates the *sign* of the result.
144 *
145 */
146
147
148/*
149 * Constants:
150 * The hexadecimal values are the intended ones for the following
151 * constants. The decimal values may be used, provided that the
152 * compiler will convert from decimal to binary accurately enough
153 * to produce the hexadecimal values shown.
154 */
155
156
157static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
158
159static const double PIo2[] = {
160  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
161  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
162  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
163  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
164  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
165  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
166  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
167  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
168};
169
170static const double
171zeroB   = 0.0,
172one     = 1.0,
173two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
174twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
175
176static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
177  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
178  double z,fw,f[20],fq[20],q[20];
179
180  /* initialize jk*/
181  jk = init_jk[prec];
182  jp = jk;
183
184  /* determine jx,jv,q0, note that 3>q0 */
185  jx =  nx-1;
186  jv = (e0-3)/24; if(jv<0) jv=0;
187  q0 =  e0-24*(jv+1);
188
189  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
190  j = jv-jx; m = jx+jk;
191  for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
192
193  /* compute q[0],q[1],...q[jk] */
194  for (i=0;i<=jk;i++) {
195    for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
196  }
197
198  jz = jk;
199recompute:
200  /* distill q[] into iq[] reversingly */
201  for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
202    fw    =  (double)((int)(twon24* z));
203    iq[i] =  (int)(z-two24B*fw);
204    z     =  q[j-1]+fw;
205  }
206
207  /* compute n */
208  z  = scalbnA(z,q0);           /* actual value of z */
209  z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
210  n  = (int) z;
211  z -= (double)n;
212  ih = 0;
213  if(q0>0) {    /* need iq[jz-1] to determine n */
214    i  = (iq[jz-1]>>(24-q0)); n += i;
215    iq[jz-1] -= i<<(24-q0);
216    ih = iq[jz-1]>>(23-q0);
217  }
218  else if(q0==0) ih = iq[jz-1]>>23;
219  else if(z>=0.5) ih=2;
220
221  if(ih>0) {    /* q > 0.5 */
222    n += 1; carry = 0;
223    for(i=0;i<jz ;i++) {        /* compute 1-q */
224      j = iq[i];
225      if(carry==0) {
226        if(j!=0) {
227          carry = 1; iq[i] = 0x1000000- j;
228        }
229      } else  iq[i] = 0xffffff - j;
230    }
231    if(q0>0) {          /* rare case: chance is 1 in 12 */
232      switch(q0) {
233      case 1:
234        iq[jz-1] &= 0x7fffff; break;
235      case 2:
236        iq[jz-1] &= 0x3fffff; break;
237      }
238    }
239    if(ih==2) {
240      z = one - z;
241      if(carry!=0) z -= scalbnA(one,q0);
242    }
243  }
244
245  /* check if recomputation is needed */
246  if(z==zeroB) {
247    j = 0;
248    for (i=jz-1;i>=jk;i--) j |= iq[i];
249    if(j==0) { /* need recomputation */
250      for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
251
252      for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
253        f[jx+i] = (double) ipio2[jv+i];
254        for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
255        q[i] = fw;
256      }
257      jz += k;
258      goto recompute;
259    }
260  }
261
262  /* chop off zero terms */
263  if(z==0.0) {
264    jz -= 1; q0 -= 24;
265    while(iq[jz]==0) { jz--; q0-=24;}
266  } else { /* break z into 24-bit if necessary */
267    z = scalbnA(z,-q0);
268    if(z>=two24B) {
269      fw = (double)((int)(twon24*z));
270      iq[jz] = (int)(z-two24B*fw);
271      jz += 1; q0 += 24;
272      iq[jz] = (int) fw;
273    } else iq[jz] = (int) z ;
274  }
275
276  /* convert integer "bit" chunk to floating-point value */
277  fw = scalbnA(one,q0);
278  for(i=jz;i>=0;i--) {
279    q[i] = fw*(double)iq[i]; fw*=twon24;
280  }
281
282  /* compute PIo2[0,...,jp]*q[jz,...,0] */
283  for(i=jz;i>=0;i--) {
284    for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
285    fq[jz-i] = fw;
286  }
287
288  /* compress fq[] into y[] */
289  switch(prec) {
290  case 0:
291    fw = 0.0;
292    for (i=jz;i>=0;i--) fw += fq[i];
293    y[0] = (ih==0)? fw: -fw;
294    break;
295  case 1:
296  case 2:
297    fw = 0.0;
298    for (i=jz;i>=0;i--) fw += fq[i];
299    y[0] = (ih==0)? fw: -fw;
300    fw = fq[0]-fw;
301    for (i=1;i<=jz;i++) fw += fq[i];
302    y[1] = (ih==0)? fw: -fw;
303    break;
304  case 3:       /* painful */
305    for (i=jz;i>0;i--) {
306      fw      = fq[i-1]+fq[i];
307      fq[i]  += fq[i-1]-fw;
308      fq[i-1] = fw;
309    }
310    for (i=jz;i>1;i--) {
311      fw      = fq[i-1]+fq[i];
312      fq[i]  += fq[i-1]-fw;
313      fq[i-1] = fw;
314    }
315    for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
316    if(ih==0) {
317      y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
318    } else {
319      y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
320    }
321  }
322  return n&7;
323}
324
325
326/*
327 * ====================================================
328 * Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved.
329 *
330 * Developed at SunPro, a Sun Microsystems, Inc. business.
331 * Permission to use, copy, modify, and distribute this
332 * software is freely granted, provided that this notice
333 * is preserved.
334 * ====================================================
335 *
336 */
337
338/* __ieee754_rem_pio2(x,y)
339 *
340 * return the remainder of x rem pi/2 in y[0]+y[1]
341 * use __kernel_rem_pio2()
342 */
343
344/*
345 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
346 */
347static const int two_over_pi[] = {
348  0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
349  0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
350  0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
351  0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
352  0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
353  0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
354  0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
355  0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
356  0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
357  0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
358  0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
359};
360
361static const int npio2_hw[] = {
362  0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
363  0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
364  0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
365  0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
366  0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
367  0x404858EB, 0x404921FB,
368};
369
370/*
371 * invpio2:  53 bits of 2/pi
372 * pio2_1:   first  33 bit of pi/2
373 * pio2_1t:  pi/2 - pio2_1
374 * pio2_2:   second 33 bit of pi/2
375 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
376 * pio2_3:   third  33 bit of pi/2
377 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
378 */
379
380static const double
381zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
382half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
383two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
384invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
385pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
386pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
387pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
388pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
389pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
390pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
391
392static int __ieee754_rem_pio2(double x, double *y) {
393  double z,w,t,r,fn;
394  double tx[3];
395  int e0,i,j,nx,n,ix,hx,i0;
396
397  i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
398  hx = *(i0+(int*)&x);          /* high word of x */
399  ix = hx&0x7fffffff;
400  if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
401    {y[0] = x; y[1] = 0; return 0;}
402  if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
403    if(hx>0) {
404      z = x - pio2_1;
405      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
406        y[0] = z - pio2_1t;
407        y[1] = (z-y[0])-pio2_1t;
408      } else {                /* near pi/2, use 33+33+53 bit pi */
409        z -= pio2_2;
410        y[0] = z - pio2_2t;
411        y[1] = (z-y[0])-pio2_2t;
412      }
413      return 1;
414    } else {    /* negative x */
415      z = x + pio2_1;
416      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
417        y[0] = z + pio2_1t;
418        y[1] = (z-y[0])+pio2_1t;
419      } else {                /* near pi/2, use 33+33+53 bit pi */
420        z += pio2_2;
421        y[0] = z + pio2_2t;
422        y[1] = (z-y[0])+pio2_2t;
423      }
424      return -1;
425    }
426  }
427  if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
428    t  = fabsd(x);
429    n  = (int) (t*invpio2+half);
430    fn = (double)n;
431    r  = t-fn*pio2_1;
432    w  = fn*pio2_1t;    /* 1st round good to 85 bit */
433    if(n<32&&ix!=npio2_hw[n-1]) {
434      y[0] = r-w;       /* quick check no cancellation */
435    } else {
436      j  = ix>>20;
437      y[0] = r-w;
438      i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
439      if(i>16) {  /* 2nd iteration needed, good to 118 */
440        t  = r;
441        w  = fn*pio2_2;
442        r  = t-w;
443        w  = fn*pio2_2t-((t-r)-w);
444        y[0] = r-w;
445        i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
446        if(i>49)  {     /* 3rd iteration need, 151 bits acc */
447          t  = r;       /* will cover all possible cases */
448          w  = fn*pio2_3;
449          r  = t-w;
450          w  = fn*pio2_3t-((t-r)-w);
451          y[0] = r-w;
452        }
453      }
454    }
455    y[1] = (r-y[0])-w;
456    if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
457    else         return n;
458  }
459  /*
460   * all other (large) arguments
461   */
462  if(ix>=0x7ff00000) {          /* x is inf or NaN */
463    y[0]=y[1]=x-x; return 0;
464  }
465  /* set z = scalbn(|x|,ilogb(x)-23) */
466  *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
467  e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
468  *(i0+(int*)&z) = ix - (e0<<20);
469  for(i=0;i<2;i++) {
470    tx[i] = (double)((int)(z));
471    z     = (z-tx[i])*two24A;
472  }
473  tx[2] = z;
474  nx = 3;
475  while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
476  n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
477  if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
478  return n;
479}
480
481
482/* __kernel_sin( x, y, iy)
483 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
484 * Input x is assumed to be bounded by ~pi/4 in magnitude.
485 * Input y is the tail of x.
486 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
487 *
488 * Algorithm
489 *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
490 *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
491 *      3. sin(x) is approximated by a polynomial of degree 13 on
492 *         [0,pi/4]
493 *                               3            13
494 *              sin(x) ~ x + S1*x + ... + S6*x
495 *         where
496 *
497 *      |sin(x)         2     4     6     8     10     12  |     -58
498 *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
499 *      |  x                                               |
500 *
501 *      4. sin(x+y) = sin(x) + sin'(x')*y
502 *                  ~ sin(x) + (1-x*x/2)*y
503 *         For better accuracy, let
504 *                   3      2      2      2      2
505 *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
506 *         then                   3    2
507 *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
508 */
509
510static const double
511S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
512S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
513S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
514S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
515S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
516S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
517
518static double __kernel_sin(double x, double y, int iy)
519{
520        double z,r,v;
521        int ix;
522        ix = high(x)&0x7fffffff;                /* high word of x */
523        if(ix<0x3e400000)                       /* |x| < 2**-27 */
524           {if((int)x==0) return x;}            /* generate inexact */
525        z       =  x*x;
526        v       =  z*x;
527        r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
528        if(iy==0) return x+v*(S1+z*r);
529        else      return x-((z*(half*y-v*r)-y)-v*S1);
530}
531
532/*
533 * __kernel_cos( x,  y )
534 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
535 * Input x is assumed to be bounded by ~pi/4 in magnitude.
536 * Input y is the tail of x.
537 *
538 * Algorithm
539 *      1. Since cos(-x) = cos(x), we need only to consider positive x.
540 *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
541 *      3. cos(x) is approximated by a polynomial of degree 14 on
542 *         [0,pi/4]
543 *                                       4            14
544 *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
545 *         where the remez error is
546 *
547 *      |              2     4     6     8     10    12     14 |     -58
548 *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
549 *      |                                                      |
550 *
551 *                     4     6     8     10    12     14
552 *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
553 *             cos(x) = 1 - x*x/2 + r
554 *         since cos(x+y) ~ cos(x) - sin(x)*y
555 *                        ~ cos(x) - x*y,
556 *         a correction term is necessary in cos(x) and hence
557 *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
558 *         For better accuracy when x > 0.3, let qx = |x|/4 with
559 *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
560 *         Then
561 *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
562 *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
563 *         magnitude of the latter is at least a quarter of x*x/2,
564 *         thus, reducing the rounding error in the subtraction.
565 */
566
567static const double
568C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
569C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
570C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
571C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
572C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
573C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
574
575static double __kernel_cos(double x, double y)
576{
577  double a,h,z,r,qx=0;
578  int ix;
579  ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
580  if(ix<0x3e400000) {                   /* if x < 2**27 */
581    if(((int)x)==0) return one;         /* generate inexact */
582  }
583  z  = x*x;
584  r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
585  if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
586    return one - (0.5*z - (z*r - x*y));
587  else {
588    if(ix > 0x3fe90000) {               /* x > 0.78125 */
589      qx = 0.28125;
590    } else {
591      set_high(&qx, ix-0x00200000); /* x/4 */
592      set_low(&qx, 0);
593    }
594    h = 0.5*z-qx;
595    a = one-qx;
596    return a - (h - (z*r-x*y));
597  }
598}
599
600/* __kernel_tan( x, y, k )
601 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
602 * Input x is assumed to be bounded by ~pi/4 in magnitude.
603 * Input y is the tail of x.
604 * Input k indicates whether tan (if k=1) or
605 * -1/tan (if k= -1) is returned.
606 *
607 * Algorithm
608 *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
609 *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
610 *      3. tan(x) is approximated by a odd polynomial of degree 27 on
611 *         [0,0.67434]
612 *                               3             27
613 *              tan(x) ~ x + T1*x + ... + T13*x
614 *         where
615 *
616 *              |tan(x)         2     4            26   |     -59.2
617 *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
618 *              |  x                                    |
619 *
620 *         Note: tan(x+y) = tan(x) + tan'(x)*y
621 *                        ~ tan(x) + (1+x*x)*y
622 *         Therefore, for better accuracy in computing tan(x+y), let
623 *                   3      2      2       2       2
624 *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
625 *         then
626 *                                  3    2
627 *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
628 *
629 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
630 *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
631 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
632 */
633
634static const double
635pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
636pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
637T[] =  {
638  3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
639  1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
640  5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
641  2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
642  8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
643  3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
644  1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
645  5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
646  2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
647  7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
648  7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
649 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
650  2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
651};
652
653static double __kernel_tan(double x, double y, int iy)
654{
655  double z,r,v,w,s;
656  int ix,hx;
657  hx = high(x);           /* high word of x */
658  ix = hx&0x7fffffff;     /* high word of |x| */
659  if(ix<0x3e300000) {                     /* x < 2**-28 */
660    if((int)x==0) {                       /* generate inexact */
661      if (((ix | low(x)) | (iy + 1)) == 0)
662        return one / fabsd(x);
663      else {
664        if (iy == 1)
665          return x;
666        else {    /* compute -1 / (x+y) carefully */
667          double a, t;
668
669          z = w = x + y;
670          set_low(&z, 0);
671          v = y - (z - x);
672          t = a = -one / w;
673          set_low(&t, 0);
674          s = one + t * z;
675          return t + a * (s + t * v);
676        }
677      }
678    }
679  }
680  if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
681    if(hx<0) {x = -x; y = -y;}
682    z = pio4-x;
683    w = pio4lo-y;
684    x = z+w; y = 0.0;
685  }
686  z       =  x*x;
687  w       =  z*z;
688  /* Break x^5*(T[1]+x^2*T[2]+...) into
689   *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
690   *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
691   */
692  r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
693  v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
694  s = z*x;
695  r = y + z*(s*(r+v)+y);
696  r += T[0]*s;
697  w = x+r;
698  if(ix>=0x3FE59428) {
699    v = (double)iy;
700    return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
701  }
702  if(iy==1) return w;
703  else {          /* if allow error up to 2 ulp,
704                     simply return -1.0/(x+r) here */
705    /*  compute -1.0/(x+r) accurately */
706    double a,t;
707    z  = w;
708    set_low(&z, 0);
709    v  = r-(z - x);     /* z+v = r+x */
710    t = a  = -1.0/w;    /* a = -1.0/w */
711    set_low(&t, 0);
712    s  = 1.0+t*z;
713    return t+a*(s+t*v);
714  }
715}
716
717
718//----------------------------------------------------------------------
719//
720// Routines for new sin/cos implementation
721//
722//----------------------------------------------------------------------
723
724/* sin(x)
725 * Return sine function of x.
726 *
727 * kernel function:
728 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
729 *      __kernel_cos            ... cose function on [-pi/4,pi/4]
730 *      __ieee754_rem_pio2      ... argument reduction routine
731 *
732 * Method.
733 *      Let S,C and T denote the sin, cos and tan respectively on
734 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
735 *      in [-pi/4 , +pi/4], and let n = k mod 4.
736 *      We have
737 *
738 *          n        sin(x)      cos(x)        tan(x)
739 *     ----------------------------------------------------------
740 *          0          S           C             T
741 *          1          C          -S            -1/T
742 *          2         -S          -C             T
743 *          3         -C           S            -1/T
744 *     ----------------------------------------------------------
745 *
746 * Special cases:
747 *      Let trig be any of sin, cos, or tan.
748 *      trig(+-INF)  is NaN, with signals;
749 *      trig(NaN)    is that NaN;
750 *
751 * Accuracy:
752 *      TRIG(x) returns trig(x) nearly rounded
753 */
754
755JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
756  double y[2],z=0.0;
757  int n, ix;
758
759  /* High word of x. */
760  ix = high(x);
761
762  /* |x| ~< pi/4 */
763  ix &= 0x7fffffff;
764  if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
765
766  /* sin(Inf or NaN) is NaN */
767  else if (ix>=0x7ff00000) return x-x;
768
769  /* argument reduction needed */
770  else {
771    n = __ieee754_rem_pio2(x,y);
772    switch(n&3) {
773    case 0: return  __kernel_sin(y[0],y[1],1);
774    case 1: return  __kernel_cos(y[0],y[1]);
775    case 2: return -__kernel_sin(y[0],y[1],1);
776    default:
777      return -__kernel_cos(y[0],y[1]);
778    }
779  }
780JRT_END
781
782/* cos(x)
783 * Return cosine function of x.
784 *
785 * kernel function:
786 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
787 *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
788 *      __ieee754_rem_pio2      ... argument reduction routine
789 *
790 * Method.
791 *      Let S,C and T denote the sin, cos and tan respectively on
792 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
793 *      in [-pi/4 , +pi/4], and let n = k mod 4.
794 *      We have
795 *
796 *          n        sin(x)      cos(x)        tan(x)
797 *     ----------------------------------------------------------
798 *          0          S           C             T
799 *          1          C          -S            -1/T
800 *          2         -S          -C             T
801 *          3         -C           S            -1/T
802 *     ----------------------------------------------------------
803 *
804 * Special cases:
805 *      Let trig be any of sin, cos, or tan.
806 *      trig(+-INF)  is NaN, with signals;
807 *      trig(NaN)    is that NaN;
808 *
809 * Accuracy:
810 *      TRIG(x) returns trig(x) nearly rounded
811 */
812
813JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
814  double y[2],z=0.0;
815  int n, ix;
816
817  /* High word of x. */
818  ix = high(x);
819
820  /* |x| ~< pi/4 */
821  ix &= 0x7fffffff;
822  if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
823
824  /* cos(Inf or NaN) is NaN */
825  else if (ix>=0x7ff00000) return x-x;
826
827  /* argument reduction needed */
828  else {
829    n = __ieee754_rem_pio2(x,y);
830    switch(n&3) {
831    case 0: return  __kernel_cos(y[0],y[1]);
832    case 1: return -__kernel_sin(y[0],y[1],1);
833    case 2: return -__kernel_cos(y[0],y[1]);
834    default:
835      return  __kernel_sin(y[0],y[1],1);
836    }
837  }
838JRT_END
839
840/* tan(x)
841 * Return tangent function of x.
842 *
843 * kernel function:
844 *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
845 *      __ieee754_rem_pio2      ... argument reduction routine
846 *
847 * Method.
848 *      Let S,C and T denote the sin, cos and tan respectively on
849 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
850 *      in [-pi/4 , +pi/4], and let n = k mod 4.
851 *      We have
852 *
853 *          n        sin(x)      cos(x)        tan(x)
854 *     ----------------------------------------------------------
855 *          0          S           C             T
856 *          1          C          -S            -1/T
857 *          2         -S          -C             T
858 *          3         -C           S            -1/T
859 *     ----------------------------------------------------------
860 *
861 * Special cases:
862 *      Let trig be any of sin, cos, or tan.
863 *      trig(+-INF)  is NaN, with signals;
864 *      trig(NaN)    is that NaN;
865 *
866 * Accuracy:
867 *      TRIG(x) returns trig(x) nearly rounded
868 */
869
870JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
871  double y[2],z=0.0;
872  int n, ix;
873
874  /* High word of x. */
875  ix = high(x);
876
877  /* |x| ~< pi/4 */
878  ix &= 0x7fffffff;
879  if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
880
881  /* tan(Inf or NaN) is NaN */
882  else if (ix>=0x7ff00000) return x-x;            /* NaN */
883
884  /* argument reduction needed */
885  else {
886    n = __ieee754_rem_pio2(x,y);
887    return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
888                                                     -1 -- n odd */
889  }
890JRT_END
891