1/* Copyright (C) 2008-2022 Free Software Foundation, Inc.
2   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3		on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
/* We use a polynomial similar to a Tchebycheff polynomial to get an initial
   seed, and then use a Newton-Raphson iteration step to get an
   approximate result.
   If this result can't be rounded to the exact result with confidence, we
   round to the value between the two closest representable values, and
   test if the correctly rounded value is above or below this value.
32
   Because of the Newton-Raphson iteration step, an error in the seed at X
   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
   or a polynomial that is close to optimal according to the maximum norm
   on the error of the seed value; we want one that is close to optimal
   according to the maximum norm on the error of the result, i.e. we
   want the maxima of the polynomial to increase linearly.
39   Given an interval [X0,X2) over which to approximate,
40   with X1 := (X0+X2)/2,  D := X1-X0, F := 1/D, and S := D/X1 we have,
   like for Tchebycheff polynomials:
42   P(0) := 1
43   but then we have:
44   P(1) := X + S*D
45   P(2) := 2 * X^2 + S*D * X - D^2
46   Then again:
47   P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
48 */

#include <math.h>
#include <stdio.h>
#include <string.h>

/* Smallest (most negative) error that err () has computed so far.  It
   starts at an arbitrary positive value; every error below the current
   value replaces it.  */
static long double merr = 42.;

/* Return the error of the reciprocal approximation obtained at X from the
   linear seed A0 + (X-1)*A1 followed by one Newton-Raphson step.

   A0 is the seed value at X = 1 and A1 is its slope.  The step computes
   2*Y0 - X*Y0*Y0, which in exact arithmetic equals
   1/X - X * (Y0 - 1/X)^2, so the error is never positive there; that is
   why only a lower bound is checked and why merr tracks the minimum.

   An error of -2^-30 or below (what the message calls 1 ULP) is reported
   on stdout.  The result is computed in long double and returned as
   double.  */
double
err (long double a0, long double a1, long double x)
{
  long double y0 = a0 + (x-1)*a1;

  long double approx = 2. * y0 - y0 * x * y0;
  /* Not named `true': that is a keyword in C23 and a macro whenever
     <stdbool.h> is included.  */
  long double exact = 1./x;
  long double delta = approx - exact;

  if (delta <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
            (double)x, (double)approx, (double)exact);
  if (merr > delta)
    merr = delta;
  return delta;
}
68
/* Print the seed table on stdout: one `.long' directive for each of the
   64 intervals [X0, X0 + 1/64) that partition [1,2).

   For every interval the Taylor polynomial of 1/x around the interval
   midpoint X1 is reduced to a linear one by subtracting multiples of the
   polynomials P(4), P(3) and P(2) described at the top of this file.  The
   resulting slope and constant term are then quantized and packed into
   one 32-bit word: the upper 12 bits hold the magnitude of the (negative)
   slope, the lower 20 bits hold the fraction bits of the constant term
   taken at x = 1.

   Returns 0.  */
int
main (void)
{
  long double T[5]; /* Taylor coefficients of 1/x around X1.  */
  long double P[5][5]; /* P[n][k] is the coefficient of X^k in P(n).  */
  int i, j;
  long double X0, X1, S;
  long double inc = 1./64; /* Width of one interval.  */
  long double D = inc*0.5; /* Half the interval width.  */
  long i0, i1, io;
  long double T0, Ti1;

  /* The leading coefficients 1, 2, 4, 8 and the constant term of P(2) do
     not depend on the interval, so set them up once.  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      S = D / X1;
      /* T[i] = (-1)^i / X1^(i+1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif
      /* The coefficients that depend on S change with every interval;
         P(3) and P(4) follow from the recurrence
         P(n+1) := 2 * X * P(n) - D^2 * P(n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif
      /* Cancel the degree 4, 3 and 2 terms in turn by subtracting the
         matching multiple of P(i); only the lower-order coefficients are
         updated, T[i] itself is simply not used any more afterwards.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];

          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* Disabled experiment; it appears to keep a quadratic coefficient
         quantized to multiples of 1/1024.
         NOTE(review): `i' in the next statement looks like it was meant
         to be `i2' -- confirm before enabling.  */
      long i2 = T[2]*1024;
      long double a = (T[2]-i/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#endif
      /* The quantized slope Ti1 is read back from the whole packed word,
         so it depends on the constant-term bits i0, and i0 in turn
         depends on Ti1.  Iterate a fixed four times, starting from
         i0 = 0.  */
      for (i = 0, i0 = 0; i < 4; i++)
        {
          i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
          /* Keep the low 12 bits of the magnitude; i1 stays <= 0.  */
          i1 = - (-i1 & 0x0fff);
          /* -i1 is in [0, 0xfff] here.  Shift in unsigned arithmetic so
             that the result cannot overflow a 32-bit signed long.  */
          Ti1 = (((unsigned) -i1 << 20) | i0) /-(long double)(1LL<<32LL);
          /* Compensate the constant term for the slope quantization with
             a multiple of P(1), then move the reference point from X1
             to 1.  */
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 1024 * 1024 + 0.5;
          i0 &= 0xfffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = ((unsigned) -i1 << 20) | i0;
#if 0
      {
        /* Unpack the word into the a0 / a1 arguments of err () and check
           the error at the interval bounds, at the midpoint and at two
           further sample points.  */
        long double X2 = X0 + inc;
        long double A1 = (unsigned)io/-65536./65536.;
        long double A0 =  (unsigned)(io << 12)/65536./65536.;
        long double Xm0 = 1./sqrt (-A1);
        long double Xm1 = 0.5+0.5*-A0/A1;

        printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
        printf ("%.12f %.12f %.12f\n",
                err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
        printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
        printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
      }
#endif
      /* %x takes an unsigned int, while io is a long.  */
      printf ("\t.long 0x%x\n", (unsigned) io);
   }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}
162