/* Copyright (C) 2008-2022 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* We use a polynomial similar to a Tchebycheff polynomial to get an
   initial seed, and then use a Newton-Raphson iteration step to get an
   approximate result.
   If this result can't be rounded to the exact result with confidence, we
   round to the value between the two closest representable values, and
   test if the correctly rounded value is above or below this value.

   Because of the Newton-Raphson iteration step, an error in the seed at X
   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
   or a polynomial that is close to optimal according to the maximum norm
   on the error of the seed value; we want one that is close to optimal
   according to the maximum norm on the error of the result, i.e. we
   want the maxima of the polynomial to increase linearly.
   Given an interval [X0,X2) over which to approximate,
   with X1 := (X0+X2)/2,  D := X1-X0, F := 1/D, and S := D/X1 we have,
   like for Tchebycheff polynomials:
   P(0) := 1
   but then we have:
   P(1) := X + S*D
   P(2) := 2 * X^2 + S*D * X - D^2
   Then again:
   P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
 */

#include <math.h>	/* sqrt, log: only needed by the disabled debug code.  */
#include <stdio.h>
#include <string.h>

/* Most negative error seen so far by err ().  It starts at an arbitrary
   positive value so that the first call always replaces it.  */
static long double merr = 42.;

/* Evaluate the linear seed y0 = A0 + (X-1)*A1, apply one Newton-Raphson
   step to it, and return the signed difference between that result and
   1/X.  A diagnostic is printed when the difference is at or below
   -2^-30, and the smallest difference seen is recorded in merr.  */
double
err (long double a0, long double a1, long double x)
{
  long double y0 = a0 + (x-1)*a1;

  long double approx = 2. * y0 - y0 * x * y0;
  long double exact = 1./x;
  long double delta = approx - exact;

  if (delta <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
	    (double)x, (double)approx, (double)exact);
  if (merr > delta)
    merr = delta;
  return delta;
}

/* Pack one table word: the magnitude of I1 goes into the bits above
   bit 19 and I0 into the low 20 bits.  The caller masks I1 so that -I1
   is in [0, 0xfff] and I0 so that it is in [0, 0xfffff].  The shift is
   done on an unsigned value so that it cannot overflow on hosts where
   long is only 32 bits wide.  */
static unsigned
pack_entry (long i1, long i0)
{
  return ((unsigned) -i1 << 20) | (unsigned) i0;
}

/* Print one ".long" table word for every interval [X0, X0 + 1/64)
   with X0 running from 1 up to (but not including) 2.  */
int
main (void)
{
  long double T[5]; /* Taylor polynomial of 1/X around X1.  */
  long double P[5][5]; /* P[n][k] is the coefficient of X^k in P(n).  */
  int i, j;
  long double X0, X1, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1;
  unsigned io;

  /* Set up the coefficients that do not depend on the interval.  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      S = D / X1;
      /* Taylor coefficients of 1/X in powers of (X - X1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
	T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif
      /* Fill in the interval dependent coefficients of P(1) .. P(4),
	 using the recurrence from the comment at the top of the file.  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
	{
	  P[i][0] = -D*D*P[i-2][0];
	  for (j = 1; j < i; j++)
	    P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
	}
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the terms of degree 4, 3 and 2 by subtracting the
	 matching multiple of P(i), leaving a linear polynomial in
	 T[0] and T[1].  */
      for (i = 4; i > 1; i--)
	{
	  long double a = T[i]/P[i][i];

	  for (j = 0; j < i; j++)
	    T[j] -= a * P[i][j];
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* Disabled experiment that quantizes the quadratic coefficient.
	 NOTE(review): the expression below subtracts i/1024. although the
	 quantized value is held in i2; it looks as if i2 was intended.
	 Confirm before enabling.  */
      {
	long i2 = T[2]*1024;
	long double a = (T[2]-i/1024.)/P[2][2];
	for (j = 0; j < 2; j++)
	  T[j] -= a * P[2][j];
      }
#endif
      long double T0, Ti1;
      /* Quantize the two coefficients.  The rounding of i1 depends on i0,
	 and the constant term depends on the slope Ti1 that the packed
	 word represents, so the computation is repeated a few times.  */
      for (i = 0, i0 = 0; i < 4; i++)
	{
	  i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
	  i1 = - (-i1 & 0x0fff);
	  /* Slope represented by the whole packed word, scaled by -2^-32.  */
	  Ti1 = pack_entry (i1, i0) /-(long double)(1LL<<32LL);
	  /* Compensate the constant term for the change in slope, and
	     re-base it from powers of (X - X1) to powers of (X - 1).  */
	  T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
	  i0 = T0 * 1024 * 1024 + 0.5;
	  i0 &= 0xfffff;
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = pack_entry (i1, i0);
#if 0
      {
	long double X2 = X0 + inc;
	long double A1 = io/-65536./65536.;
	long double A0 = (io << 12)/65536./65536.;
	long double Xm0 = 1./sqrt (-A1);
	long double Xm1 = 0.5+0.5*-A0/A1;

	printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
	printf ("%.12f %.12f %.12f\n",
		err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
	printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
	printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
      }
#endif
      printf ("\t.long 0x%x\n", io);
    }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}