/* Copyright (C) 2008-2020 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* We use a polynomial similar to a Tchebycheff polynomial to get an
   initial seed, and then use a Newton-Raphson iteration step to get an
   approximate result.
   If this result can't be rounded to the exact result with confidence, we
   round to the value between the two closest representable values, and
   test if the correctly rounded value is above or below this value.

   Because of the Newton-Raphson iteration step, an error in the seed at X
   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
   or a polynomial that is close to optimal according to the maximum norm
   on the error of the seed value; we want one that is close to optimal
   according to the maximum norm on the error of the result, i.e. we
   want the maxima of the polynomial to increase linearly.
   Given an interval [X0,X2) over which to approximate,
   with X1 := (X0+X2)/2,  D := X1-X0,  F := 1/D, and S := D/X1 we have,
   like for Tchebycheff polynomials:
   P(0) := 1
   but then we have:
   P(1) := X + S*D
   P(2) := 2 * X^2 + S*D * X - D^2
   Then again:
   P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
 */

#include <stdio.h>
#include <string.h>

/* Table generator.  Writes 64 assembler lines of the form
   "\t.long 0x<hex>" to stdout, one for each sub-interval of width 1/64
   of [1,2), and returns 0.

   For every sub-interval the program
     - builds the Taylor coefficients T[] of 1/X about the midpoint X1;
     - builds the polynomials P[1]..P[4] from the recurrence given in
       the comment above (P[n][k] is the coefficient of X^k in P(n));
     - subtracts multiples of P[4], P[3], P[2] from T[] so that only the
       adjusted constant and linear coefficients T[0], T[1] are used
       afterwards;
     - quantizes the linear coefficient to a 13-bit field (i1) and the
       constant term to a 19-bit field (i0), re-deriving each from the
       other over four rounds;
     - prints the two fields packed as (13 bits << 19) | 19 bits.  */
int
main (void)
{
  long double T[5]; /* Taylor polynomial of 1/X about X1.  */
  long double P[5][5];
  int i, j;
  long double X0, X1, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2;

  /* Entries that do not depend on the interval are set up once.  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);	/* Leading coefficient of P(i) is 2^(i-1).  */
  P[2][0] = -D*D;

  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      S = D / X1;

      /* T[i] = (-1)^i / X1^(i+1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
	T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif

      /* Interval-dependent coefficients of P(1) and P(2), then P(3) and
	 P(4) via P(n+1) := 2 * X * P(n) - D^2 * P(n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
	{
	  P[i][0] = -D*D*P[i-2][0];
	  for (j = 1; j < i; j++)
	    P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
	}
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif

      /* Fold the degree 4, 3 and 2 terms into the lower coefficients by
	 subtracting the matching multiple of P(i).  T[i] itself is left
	 untouched; only T[0..i-1] are updated.  */
      for (i = 4; i > 1; i--)
	{
	  long double a = T[i]/P[i][i];

	  for (j = 0; j < i; j++)
	    T[j] -= a * P[i][j];
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* NOTE(review): this disabled variant uses `i' (which is 1 here)
	 where `i2' looks intended -- confirm before enabling.  */
      i2 = T[2]*512;
      long double a = (T[2]-i/512.)/P[2][2];
      for (j = 0; j < 2; j++)
	T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif

      /* Quantize.  i1 holds the linear coefficient scaled by 8192, stored
	 as the negation of a 13-bit magnitude; i0 holds the constant term
	 scaled by 2^19, masked to 19 bits.  Each depends on the other, so
	 the pair is recomputed four times.  */
      for (i = 0, i0 = 0; i < 4; i++)
	{
	  long double T0, Ti1;

	  i1 = T[1]*8192. + i0 / (long double)(1 << 19) - 0.5;
	  i1 = - (-i1 & 0x1fff);
	  /* -i1 is in [0, 0x1fff]; shift it as unsigned so the result
	     (up to 0xfff80000) cannot overflow a 32-bit long.  */
	  Ti1 = (((unsigned) -i1 << 19) | i0) / -(long double) (1LL << 32);
	  /* Constant term for the quantized slope Ti1: compensate the
	     slope error through P(1) and subtract (X1 - 1) * Ti1
	     (presumably re-basing the constant to X = 1).  */
	  T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
	  i0 = T0 * 512 * 1024 + 0.5;
	  i0 &= 0x7ffff;
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      /* The packed word fits in 32 bits; convert explicitly so the
	 argument type matches %x.  */
      printf ("\t.long 0x%x\n", ((unsigned) -i1 << 19) | (unsigned) i0);
    }
  return 0;
}