1169689Skan/* Software floating-point emulation. 2169689Skan Basic four-word fraction declaration and manipulation. 3171825Skan Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc. 4169689Skan This file is part of the GNU C Library. 5169689Skan Contributed by Richard Henderson (rth@cygnus.com), 6169689Skan Jakub Jelinek (jj@ultra.linux.cz), 7169689Skan David S. Miller (davem@redhat.com) and 8169689Skan Peter Maydell (pmaydell@chiark.greenend.org.uk). 9169689Skan 10169689Skan The GNU C Library is free software; you can redistribute it and/or 11169689Skan modify it under the terms of the GNU Lesser General Public 12169689Skan License as published by the Free Software Foundation; either 13169689Skan version 2.1 of the License, or (at your option) any later version. 14169689Skan 15169689Skan In addition to the permissions in the GNU Lesser General Public 16169689Skan License, the Free Software Foundation gives you unlimited 17169689Skan permission to link the compiled version of this file into 18169689Skan combinations with other programs, and to distribute those 19169689Skan combinations without any restriction coming from the use of this 20169689Skan file. (The Lesser General Public License restrictions do apply in 21169689Skan other respects; for example, they cover modification of the file, 22169689Skan and distribution when not linked into a combine executable.) 23169689Skan 24169689Skan The GNU C Library is distributed in the hope that it will be useful, 25169689Skan but WITHOUT ANY WARRANTY; without even the implied warranty of 26169689Skan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27169689Skan Lesser General Public License for more details. 28169689Skan 29169689Skan You should have received a copy of the GNU Lesser General Public 30169689Skan License along with the GNU C Library; if not, write to the Free 31169689Skan Software Foundation, 51 Franklin Street, Fifth Floor, Boston, 32169689Skan MA 02110-1301, USA. */ 33169689Skan 34169689Skan#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] 35169689Skan#define _FP_FRAC_COPY_4(D,S) \ 36169689Skan (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ 37169689Skan D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) 38169689Skan#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) 39169689Skan#define _FP_FRAC_HIGH_4(X) (X##_f[3]) 40169689Skan#define _FP_FRAC_LOW_4(X) (X##_f[0]) 41169689Skan#define _FP_FRAC_WORD_4(X,w) (X##_f[w]) 42169689Skan 43169689Skan#define _FP_FRAC_SLL_4(X,N) \ 44169689Skan do { \ 45169689Skan _FP_I_TYPE _up, _down, _skip, _i; \ 46169689Skan _skip = (N) / _FP_W_TYPE_SIZE; \ 47169689Skan _up = (N) % _FP_W_TYPE_SIZE; \ 48169689Skan _down = _FP_W_TYPE_SIZE - _up; \ 49169689Skan if (!_up) \ 50169689Skan for (_i = 3; _i >= _skip; --_i) \ 51169689Skan X##_f[_i] = X##_f[_i-_skip]; \ 52169689Skan else \ 53169689Skan { \ 54169689Skan for (_i = 3; _i > _skip; --_i) \ 55169689Skan X##_f[_i] = X##_f[_i-_skip] << _up \ 56169689Skan | X##_f[_i-_skip-1] >> _down; \ 57169689Skan X##_f[_i--] = X##_f[0] << _up; \ 58169689Skan } \ 59169689Skan for (; _i >= 0; --_i) \ 60169689Skan X##_f[_i] = 0; \ 61169689Skan } while (0) 62169689Skan 63169689Skan/* This one was broken too */ 64169689Skan#define _FP_FRAC_SRL_4(X,N) \ 65169689Skan do { \ 66169689Skan _FP_I_TYPE _up, _down, _skip, _i; \ 67169689Skan _skip = (N) / _FP_W_TYPE_SIZE; \ 68169689Skan _down = (N) % _FP_W_TYPE_SIZE; \ 69169689Skan _up = _FP_W_TYPE_SIZE - _down; \ 70169689Skan if (!_down) \ 71169689Skan for (_i = 0; _i <= 3-_skip; ++_i) \ 72169689Skan X##_f[_i] = X##_f[_i+_skip]; \ 73169689Skan else \ 74169689Skan { \ 75169689Skan for (_i = 0; _i < 3-_skip; ++_i) \ 76169689Skan X##_f[_i] = X##_f[_i+_skip] >> _down \ 77169689Skan | X##_f[_i+_skip+1] << _up; \ 78169689Skan X##_f[_i++] = X##_f[3] >> _down; \ 79169689Skan } \ 80169689Skan for (; _i < 4; ++_i) \ 81169689Skan X##_f[_i] = 0; \ 82169689Skan } while (0) 83169689Skan 84169689Skan 85169689Skan/* Right shift with sticky-lsb. 86169689Skan * What this actually means is that we do a standard right-shift, 87169689Skan * but that if any of the bits that fall off the right hand side 88169689Skan * were one then we always set the LSbit. 89169689Skan */ 90169689Skan#define _FP_FRAC_SRST_4(X,S,N,size) \ 91169689Skan do { \ 92169689Skan _FP_I_TYPE _up, _down, _skip, _i; \ 93169689Skan _FP_W_TYPE _s; \ 94169689Skan _skip = (N) / _FP_W_TYPE_SIZE; \ 95169689Skan _down = (N) % _FP_W_TYPE_SIZE; \ 96169689Skan _up = _FP_W_TYPE_SIZE - _down; \ 97169689Skan for (_s = _i = 0; _i < _skip; ++_i) \ 98169689Skan _s |= X##_f[_i]; \ 99169689Skan if (!_down) \ 100169689Skan for (_i = 0; _i <= 3-_skip; ++_i) \ 101169689Skan X##_f[_i] = X##_f[_i+_skip]; \ 102169689Skan else \ 103169689Skan { \ 104169689Skan _s |= X##_f[_i] << _up; \ 105169689Skan for (_i = 0; _i < 3-_skip; ++_i) \ 106169689Skan X##_f[_i] = X##_f[_i+_skip] >> _down \ 107169689Skan | X##_f[_i+_skip+1] << _up; \ 108169689Skan X##_f[_i++] = X##_f[3] >> _down; \ 109169689Skan } \ 110169689Skan for (; _i < 4; ++_i) \ 111169689Skan X##_f[_i] = 0; \ 112169689Skan S = (_s != 0); \ 113169689Skan } while (0) 114169689Skan 115169689Skan#define _FP_FRAC_SRS_4(X,N,size) \ 116169689Skan do { \ 117169689Skan int _sticky; \ 118169689Skan _FP_FRAC_SRST_4(X, _sticky, N, size); \ 119169689Skan X##_f[0] |= _sticky; \ 120169689Skan } while (0) 121169689Skan 122169689Skan#define _FP_FRAC_ADD_4(R,X,Y) \ 123169689Skan __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 124169689Skan X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 125169689Skan Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 126169689Skan 127169689Skan#define _FP_FRAC_SUB_4(R,X,Y) \ 128169689Skan __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 129169689Skan X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 130169689Skan Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 131169689Skan 132169689Skan#define _FP_FRAC_DEC_4(X,Y) \ 133169689Skan __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 134169689Skan Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 135169689Skan 136169689Skan#define _FP_FRAC_ADDI_4(X,I) \ 137169689Skan __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) 138169689Skan 139169689Skan#define _FP_ZEROFRAC_4 0,0,0,0 140169689Skan#define _FP_MINFRAC_4 0,0,0,1 141169689Skan#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 142169689Skan 143169689Skan#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) 144169689Skan#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) 145169689Skan#define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 146169689Skan#define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 147169689Skan 148169689Skan#define _FP_FRAC_EQ_4(X,Y) \ 149169689Skan (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ 150169689Skan && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) 151169689Skan 152169689Skan#define _FP_FRAC_GT_4(X,Y) \ 153169689Skan (X##_f[3] > Y##_f[3] || \ 154169689Skan (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 155169689Skan (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 156169689Skan (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ 157169689Skan )) \ 158169689Skan )) \ 159169689Skan ) 160169689Skan 161169689Skan#define _FP_FRAC_GE_4(X,Y) \ 162169689Skan (X##_f[3] > Y##_f[3] || \ 163169689Skan (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 164169689Skan (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 165169689Skan (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ 166169689Skan )) \ 167169689Skan )) \ 168169689Skan ) 169169689Skan 170169689Skan 171169689Skan#define _FP_FRAC_CLZ_4(R,X) \ 172169689Skan do { \ 173169689Skan if (X##_f[3]) \ 174169689Skan { \ 175169689Skan __FP_CLZ(R,X##_f[3]); \ 176169689Skan } \ 177169689Skan else if (X##_f[2]) \ 178169689Skan { \ 179169689Skan __FP_CLZ(R,X##_f[2]); \ 180169689Skan R += _FP_W_TYPE_SIZE; \ 181169689Skan } \ 182169689Skan else if (X##_f[1]) \ 183169689Skan { \ 184169689Skan __FP_CLZ(R,X##_f[1]); \ 185169689Skan R += _FP_W_TYPE_SIZE*2; \ 186169689Skan } \ 187169689Skan else \ 188169689Skan { \ 189169689Skan __FP_CLZ(R,X##_f[0]); \ 190169689Skan R += _FP_W_TYPE_SIZE*3; \ 191169689Skan } \ 192169689Skan } while(0) 193169689Skan 194169689Skan 195169689Skan#define _FP_UNPACK_RAW_4(fs, X, val) \ 196169689Skan do { \ 197169689Skan union _FP_UNION_##fs _flo; _flo.flt = (val); \ 198169689Skan X##_f[0] = _flo.bits.frac0; \ 199169689Skan X##_f[1] = _flo.bits.frac1; \ 200169689Skan X##_f[2] = _flo.bits.frac2; \ 201169689Skan X##_f[3] = _flo.bits.frac3; \ 202169689Skan X##_e = _flo.bits.exp; \ 203169689Skan X##_s = _flo.bits.sign; \ 204169689Skan } while (0) 205169689Skan 206169689Skan#define _FP_UNPACK_RAW_4_P(fs, X, val) \ 207169689Skan do { \ 208169689Skan union _FP_UNION_##fs *_flo = \ 209169689Skan (union _FP_UNION_##fs *)(val); \ 210169689Skan \ 211169689Skan X##_f[0] = _flo->bits.frac0; \ 212169689Skan X##_f[1] = _flo->bits.frac1; \ 213169689Skan X##_f[2] = _flo->bits.frac2; \ 214169689Skan X##_f[3] = _flo->bits.frac3; \ 215169689Skan X##_e = _flo->bits.exp; \ 216169689Skan X##_s = _flo->bits.sign; \ 217169689Skan } while (0) 218169689Skan 219169689Skan#define _FP_PACK_RAW_4(fs, val, X) \ 220169689Skan do { \ 221169689Skan union _FP_UNION_##fs _flo; \ 222169689Skan _flo.bits.frac0 = X##_f[0]; \ 223169689Skan _flo.bits.frac1 = X##_f[1]; \ 224169689Skan _flo.bits.frac2 = X##_f[2]; \ 225169689Skan _flo.bits.frac3 = X##_f[3]; \ 226169689Skan _flo.bits.exp = X##_e; \ 227169689Skan _flo.bits.sign = X##_s; \ 228169689Skan (val) = _flo.flt; \ 229169689Skan } while (0) 230169689Skan 231169689Skan#define _FP_PACK_RAW_4_P(fs, val, X) \ 232169689Skan do { \ 233169689Skan union _FP_UNION_##fs *_flo = \ 234169689Skan (union _FP_UNION_##fs *)(val); \ 235169689Skan \ 236169689Skan _flo->bits.frac0 = X##_f[0]; \ 237169689Skan _flo->bits.frac1 = X##_f[1]; \ 238169689Skan _flo->bits.frac2 = X##_f[2]; \ 239169689Skan _flo->bits.frac3 = X##_f[3]; \ 240169689Skan _flo->bits.exp = X##_e; \ 241169689Skan _flo->bits.sign = X##_s; \ 242169689Skan } while (0) 243169689Skan 244169689Skan/* 245169689Skan * Multiplication algorithms: 246169689Skan */ 247169689Skan 248169689Skan/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 249169689Skan 250169689Skan#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ 251169689Skan do { \ 252169689Skan _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 253169689Skan _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \ 254169689Skan \ 255169689Skan doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \ 256169689Skan doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ 257169689Skan doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ 258169689Skan doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ 259169689Skan doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ 260169689Skan doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ 261169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 262169689Skan _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \ 263169689Skan 0,0,_FP_FRAC_WORD_8(_z,1)); \ 264169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 265169689Skan _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \ 266169689Skan _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 267169689Skan _FP_FRAC_WORD_8(_z,1)); \ 268169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 269169689Skan _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \ 270169689Skan 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \ 271169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 272169689Skan _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \ 273169689Skan _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 274169689Skan _FP_FRAC_WORD_8(_z,2)); \ 275169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 276169689Skan _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \ 277169689Skan _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 278169689Skan _FP_FRAC_WORD_8(_z,2)); \ 279169689Skan doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ 280169689Skan doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ 281169689Skan doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ 282169689Skan doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ 283169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 284169689Skan _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \ 285169689Skan 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \ 286169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 287169689Skan _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \ 288169689Skan _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 289169689Skan _FP_FRAC_WORD_8(_z,3)); \ 290169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 291169689Skan _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \ 292169689Skan _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 293169689Skan _FP_FRAC_WORD_8(_z,3)); \ 294169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 295169689Skan _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \ 296169689Skan _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 297169689Skan _FP_FRAC_WORD_8(_z,3)); \ 298169689Skan doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ 299169689Skan doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ 300169689Skan doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ 301169689Skan doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ 302169689Skan doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ 303169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 304169689Skan _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \ 305169689Skan 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \ 306169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 307169689Skan _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \ 308169689Skan _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 309169689Skan _FP_FRAC_WORD_8(_z,4)); \ 310169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 311169689Skan _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \ 312169689Skan _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 313169689Skan _FP_FRAC_WORD_8(_z,4)); \ 314169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 315169689Skan _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \ 316169689Skan 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \ 317169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 318169689Skan _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \ 319169689Skan _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 320169689Skan _FP_FRAC_WORD_8(_z,5)); \ 321169689Skan doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ 322169689Skan __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 323169689Skan _b_f1,_b_f0, \ 324169689Skan _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \ 325169689Skan \ 326169689Skan /* Normalize since we know where the msb of the multiplicands \ 327169689Skan were (bit B), we know that the msb of the of the product is \ 328169689Skan at either 2B or 2B-1. */ \ 329169689Skan _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 330169689Skan __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 331169689Skan _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 332169689Skan } while (0) 333169689Skan 334169689Skan#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ 335169689Skan do { \ 336169689Skan _FP_FRAC_DECL_8(_z); \ 337169689Skan \ 338169689Skan mpn_mul_n(_z_f, _x_f, _y_f, 4); \ 339169689Skan \ 340169689Skan /* Normalize since we know where the msb of the multiplicands \ 341169689Skan were (bit B), we know that the msb of the of the product is \ 342169689Skan at either 2B or 2B-1. */ \ 343169689Skan _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 344169689Skan __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 345169689Skan _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 346169689Skan } while (0) 347169689Skan 348169689Skan/* 349169689Skan * Helper utility for _FP_DIV_MEAT_4_udiv: 350169689Skan * pppp = m * nnn 351169689Skan */ 352169689Skan#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \ 353169689Skan do { \ 354169689Skan UWtype _t; \ 355169689Skan umul_ppmm(p1,p0,m,n0); \ 356169689Skan umul_ppmm(p2,_t,m,n1); \ 357169689Skan __FP_FRAC_ADDI_2(p2,p1,_t); \ 358169689Skan umul_ppmm(p3,_t,m,n2); \ 359169689Skan __FP_FRAC_ADDI_2(p3,p2,_t); \ 360169689Skan } while (0) 361169689Skan 362169689Skan/* 363169689Skan * Division algorithms: 364169689Skan */ 365169689Skan 366169689Skan#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ 367169689Skan do { \ 368169689Skan int _i; \ 369169689Skan _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \ 370169689Skan _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \ 371169689Skan if (_FP_FRAC_GT_4(X, Y)) \ 372169689Skan { \ 373169689Skan _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ 374169689Skan _FP_FRAC_SRL_4(X, 1); \ 375169689Skan } \ 376169689Skan else \ 377169689Skan R##_e--; \ 378169689Skan \ 379169689Skan /* Normalize, i.e. make the most significant bit of the \ 380169689Skan denominator set. */ \ 381169689Skan _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \ 382169689Skan \ 383169689Skan for (_i = 3; ; _i--) \ 384169689Skan { \ 385169689Skan if (X##_f[3] == Y##_f[3]) \ 386169689Skan { \ 387169689Skan /* This is a special case, not an optimization \ 388169689Skan (X##_f[3]/Y##_f[3] would not fit into UWtype). \ 389169689Skan As X## is guaranteed to be < Y, R##_f[_i] can be either \ 390169689Skan (UWtype)-1 or (UWtype)-2. */ \ 391169689Skan R##_f[_i] = -1; \ 392169689Skan if (!_i) \ 393169689Skan break; \ 394169689Skan __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 395169689Skan Y##_f[2], Y##_f[1], Y##_f[0], 0, \ 396169689Skan X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ 397169689Skan _FP_FRAC_SUB_4(X, Y, X); \ 398169689Skan if (X##_f[3] > Y##_f[3]) \ 399169689Skan { \ 400169689Skan R##_f[_i] = -2; \ 401169689Skan _FP_FRAC_ADD_4(X, Y, X); \ 402169689Skan } \ 403169689Skan } \ 404169689Skan else \ 405169689Skan { \ 406169689Skan udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ 407169689Skan umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ 408169689Skan R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ 409169689Skan X##_f[2] = X##_f[1]; \ 410169689Skan X##_f[1] = X##_f[0]; \ 411169689Skan X##_f[0] = _n_f[_i]; \ 412169689Skan if (_FP_FRAC_GT_4(_m, X)) \ 413169689Skan { \ 414169689Skan R##_f[_i]--; \ 415169689Skan _FP_FRAC_ADD_4(X, Y, X); \ 416169689Skan if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \ 417169689Skan { \ 418169689Skan R##_f[_i]--; \ 419169689Skan _FP_FRAC_ADD_4(X, Y, X); \ 420169689Skan } \ 421169689Skan } \ 422169689Skan _FP_FRAC_DEC_4(X, _m); \ 423169689Skan if (!_i) \ 424169689Skan { \ 425169689Skan if (!_FP_FRAC_EQ_4(X, _m)) \ 426169689Skan R##_f[0] |= _FP_WORK_STICKY; \ 427169689Skan break; \ 428169689Skan } \ 429169689Skan } \ 430169689Skan } \ 431169689Skan } while (0) 432169689Skan 433169689Skan 434169689Skan/* 435169689Skan * Square root algorithms: 436169689Skan * We have just one right now, maybe Newton approximation 437169689Skan * should be added for those machines where division is fast. 438169689Skan */ 439169689Skan 440169689Skan#define _FP_SQRT_MEAT_4(R, S, T, X, q) \ 441169689Skan do { \ 442169689Skan while (q) \ 443169689Skan { \ 444169689Skan T##_f[3] = S##_f[3] + q; \ 445169689Skan if (T##_f[3] <= X##_f[3]) \ 446169689Skan { \ 447169689Skan S##_f[3] = T##_f[3] + q; \ 448169689Skan X##_f[3] -= T##_f[3]; \ 449169689Skan R##_f[3] += q; \ 450169689Skan } \ 451169689Skan _FP_FRAC_SLL_4(X, 1); \ 452169689Skan q >>= 1; \ 453169689Skan } \ 454169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 455169689Skan while (q) \ 456169689Skan { \ 457169689Skan T##_f[2] = S##_f[2] + q; \ 458169689Skan T##_f[3] = S##_f[3]; \ 459169689Skan if (T##_f[3] < X##_f[3] || \ 460169689Skan (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \ 461169689Skan { \ 462169689Skan S##_f[2] = T##_f[2] + q; \ 463169689Skan S##_f[3] += (T##_f[2] > S##_f[2]); \ 464169689Skan __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \ 465169689Skan T##_f[3], T##_f[2]); \ 466169689Skan R##_f[2] += q; \ 467169689Skan } \ 468169689Skan _FP_FRAC_SLL_4(X, 1); \ 469169689Skan q >>= 1; \ 470169689Skan } \ 471169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 472169689Skan while (q) \ 473169689Skan { \ 474169689Skan T##_f[1] = S##_f[1] + q; \ 475169689Skan T##_f[2] = S##_f[2]; \ 476169689Skan T##_f[3] = S##_f[3]; \ 477169689Skan if (T##_f[3] < X##_f[3] || \ 478169689Skan (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \ 479169689Skan (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \ 480169689Skan { \ 481169689Skan S##_f[1] = T##_f[1] + q; \ 482169689Skan S##_f[2] += (T##_f[1] > S##_f[1]); \ 483169689Skan S##_f[3] += (T##_f[2] > S##_f[2]); \ 484169689Skan __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \ 485169689Skan T##_f[3], T##_f[2], T##_f[1]); \ 486169689Skan R##_f[1] += q; \ 487169689Skan } \ 488169689Skan _FP_FRAC_SLL_4(X, 1); \ 489169689Skan q >>= 1; \ 490169689Skan } \ 491169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 492169689Skan while (q != _FP_WORK_ROUND) \ 493169689Skan { \ 494169689Skan T##_f[0] = S##_f[0] + q; \ 495169689Skan T##_f[1] = S##_f[1]; \ 496169689Skan T##_f[2] = S##_f[2]; \ 497169689Skan T##_f[3] = S##_f[3]; \ 498169689Skan if (_FP_FRAC_GE_4(X,T)) \ 499169689Skan { \ 500169689Skan S##_f[0] = T##_f[0] + q; \ 501169689Skan S##_f[1] += (T##_f[0] > S##_f[0]); \ 502169689Skan S##_f[2] += (T##_f[1] > S##_f[1]); \ 503169689Skan S##_f[3] += (T##_f[2] > S##_f[2]); \ 504169689Skan _FP_FRAC_DEC_4(X, T); \ 505169689Skan R##_f[0] += q; \ 506169689Skan } \ 507169689Skan _FP_FRAC_SLL_4(X, 1); \ 508169689Skan q >>= 1; \ 509169689Skan } \ 510169689Skan if (!_FP_FRAC_ZEROP_4(X)) \ 511169689Skan { \ 512169689Skan if (_FP_FRAC_GT_4(X,S)) \ 513169689Skan R##_f[0] |= _FP_WORK_ROUND; \ 514169689Skan R##_f[0] |= _FP_WORK_STICKY; \ 515169689Skan } \ 516169689Skan } while (0) 517169689Skan 518169689Skan 519169689Skan/* 520169689Skan * Internals 521169689Skan */ 522169689Skan 523169689Skan#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ 524169689Skan (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) 525169689Skan 526169689Skan#ifndef __FP_FRAC_ADD_3 527169689Skan#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ 528169689Skan do { \ 529169689Skan _FP_W_TYPE _c1, _c2; \ 530169689Skan r0 = x0 + y0; \ 531169689Skan _c1 = r0 < x0; \ 532169689Skan r1 = x1 + y1; \ 533169689Skan _c2 = r1 < x1; \ 534169689Skan r1 += _c1; \ 535169689Skan _c2 |= r1 < _c1; \ 536169689Skan r2 = x2 + y2 + _c2; \ 537169689Skan } while (0) 538169689Skan#endif 539169689Skan 540169689Skan#ifndef __FP_FRAC_ADD_4 541169689Skan#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ 542169689Skan do { \ 543169689Skan _FP_W_TYPE _c1, _c2, _c3; \ 544169689Skan r0 = x0 + y0; \ 545169689Skan _c1 = r0 < x0; \ 546169689Skan r1 = x1 + y1; \ 547169689Skan _c2 = r1 < x1; \ 548169689Skan r1 += _c1; \ 549169689Skan _c2 |= r1 < _c1; \ 550169689Skan r2 = x2 + y2; \ 551169689Skan _c3 = r2 < x2; \ 552169689Skan r2 += _c2; \ 553169689Skan _c3 |= r2 < _c2; \ 554169689Skan r3 = x3 + y3 + _c3; \ 555169689Skan } while (0) 556169689Skan#endif 557169689Skan 558169689Skan#ifndef __FP_FRAC_SUB_3 559169689Skan#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ 560169689Skan do { \ 561169689Skan _FP_W_TYPE _c1, _c2; \ 562169689Skan r0 = x0 - y0; \ 563169689Skan _c1 = r0 > x0; \ 564169689Skan r1 = x1 - y1; \ 565169689Skan _c2 = r1 > x1; \ 566169689Skan r1 -= _c1; \ 567169689Skan _c2 |= _c1 && (y1 == x1); \ 568169689Skan r2 = x2 - y2 - _c2; \ 569169689Skan } while (0) 570169689Skan#endif 571169689Skan 572169689Skan#ifndef __FP_FRAC_SUB_4 573169689Skan#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ 574169689Skan do { \ 575169689Skan _FP_W_TYPE _c1, _c2, _c3; \ 576169689Skan r0 = x0 - y0; \ 577169689Skan _c1 = r0 > x0; \ 578169689Skan r1 = x1 - y1; \ 579169689Skan _c2 = r1 > x1; \ 580169689Skan r1 -= _c1; \ 581169689Skan _c2 |= _c1 && (y1 == x1); \ 582169689Skan r2 = x2 - y2; \ 583169689Skan _c3 = r2 > x2; \ 584169689Skan r2 -= _c2; \ 585169689Skan _c3 |= _c2 && (y2 == x2); \ 586169689Skan r3 = x3 - y3 - _c3; \ 587169689Skan } while (0) 588169689Skan#endif 589169689Skan 590169689Skan#ifndef __FP_FRAC_DEC_3 591169689Skan#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \ 592169689Skan do { \ 593169689Skan UWtype _t0, _t1, _t2; \ 594169689Skan _t0 = x0, _t1 = x1, _t2 = x2; \ 595169689Skan __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ 596169689Skan } while (0) 597169689Skan#endif 598169689Skan 599169689Skan#ifndef __FP_FRAC_DEC_4 600169689Skan#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \ 601169689Skan do { \ 602169689Skan UWtype _t0, _t1, _t2, _t3; \ 603169689Skan _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ 604169689Skan __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \ 605169689Skan } while (0) 606169689Skan#endif 607169689Skan 608169689Skan#ifndef __FP_FRAC_ADDI_4 609169689Skan#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ 610169689Skan do { \ 611169689Skan UWtype _t; \ 612169689Skan _t = ((x0 += i) < i); \ 613169689Skan x1 += _t; _t = (x1 < _t); \ 614169689Skan x2 += _t; _t = (x2 < _t); \ 615169689Skan x3 += _t; \ 616169689Skan } while (0) 617169689Skan#endif 618169689Skan 619169689Skan/* Convert FP values between word sizes. This appears to be more 620169689Skan * complicated than I'd have expected it to be, so these might be 621169689Skan * wrong... These macros are in any case somewhat bogus because they 622169689Skan * use information about what various FRAC_n variables look like 623169689Skan * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do 624169689Skan * the ones in op-2.h and op-1.h. 625169689Skan */ 626169689Skan#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0]) 627169689Skan 628169689Skan#define _FP_FRAC_COPY_2_4(D, S) \ 629169689Skando { \ 630169689Skan D##_f0 = S##_f[0]; \ 631169689Skan D##_f1 = S##_f[1]; \ 632169689Skan} while (0) 633169689Skan 634169689Skan/* Assembly/disassembly for converting to/from integral types. 635169689Skan * No shifting or overflow handled here. 636169689Skan */ 637169689Skan/* Put the FP value X into r, which is an integer of size rsize. */ 638169689Skan#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ 639169689Skan do { \ 640169689Skan if (rsize <= _FP_W_TYPE_SIZE) \ 641169689Skan r = X##_f[0]; \ 642169689Skan else if (rsize <= 2*_FP_W_TYPE_SIZE) \ 643169689Skan { \ 644169689Skan r = X##_f[1]; \ 645169689Skan r <<= _FP_W_TYPE_SIZE; \ 646169689Skan r += X##_f[0]; \ 647169689Skan } \ 648169689Skan else \ 649169689Skan { \ 650169689Skan /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ 651169689Skan /* and int == 4words as a single case. */ \ 652169689Skan r = X##_f[3]; \ 653169689Skan r <<= _FP_W_TYPE_SIZE; \ 654169689Skan r += X##_f[2]; \ 655169689Skan r <<= _FP_W_TYPE_SIZE; \ 656169689Skan r += X##_f[1]; \ 657169689Skan r <<= _FP_W_TYPE_SIZE; \ 658169689Skan r += X##_f[0]; \ 659169689Skan } \ 660169689Skan } while (0) 661169689Skan 662169689Skan/* "No disassemble Number Five!" */ 663169689Skan/* move an integer of size rsize into X's fractional part. We rely on 664169689Skan * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid 665169689Skan * having to mask the values we store into it. 666169689Skan */ 667169689Skan#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ 668169689Skan do { \ 669169689Skan X##_f[0] = r; \ 670169689Skan X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 671169689Skan X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ 672169689Skan X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \ 673169689Skan } while (0); 674169689Skan 675169689Skan#define _FP_FRAC_COPY_4_1(D, S) \ 676169689Skando { \ 677169689Skan D##_f[0] = S##_f; \ 678169689Skan D##_f[1] = D##_f[2] = D##_f[3] = 0; \ 679169689Skan} while (0) 680169689Skan 681169689Skan#define _FP_FRAC_COPY_4_2(D, S) \ 682169689Skando { \ 683169689Skan D##_f[0] = S##_f0; \ 684169689Skan D##_f[1] = S##_f1; \ 685169689Skan D##_f[2] = D##_f[3] = 0; \ 686169689Skan} while (0) 687171825Skan 688171825Skan#define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S) 689