1169689Skan/* Software floating-point emulation. 2169689Skan Definitions for IEEE Extended Precision. 3171825Skan Copyright (C) 1999,2006,2007 Free Software Foundation, Inc. 4169689Skan This file is part of the GNU C Library. 5169689Skan Contributed by Jakub Jelinek (jj@ultra.linux.cz). 6169689Skan 7169689Skan The GNU C Library is free software; you can redistribute it and/or 8169689Skan modify it under the terms of the GNU Lesser General Public 9169689Skan License as published by the Free Software Foundation; either 10169689Skan version 2.1 of the License, or (at your option) any later version. 11169689Skan 12169689Skan In addition to the permissions in the GNU Lesser General Public 13169689Skan License, the Free Software Foundation gives you unlimited 14169689Skan permission to link the compiled version of this file into 15169689Skan combinations with other programs, and to distribute those 16169689Skan combinations without any restriction coming from the use of this 17169689Skan file. (The Lesser General Public License restrictions do apply in 18169689Skan other respects; for example, they cover modification of the file, 19169689Skan and distribution when not linked into a combine executable.) 20169689Skan 21169689Skan The GNU C Library is distributed in the hope that it will be useful, 22169689Skan but WITHOUT ANY WARRANTY; without even the implied warranty of 23169689Skan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24169689Skan Lesser General Public License for more details. 25169689Skan 26169689Skan You should have received a copy of the GNU Lesser General Public 27169689Skan License along with the GNU C Library; if not, write to the Free 28169689Skan Software Foundation, 51 Franklin Street, Fifth Floor, Boston, 29169689Skan MA 02110-1301, USA. */ 30169689Skan 31169689Skan#if _FP_W_TYPE_SIZE < 32 32169689Skan#error "Here's a nickel, kid. Go buy yourself a real computer." 33169689Skan#endif 34169689Skan 35169689Skan#if _FP_W_TYPE_SIZE < 64 36169689Skan#define _FP_FRACTBITS_E (4*_FP_W_TYPE_SIZE) 37169689Skan#else 38169689Skan#define _FP_FRACTBITS_E (2*_FP_W_TYPE_SIZE) 39169689Skan#endif 40169689Skan 41169689Skan#define _FP_FRACBITS_E 64 42169689Skan#define _FP_FRACXBITS_E (_FP_FRACTBITS_E - _FP_FRACBITS_E) 43169689Skan#define _FP_WFRACBITS_E (_FP_WORKBITS + _FP_FRACBITS_E) 44169689Skan#define _FP_WFRACXBITS_E (_FP_FRACTBITS_E - _FP_WFRACBITS_E) 45169689Skan#define _FP_EXPBITS_E 15 46169689Skan#define _FP_EXPBIAS_E 16383 47169689Skan#define _FP_EXPMAX_E 32767 48169689Skan 49169689Skan#define _FP_QNANBIT_E \ 50169689Skan ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE) 51169689Skan#define _FP_QNANBIT_SH_E \ 52169689Skan ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE) 53169689Skan#define _FP_IMPLBIT_E \ 54169689Skan ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE) 55169689Skan#define _FP_IMPLBIT_SH_E \ 56169689Skan ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE) 57169689Skan#define _FP_OVERFLOW_E \ 58169689Skan ((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE)) 59169689Skan 60169689Skantypedef float XFtype __attribute__((mode(XF))); 61169689Skan 62169689Skan#if _FP_W_TYPE_SIZE < 64 63169689Skan 64169689Skanunion _FP_UNION_E 65169689Skan{ 66169689Skan XFtype flt; 67169689Skan struct 68169689Skan { 69169689Skan#if __BYTE_ORDER == __BIG_ENDIAN 70169689Skan unsigned long pad1 : _FP_W_TYPE_SIZE; 71169689Skan unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E); 72169689Skan unsigned long sign : 1; 73169689Skan unsigned long exp : _FP_EXPBITS_E; 74169689Skan unsigned long frac1 : _FP_W_TYPE_SIZE; 75169689Skan unsigned long frac0 : _FP_W_TYPE_SIZE; 76169689Skan#else 77169689Skan unsigned long frac0 : _FP_W_TYPE_SIZE; 78169689Skan unsigned long frac1 : _FP_W_TYPE_SIZE; 79169689Skan unsigned exp : _FP_EXPBITS_E; 80169689Skan unsigned sign : 1; 81169689Skan#endif /* not bigendian */ 82169689Skan } bits __attribute__((packed)); 83169689Skan}; 84169689Skan 85169689Skan 86169689Skan#define FP_DECL_E(X) _FP_DECL(4,X) 87169689Skan 88169689Skan#define FP_UNPACK_RAW_E(X, val) \ 89169689Skan do { \ 90169689Skan union _FP_UNION_E _flo; _flo.flt = (val); \ 91169689Skan \ 92169689Skan X##_f[2] = 0; X##_f[3] = 0; \ 93169689Skan X##_f[0] = _flo.bits.frac0; \ 94169689Skan X##_f[1] = _flo.bits.frac1; \ 95169689Skan X##_e = _flo.bits.exp; \ 96169689Skan X##_s = _flo.bits.sign; \ 97169689Skan } while (0) 98169689Skan 99169689Skan#define FP_UNPACK_RAW_EP(X, val) \ 100169689Skan do { \ 101169689Skan union _FP_UNION_E *_flo = \ 102169689Skan (union _FP_UNION_E *)(val); \ 103169689Skan \ 104169689Skan X##_f[2] = 0; X##_f[3] = 0; \ 105169689Skan X##_f[0] = _flo->bits.frac0; \ 106169689Skan X##_f[1] = _flo->bits.frac1; \ 107169689Skan X##_e = _flo->bits.exp; \ 108169689Skan X##_s = _flo->bits.sign; \ 109169689Skan } while (0) 110169689Skan 111169689Skan#define FP_PACK_RAW_E(val, X) \ 112169689Skan do { \ 113169689Skan union _FP_UNION_E _flo; \ 114169689Skan \ 115169689Skan if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \ 116169689Skan else X##_f[1] &= ~(_FP_IMPLBIT_E); \ 117169689Skan _flo.bits.frac0 = X##_f[0]; \ 118169689Skan _flo.bits.frac1 = X##_f[1]; \ 119169689Skan _flo.bits.exp = X##_e; \ 120169689Skan _flo.bits.sign = X##_s; \ 121169689Skan \ 122169689Skan (val) = _flo.flt; \ 123169689Skan } while (0) 124169689Skan 125169689Skan#define FP_PACK_RAW_EP(val, X) \ 126169689Skan do { \ 127169689Skan if (!FP_INHIBIT_RESULTS) \ 128169689Skan { \ 129169689Skan union _FP_UNION_E *_flo = \ 130169689Skan (union _FP_UNION_E *)(val); \ 131169689Skan \ 132169689Skan if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \ 133169689Skan else X##_f[1] &= ~(_FP_IMPLBIT_E); \ 134169689Skan _flo->bits.frac0 = X##_f[0]; \ 135169689Skan _flo->bits.frac1 = X##_f[1]; \ 136169689Skan _flo->bits.exp = X##_e; \ 137169689Skan _flo->bits.sign = X##_s; \ 138169689Skan } \ 139169689Skan } while (0) 140169689Skan 141169689Skan#define FP_UNPACK_E(X,val) \ 142169689Skan do { \ 143169689Skan FP_UNPACK_RAW_E(X,val); \ 144169689Skan _FP_UNPACK_CANONICAL(E,4,X); \ 145169689Skan } while (0) 146169689Skan 147169689Skan#define FP_UNPACK_EP(X,val) \ 148169689Skan do { \ 149169689Skan FP_UNPACK_RAW_EP(X,val); \ 150169689Skan _FP_UNPACK_CANONICAL(E,4,X); \ 151169689Skan } while (0) 152169689Skan 153169689Skan#define FP_UNPACK_SEMIRAW_E(X,val) \ 154169689Skan do { \ 155171825Skan FP_UNPACK_RAW_E(X,val); \ 156169689Skan _FP_UNPACK_SEMIRAW(E,4,X); \ 157169689Skan } while (0) 158169689Skan 159169689Skan#define FP_UNPACK_SEMIRAW_EP(X,val) \ 160169689Skan do { \ 161171825Skan FP_UNPACK_RAW_EP(X,val); \ 162169689Skan _FP_UNPACK_SEMIRAW(E,4,X); \ 163169689Skan } while (0) 164169689Skan 165169689Skan#define FP_PACK_E(val,X) \ 166169689Skan do { \ 167169689Skan _FP_PACK_CANONICAL(E,4,X); \ 168169689Skan FP_PACK_RAW_E(val,X); \ 169169689Skan } while (0) 170169689Skan 171169689Skan#define FP_PACK_EP(val,X) \ 172169689Skan do { \ 173169689Skan _FP_PACK_CANONICAL(E,4,X); \ 174169689Skan FP_PACK_RAW_EP(val,X); \ 175169689Skan } while (0) 176169689Skan 177169689Skan#define FP_PACK_SEMIRAW_E(val,X) \ 178169689Skan do { \ 179169689Skan _FP_PACK_SEMIRAW(E,4,X); \ 180171825Skan FP_PACK_RAW_E(val,X); \ 181169689Skan } while (0) 182169689Skan 183169689Skan#define FP_PACK_SEMIRAW_EP(val,X) \ 184169689Skan do { \ 185169689Skan _FP_PACK_SEMIRAW(E,4,X); \ 186171825Skan FP_PACK_RAW_EP(val,X); \ 187169689Skan } while (0) 188169689Skan 189169689Skan#define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,4,X) 190169689Skan#define FP_NEG_E(R,X) _FP_NEG(E,4,R,X) 191169689Skan#define FP_ADD_E(R,X,Y) _FP_ADD(E,4,R,X,Y) 192169689Skan#define FP_SUB_E(R,X,Y) _FP_SUB(E,4,R,X,Y) 193169689Skan#define FP_MUL_E(R,X,Y) _FP_MUL(E,4,R,X,Y) 194169689Skan#define FP_DIV_E(R,X,Y) _FP_DIV(E,4,R,X,Y) 195169689Skan#define FP_SQRT_E(R,X) _FP_SQRT(E,4,R,X) 196169689Skan 197169689Skan/* 198169689Skan * Square root algorithms: 199169689Skan * We have just one right now, maybe Newton approximation 200169689Skan * should be added for those machines where division is fast. 201169689Skan * This has special _E version because standard _4 square 202169689Skan * root would not work (it has to start normally with the 203169689Skan * second word and not the first), but as we have to do it 204169689Skan * anyway, we optimize it by doing most of the calculations 205169689Skan * in two UWtype registers instead of four. 206169689Skan */ 207169689Skan 208169689Skan#define _FP_SQRT_MEAT_E(R, S, T, X, q) \ 209169689Skan do { \ 210169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 211169689Skan _FP_FRAC_SRL_4(X, (_FP_WORKBITS)); \ 212169689Skan while (q) \ 213169689Skan { \ 214169689Skan T##_f[1] = S##_f[1] + q; \ 215169689Skan if (T##_f[1] <= X##_f[1]) \ 216169689Skan { \ 217169689Skan S##_f[1] = T##_f[1] + q; \ 218169689Skan X##_f[1] -= T##_f[1]; \ 219169689Skan R##_f[1] += q; \ 220169689Skan } \ 221169689Skan _FP_FRAC_SLL_2(X, 1); \ 222169689Skan q >>= 1; \ 223169689Skan } \ 224169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 225169689Skan while (q) \ 226169689Skan { \ 227169689Skan T##_f[0] = S##_f[0] + q; \ 228169689Skan T##_f[1] = S##_f[1]; \ 229169689Skan if (T##_f[1] < X##_f[1] || \ 230169689Skan (T##_f[1] == X##_f[1] && \ 231169689Skan T##_f[0] <= X##_f[0])) \ 232169689Skan { \ 233169689Skan S##_f[0] = T##_f[0] + q; \ 234169689Skan S##_f[1] += (T##_f[0] > S##_f[0]); \ 235169689Skan _FP_FRAC_DEC_2(X, T); \ 236169689Skan R##_f[0] += q; \ 237169689Skan } \ 238169689Skan _FP_FRAC_SLL_2(X, 1); \ 239169689Skan q >>= 1; \ 240169689Skan } \ 241169689Skan _FP_FRAC_SLL_4(R, (_FP_WORKBITS)); \ 242169689Skan if (X##_f[0] | X##_f[1]) \ 243169689Skan { \ 244169689Skan if (S##_f[1] < X##_f[1] || \ 245169689Skan (S##_f[1] == X##_f[1] && \ 246169689Skan S##_f[0] < X##_f[0])) \ 247169689Skan R##_f[0] |= _FP_WORK_ROUND; \ 248169689Skan R##_f[0] |= _FP_WORK_STICKY; \ 249169689Skan } \ 250169689Skan } while (0) 251169689Skan 252169689Skan#define FP_CMP_E(r,X,Y,un) _FP_CMP(E,4,r,X,Y,un) 253169689Skan#define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,4,r,X,Y) 254169689Skan#define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,4,r,X,Y) 255169689Skan 256169689Skan#define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,4,r,X,rsz,rsg) 257169689Skan#define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,4,X,r,rs,rt) 258169689Skan 259169689Skan#define _FP_FRAC_HIGH_E(X) (X##_f[2]) 260169689Skan#define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1]) 261169689Skan 262169689Skan#else /* not _FP_W_TYPE_SIZE < 64 */ 263169689Skanunion _FP_UNION_E 264169689Skan{ 265169689Skan XFtype flt; 266169689Skan struct { 267169689Skan#if __BYTE_ORDER == __BIG_ENDIAN 268171825Skan _FP_W_TYPE pad : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E); 269171825Skan unsigned sign : 1; 270171825Skan unsigned exp : _FP_EXPBITS_E; 271171825Skan _FP_W_TYPE frac : _FP_W_TYPE_SIZE; 272169689Skan#else 273171825Skan _FP_W_TYPE frac : _FP_W_TYPE_SIZE; 274171825Skan unsigned exp : _FP_EXPBITS_E; 275171825Skan unsigned sign : 1; 276169689Skan#endif 277169689Skan } bits; 278169689Skan}; 279169689Skan 280169689Skan#define FP_DECL_E(X) _FP_DECL(2,X) 281169689Skan 282169689Skan#define FP_UNPACK_RAW_E(X, val) \ 283169689Skan do { \ 284169689Skan union _FP_UNION_E _flo; _flo.flt = (val); \ 285169689Skan \ 286169689Skan X##_f0 = _flo.bits.frac; \ 287169689Skan X##_f1 = 0; \ 288169689Skan X##_e = _flo.bits.exp; \ 289169689Skan X##_s = _flo.bits.sign; \ 290169689Skan } while (0) 291169689Skan 292169689Skan#define FP_UNPACK_RAW_EP(X, val) \ 293169689Skan do { \ 294169689Skan union _FP_UNION_E *_flo = \ 295169689Skan (union _FP_UNION_E *)(val); \ 296169689Skan \ 297169689Skan X##_f0 = _flo->bits.frac; \ 298169689Skan X##_f1 = 0; \ 299169689Skan X##_e = _flo->bits.exp; \ 300169689Skan X##_s = _flo->bits.sign; \ 301169689Skan } while (0) 302169689Skan 303169689Skan#define FP_PACK_RAW_E(val, X) \ 304169689Skan do { \ 305169689Skan union _FP_UNION_E _flo; \ 306169689Skan \ 307169689Skan if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \ 308169689Skan else X##_f0 &= ~(_FP_IMPLBIT_E); \ 309169689Skan _flo.bits.frac = X##_f0; \ 310169689Skan _flo.bits.exp = X##_e; \ 311169689Skan _flo.bits.sign = X##_s; \ 312169689Skan \ 313169689Skan (val) = _flo.flt; \ 314169689Skan } while (0) 315169689Skan 316169689Skan#define FP_PACK_RAW_EP(fs, val, X) \ 317169689Skan do { \ 318169689Skan if (!FP_INHIBIT_RESULTS) \ 319169689Skan { \ 320169689Skan union _FP_UNION_E *_flo = \ 321169689Skan (union _FP_UNION_E *)(val); \ 322169689Skan \ 323169689Skan if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \ 324169689Skan else X##_f0 &= ~(_FP_IMPLBIT_E); \ 325169689Skan _flo->bits.frac = X##_f0; \ 326169689Skan _flo->bits.exp = X##_e; \ 327169689Skan _flo->bits.sign = X##_s; \ 328169689Skan } \ 329169689Skan } while (0) 330169689Skan 331169689Skan 332169689Skan#define FP_UNPACK_E(X,val) \ 333169689Skan do { \ 334169689Skan FP_UNPACK_RAW_E(X,val); \ 335169689Skan _FP_UNPACK_CANONICAL(E,2,X); \ 336169689Skan } while (0) 337169689Skan 338169689Skan#define FP_UNPACK_EP(X,val) \ 339169689Skan do { \ 340169689Skan FP_UNPACK_RAW_EP(X,val); \ 341169689Skan _FP_UNPACK_CANONICAL(E,2,X); \ 342169689Skan } while (0) 343169689Skan 344169689Skan#define FP_UNPACK_SEMIRAW_E(X,val) \ 345169689Skan do { \ 346171825Skan FP_UNPACK_RAW_E(X,val); \ 347169689Skan _FP_UNPACK_SEMIRAW(E,2,X); \ 348169689Skan } while (0) 349169689Skan 350169689Skan#define FP_UNPACK_SEMIRAW_EP(X,val) \ 351169689Skan do { \ 352171825Skan FP_UNPACK_RAW_EP(X,val); \ 353169689Skan _FP_UNPACK_SEMIRAW(E,2,X); \ 354169689Skan } while (0) 355169689Skan 356169689Skan#define FP_PACK_E(val,X) \ 357169689Skan do { \ 358169689Skan _FP_PACK_CANONICAL(E,2,X); \ 359169689Skan FP_PACK_RAW_E(val,X); \ 360169689Skan } while (0) 361169689Skan 362169689Skan#define FP_PACK_EP(val,X) \ 363169689Skan do { \ 364169689Skan _FP_PACK_CANONICAL(E,2,X); \ 365169689Skan FP_PACK_RAW_EP(val,X); \ 366169689Skan } while (0) 367169689Skan 368169689Skan#define FP_PACK_SEMIRAW_E(val,X) \ 369169689Skan do { \ 370169689Skan _FP_PACK_SEMIRAW(E,2,X); \ 371171825Skan FP_PACK_RAW_E(val,X); \ 372169689Skan } while (0) 373169689Skan 374169689Skan#define FP_PACK_SEMIRAW_EP(val,X) \ 375169689Skan do { \ 376169689Skan _FP_PACK_SEMIRAW(E,2,X); \ 377171825Skan FP_PACK_RAW_EP(val,X); \ 378169689Skan } while (0) 379169689Skan 380169689Skan#define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,2,X) 381169689Skan#define FP_NEG_E(R,X) _FP_NEG(E,2,R,X) 382169689Skan#define FP_ADD_E(R,X,Y) _FP_ADD(E,2,R,X,Y) 383169689Skan#define FP_SUB_E(R,X,Y) _FP_SUB(E,2,R,X,Y) 384169689Skan#define FP_MUL_E(R,X,Y) _FP_MUL(E,2,R,X,Y) 385169689Skan#define FP_DIV_E(R,X,Y) _FP_DIV(E,2,R,X,Y) 386169689Skan#define FP_SQRT_E(R,X) _FP_SQRT(E,2,R,X) 387169689Skan 388169689Skan/* 389169689Skan * Square root algorithms: 390169689Skan * We have just one right now, maybe Newton approximation 391169689Skan * should be added for those machines where division is fast. 392169689Skan * We optimize it by doing most of the calculations 393169689Skan * in one UWtype registers instead of two, although we don't 394169689Skan * have to. 395169689Skan */ 396169689Skan#define _FP_SQRT_MEAT_E(R, S, T, X, q) \ 397169689Skan do { \ 398169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 399169689Skan _FP_FRAC_SRL_2(X, (_FP_WORKBITS)); \ 400169689Skan while (q) \ 401169689Skan { \ 402169689Skan T##_f0 = S##_f0 + q; \ 403169689Skan if (T##_f0 <= X##_f0) \ 404169689Skan { \ 405169689Skan S##_f0 = T##_f0 + q; \ 406169689Skan X##_f0 -= T##_f0; \ 407169689Skan R##_f0 += q; \ 408169689Skan } \ 409169689Skan _FP_FRAC_SLL_1(X, 1); \ 410169689Skan q >>= 1; \ 411169689Skan } \ 412169689Skan _FP_FRAC_SLL_2(R, (_FP_WORKBITS)); \ 413169689Skan if (X##_f0) \ 414169689Skan { \ 415169689Skan if (S##_f0 < X##_f0) \ 416169689Skan R##_f0 |= _FP_WORK_ROUND; \ 417169689Skan R##_f0 |= _FP_WORK_STICKY; \ 418169689Skan } \ 419169689Skan } while (0) 420169689Skan 421169689Skan#define FP_CMP_E(r,X,Y,un) _FP_CMP(E,2,r,X,Y,un) 422169689Skan#define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,2,r,X,Y) 423169689Skan#define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,2,r,X,Y) 424169689Skan 425169689Skan#define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,2,r,X,rsz,rsg) 426169689Skan#define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,2,X,r,rs,rt) 427169689Skan 428169689Skan#define _FP_FRAC_HIGH_E(X) (X##_f1) 429169689Skan#define _FP_FRAC_HIGH_RAW_E(X) (X##_f0) 430169689Skan 431169689Skan#endif /* not _FP_W_TYPE_SIZE < 64 */ 432