1169689Skan/* Software floating-point emulation. 2169689Skan Basic two-word fraction declaration and manipulation. 3171825Skan Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc. 4169689Skan This file is part of the GNU C Library. 5169689Skan Contributed by Richard Henderson (rth@cygnus.com), 6169689Skan Jakub Jelinek (jj@ultra.linux.cz), 7169689Skan David S. Miller (davem@redhat.com) and 8169689Skan Peter Maydell (pmaydell@chiark.greenend.org.uk). 9169689Skan 10169689Skan The GNU C Library is free software; you can redistribute it and/or 11169689Skan modify it under the terms of the GNU Lesser General Public 12169689Skan License as published by the Free Software Foundation; either 13169689Skan version 2.1 of the License, or (at your option) any later version. 14169689Skan 15169689Skan In addition to the permissions in the GNU Lesser General Public 16169689Skan License, the Free Software Foundation gives you unlimited 17169689Skan permission to link the compiled version of this file into 18169689Skan combinations with other programs, and to distribute those 19169689Skan combinations without any restriction coming from the use of this 20169689Skan file. (The Lesser General Public License restrictions do apply in 21169689Skan other respects; for example, they cover modification of the file, 22169689Skan and distribution when not linked into a combine executable.) 23169689Skan 24169689Skan The GNU C Library is distributed in the hope that it will be useful, 25169689Skan but WITHOUT ANY WARRANTY; without even the implied warranty of 26169689Skan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27169689Skan Lesser General Public License for more details. 28169689Skan 29169689Skan You should have received a copy of the GNU Lesser General Public 30169689Skan License along with the GNU C Library; if not, write to the Free 31169689Skan Software Foundation, 51 Franklin Street, Fifth Floor, Boston, 32169689Skan MA 02110-1301, USA. */ 33169689Skan 34169689Skan#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1 35169689Skan#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) 36169689Skan#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) 37169689Skan#define _FP_FRAC_HIGH_2(X) (X##_f1) 38169689Skan#define _FP_FRAC_LOW_2(X) (X##_f0) 39169689Skan#define _FP_FRAC_WORD_2(X,w) (X##_f##w) 40169689Skan 41169689Skan#define _FP_FRAC_SLL_2(X,N) \ 42169689Skan(void)(((N) < _FP_W_TYPE_SIZE) \ 43169689Skan ? ({ \ 44169689Skan if (__builtin_constant_p(N) && (N) == 1) \ 45169689Skan { \ 46169689Skan X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ 47169689Skan X##_f0 += X##_f0; \ 48169689Skan } \ 49169689Skan else \ 50169689Skan { \ 51169689Skan X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ 52169689Skan X##_f0 <<= (N); \ 53169689Skan } \ 54169689Skan 0; \ 55169689Skan }) \ 56169689Skan : ({ \ 57169689Skan X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 58169689Skan X##_f0 = 0; \ 59169689Skan })) 60169689Skan 61169689Skan 62169689Skan#define _FP_FRAC_SRL_2(X,N) \ 63169689Skan(void)(((N) < _FP_W_TYPE_SIZE) \ 64169689Skan ? ({ \ 65169689Skan X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 66169689Skan X##_f1 >>= (N); \ 67169689Skan }) \ 68169689Skan : ({ \ 69169689Skan X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 70169689Skan X##_f1 = 0; \ 71169689Skan })) 72169689Skan 73169689Skan/* Right shift with sticky-lsb. */ 74169689Skan#define _FP_FRAC_SRST_2(X,S, N,sz) \ 75169689Skan(void)(((N) < _FP_W_TYPE_SIZE) \ 76169689Skan ? ({ \ 77169689Skan S = (__builtin_constant_p(N) && (N) == 1 \ 78169689Skan ? X##_f0 & 1 \ 79169689Skan : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \ 80169689Skan X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \ 81169689Skan X##_f1 >>= (N); \ 82169689Skan }) \ 83169689Skan : ({ \ 84169689Skan S = ((((N) == _FP_W_TYPE_SIZE \ 85169689Skan ? 0 \ 86169689Skan : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \ 87169689Skan | X##_f0) != 0); \ 88169689Skan X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \ 89169689Skan X##_f1 = 0; \ 90169689Skan })) 91169689Skan 92169689Skan#define _FP_FRAC_SRS_2(X,N,sz) \ 93169689Skan(void)(((N) < _FP_W_TYPE_SIZE) \ 94169689Skan ? ({ \ 95169689Skan X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ 96169689Skan (__builtin_constant_p(N) && (N) == 1 \ 97169689Skan ? X##_f0 & 1 \ 98169689Skan : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 99169689Skan X##_f1 >>= (N); \ 100169689Skan }) \ 101169689Skan : ({ \ 102169689Skan X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ 103169689Skan ((((N) == _FP_W_TYPE_SIZE \ 104169689Skan ? 0 \ 105169689Skan : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \ 106169689Skan | X##_f0) != 0)); \ 107169689Skan X##_f1 = 0; \ 108169689Skan })) 109169689Skan 110169689Skan#define _FP_FRAC_ADDI_2(X,I) \ 111169689Skan __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 112169689Skan 113169689Skan#define _FP_FRAC_ADD_2(R,X,Y) \ 114169689Skan __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 115169689Skan 116169689Skan#define _FP_FRAC_SUB_2(R,X,Y) \ 117169689Skan __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 118169689Skan 119169689Skan#define _FP_FRAC_DEC_2(X,Y) \ 120169689Skan __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0) 121169689Skan 122169689Skan#define _FP_FRAC_CLZ_2(R,X) \ 123169689Skan do { \ 124169689Skan if (X##_f1) \ 125169689Skan __FP_CLZ(R,X##_f1); \ 126169689Skan else \ 127169689Skan { \ 128169689Skan __FP_CLZ(R,X##_f0); \ 129169689Skan R += _FP_W_TYPE_SIZE; \ 130169689Skan } \ 131169689Skan } while(0) 132169689Skan 133169689Skan/* Predicates */ 134169689Skan#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) 135169689Skan#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) 136169689Skan#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 137169689Skan#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 138169689Skan#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) 139169689Skan#define _FP_FRAC_GT_2(X, Y) \ 140169689Skan (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) 141169689Skan#define _FP_FRAC_GE_2(X, Y) \ 142169689Skan (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) 143169689Skan 144169689Skan#define _FP_ZEROFRAC_2 0, 0 145169689Skan#define _FP_MINFRAC_2 0, 1 146169689Skan#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 147169689Skan 148169689Skan/* 149169689Skan * Internals 150169689Skan */ 151169689Skan 152169689Skan#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) 153169689Skan 154169689Skan#define __FP_CLZ_2(R, xh, xl) \ 155169689Skan do { \ 156169689Skan if (xh) \ 157169689Skan __FP_CLZ(R,xh); \ 158169689Skan else \ 159169689Skan { \ 160169689Skan __FP_CLZ(R,xl); \ 161169689Skan R += _FP_W_TYPE_SIZE; \ 162169689Skan } \ 163169689Skan } while(0) 164169689Skan 165169689Skan#if 0 166169689Skan 167169689Skan#ifndef __FP_FRAC_ADDI_2 168169689Skan#define __FP_FRAC_ADDI_2(xh, xl, i) \ 169169689Skan (xh += ((xl += i) < i)) 170169689Skan#endif 171169689Skan#ifndef __FP_FRAC_ADD_2 172169689Skan#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ 173169689Skan (rh = xh + yh + ((rl = xl + yl) < xl)) 174169689Skan#endif 175169689Skan#ifndef __FP_FRAC_SUB_2 176169689Skan#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ 177169689Skan (rh = xh - yh - ((rl = xl - yl) > xl)) 178169689Skan#endif 179169689Skan#ifndef __FP_FRAC_DEC_2 180169689Skan#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ 181169689Skan do { \ 182169689Skan UWtype _t = xl; \ 183169689Skan xh -= yh + ((xl -= yl) > _t); \ 184169689Skan } while (0) 185169689Skan#endif 186169689Skan 187169689Skan#else 188169689Skan 189169689Skan#undef __FP_FRAC_ADDI_2 190169689Skan#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) 191169689Skan#undef __FP_FRAC_ADD_2 192169689Skan#define __FP_FRAC_ADD_2 add_ssaaaa 193169689Skan#undef __FP_FRAC_SUB_2 194169689Skan#define __FP_FRAC_SUB_2 sub_ddmmss 195169689Skan#undef __FP_FRAC_DEC_2 196169689Skan#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl) 197169689Skan 198169689Skan#endif 199169689Skan 200169689Skan/* 201169689Skan * Unpack the raw bits of a native fp value. Do not classify or 202169689Skan * normalize the data. 203169689Skan */ 204169689Skan 205169689Skan#define _FP_UNPACK_RAW_2(fs, X, val) \ 206169689Skan do { \ 207169689Skan union _FP_UNION_##fs _flo; _flo.flt = (val); \ 208169689Skan \ 209169689Skan X##_f0 = _flo.bits.frac0; \ 210169689Skan X##_f1 = _flo.bits.frac1; \ 211169689Skan X##_e = _flo.bits.exp; \ 212169689Skan X##_s = _flo.bits.sign; \ 213169689Skan } while (0) 214169689Skan 215169689Skan#define _FP_UNPACK_RAW_2_P(fs, X, val) \ 216169689Skan do { \ 217169689Skan union _FP_UNION_##fs *_flo = \ 218169689Skan (union _FP_UNION_##fs *)(val); \ 219169689Skan \ 220169689Skan X##_f0 = _flo->bits.frac0; \ 221169689Skan X##_f1 = _flo->bits.frac1; \ 222169689Skan X##_e = _flo->bits.exp; \ 223169689Skan X##_s = _flo->bits.sign; \ 224169689Skan } while (0) 225169689Skan 226169689Skan 227169689Skan/* 228169689Skan * Repack the raw bits of a native fp value. 229169689Skan */ 230169689Skan 231169689Skan#define _FP_PACK_RAW_2(fs, val, X) \ 232169689Skan do { \ 233169689Skan union _FP_UNION_##fs _flo; \ 234169689Skan \ 235169689Skan _flo.bits.frac0 = X##_f0; \ 236169689Skan _flo.bits.frac1 = X##_f1; \ 237169689Skan _flo.bits.exp = X##_e; \ 238169689Skan _flo.bits.sign = X##_s; \ 239169689Skan \ 240169689Skan (val) = _flo.flt; \ 241169689Skan } while (0) 242169689Skan 243169689Skan#define _FP_PACK_RAW_2_P(fs, val, X) \ 244169689Skan do { \ 245169689Skan union _FP_UNION_##fs *_flo = \ 246169689Skan (union _FP_UNION_##fs *)(val); \ 247169689Skan \ 248169689Skan _flo->bits.frac0 = X##_f0; \ 249169689Skan _flo->bits.frac1 = X##_f1; \ 250169689Skan _flo->bits.exp = X##_e; \ 251169689Skan _flo->bits.sign = X##_s; \ 252169689Skan } while (0) 253169689Skan 254169689Skan 255169689Skan/* 256169689Skan * Multiplication algorithms: 257169689Skan */ 258169689Skan 259169689Skan/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 260169689Skan 261169689Skan#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ 262169689Skan do { \ 263169689Skan _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 264169689Skan \ 265169689Skan doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 266169689Skan doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ 267169689Skan doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ 268169689Skan doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ 269169689Skan \ 270169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 271169689Skan _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \ 272169689Skan _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 273169689Skan _FP_FRAC_WORD_4(_z,1)); \ 274169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 275169689Skan _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \ 276169689Skan _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 277169689Skan _FP_FRAC_WORD_4(_z,1)); \ 278169689Skan \ 279169689Skan /* Normalize since we know where the msb of the multiplicands \ 280169689Skan were (bit B), we know that the msb of the of the product is \ 281169689Skan at either 2B or 2B-1. */ \ 282169689Skan _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 283169689Skan R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 284169689Skan R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 285169689Skan } while (0) 286169689Skan 287169689Skan/* Given a 1W * 1W => 2W primitive, do the extended multiplication. 288169689Skan Do only 3 multiplications instead of four. This one is for machines 289169689Skan where multiplication is much more expensive than subtraction. */ 290169689Skan 291169689Skan#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ 292169689Skan do { \ 293169689Skan _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 294169689Skan _FP_W_TYPE _d; \ 295169689Skan int _c1, _c2; \ 296169689Skan \ 297169689Skan _b_f0 = X##_f0 + X##_f1; \ 298169689Skan _c1 = _b_f0 < X##_f0; \ 299169689Skan _b_f1 = Y##_f0 + Y##_f1; \ 300169689Skan _c2 = _b_f1 < Y##_f0; \ 301169689Skan doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 302169689Skan doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \ 303169689Skan doit(_c_f1, _c_f0, X##_f1, Y##_f1); \ 304169689Skan \ 305169689Skan _b_f0 &= -_c2; \ 306169689Skan _b_f1 &= -_c1; \ 307169689Skan __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 308169689Skan _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \ 309169689Skan 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \ 310169689Skan __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 311169689Skan _b_f0); \ 312169689Skan __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 313169689Skan _b_f1); \ 314169689Skan __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 315169689Skan _FP_FRAC_WORD_4(_z,1), \ 316169689Skan 0, _d, _FP_FRAC_WORD_4(_z,0)); \ 317169689Skan __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 318169689Skan _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \ 319169689Skan __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \ 320169689Skan _c_f1, _c_f0, \ 321169689Skan _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \ 322169689Skan \ 323169689Skan /* Normalize since we know where the msb of the multiplicands \ 324169689Skan were (bit B), we know that the msb of the of the product is \ 325169689Skan at either 2B or 2B-1. */ \ 326169689Skan _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 327169689Skan R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 328169689Skan R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 329169689Skan } while (0) 330169689Skan 331169689Skan#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ 332169689Skan do { \ 333169689Skan _FP_FRAC_DECL_4(_z); \ 334169689Skan _FP_W_TYPE _x[2], _y[2]; \ 335169689Skan _x[0] = X##_f0; _x[1] = X##_f1; \ 336169689Skan _y[0] = Y##_f0; _y[1] = Y##_f1; \ 337169689Skan \ 338169689Skan mpn_mul_n(_z_f, _x, _y, 2); \ 339169689Skan \ 340169689Skan /* Normalize since we know where the msb of the multiplicands \ 341169689Skan were (bit B), we know that the msb of the of the product is \ 342169689Skan at either 2B or 2B-1. */ \ 343169689Skan _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 344169689Skan R##_f0 = _z_f[0]; \ 345169689Skan R##_f1 = _z_f[1]; \ 346169689Skan } while (0) 347169689Skan 348169689Skan/* Do at most 120x120=240 bits multiplication using double floating 349169689Skan point multiplication. This is useful if floating point 350169689Skan multiplication has much bigger throughput than integer multiply. 351169689Skan It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits 352169689Skan between 106 and 120 only. 353169689Skan Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set. 354169689Skan SETFETZ is a macro which will disable all FPU exceptions and set rounding 355169689Skan towards zero, RESETFE should optionally reset it back. */ 356169689Skan 357169689Skan#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \ 358169689Skan do { \ 359169689Skan static const double _const[] = { \ 360169689Skan /* 2^-24 */ 5.9604644775390625e-08, \ 361169689Skan /* 2^-48 */ 3.5527136788005009e-15, \ 362169689Skan /* 2^-72 */ 2.1175823681357508e-22, \ 363169689Skan /* 2^-96 */ 1.2621774483536189e-29, \ 364169689Skan /* 2^28 */ 2.68435456e+08, \ 365169689Skan /* 2^4 */ 1.600000e+01, \ 366169689Skan /* 2^-20 */ 9.5367431640625e-07, \ 367169689Skan /* 2^-44 */ 5.6843418860808015e-14, \ 368169689Skan /* 2^-68 */ 3.3881317890172014e-21, \ 369169689Skan /* 2^-92 */ 2.0194839173657902e-28, \ 370169689Skan /* 2^-116 */ 1.2037062152420224e-35}; \ 371169689Skan double _a240, _b240, _c240, _d240, _e240, _f240, \ 372169689Skan _g240, _h240, _i240, _j240, _k240; \ 373169689Skan union { double d; UDItype i; } _l240, _m240, _n240, _o240, \ 374169689Skan _p240, _q240, _r240, _s240; \ 375169689Skan UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ 376169689Skan \ 377169689Skan if (wfracbits < 106 || wfracbits > 120) \ 378169689Skan abort(); \ 379169689Skan \ 380169689Skan setfetz; \ 381169689Skan \ 382169689Skan _e240 = (double)(long)(X##_f0 & 0xffffff); \ 383169689Skan _j240 = (double)(long)(Y##_f0 & 0xffffff); \ 384169689Skan _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \ 385169689Skan _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \ 386169689Skan _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \ 387169689Skan _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \ 388169689Skan _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \ 389169689Skan _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \ 390169689Skan _a240 = (double)(long)(X##_f1 >> 32); \ 391169689Skan _f240 = (double)(long)(Y##_f1 >> 32); \ 392169689Skan _e240 *= _const[3]; \ 393169689Skan _j240 *= _const[3]; \ 394169689Skan _d240 *= _const[2]; \ 395169689Skan _i240 *= _const[2]; \ 396169689Skan _c240 *= _const[1]; \ 397169689Skan _h240 *= _const[1]; \ 398169689Skan _b240 *= _const[0]; \ 399169689Skan _g240 *= _const[0]; \ 400169689Skan _s240.d = _e240*_j240;\ 401169689Skan _r240.d = _d240*_j240 + _e240*_i240;\ 402169689Skan _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\ 403169689Skan _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\ 404169689Skan _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\ 405169689Skan _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \ 406169689Skan _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \ 407169689Skan _l240.d = _a240*_g240 + _b240*_f240; \ 408169689Skan _k240 = _a240*_f240; \ 409169689Skan _r240.d += _s240.d; \ 410169689Skan _q240.d += _r240.d; \ 411169689Skan _p240.d += _q240.d; \ 412169689Skan _o240.d += _p240.d; \ 413169689Skan _n240.d += _o240.d; \ 414169689Skan _m240.d += _n240.d; \ 415169689Skan _l240.d += _m240.d; \ 416169689Skan _k240 += _l240.d; \ 417169689Skan _s240.d -= ((_const[10]+_s240.d)-_const[10]); \ 418169689Skan _r240.d -= ((_const[9]+_r240.d)-_const[9]); \ 419169689Skan _q240.d -= ((_const[8]+_q240.d)-_const[8]); \ 420169689Skan _p240.d -= ((_const[7]+_p240.d)-_const[7]); \ 421169689Skan _o240.d += _const[7]; \ 422169689Skan _n240.d += _const[6]; \ 423169689Skan _m240.d += _const[5]; \ 424169689Skan _l240.d += _const[4]; \ 425169689Skan if (_s240.d != 0.0) _y240 = 1; \ 426169689Skan if (_r240.d != 0.0) _y240 = 1; \ 427169689Skan if (_q240.d != 0.0) _y240 = 1; \ 428169689Skan if (_p240.d != 0.0) _y240 = 1; \ 429169689Skan _t240 = (DItype)_k240; \ 430169689Skan _u240 = _l240.i; \ 431169689Skan _v240 = _m240.i; \ 432169689Skan _w240 = _n240.i; \ 433169689Skan _x240 = _o240.i; \ 434169689Skan R##_f1 = (_t240 << (128 - (wfracbits - 1))) \ 435169689Skan | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \ 436169689Skan R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \ 437169689Skan | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \ 438169689Skan | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \ 439169689Skan | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \ 440169689Skan | _y240; \ 441169689Skan resetfe; \ 442169689Skan } while (0) 443169689Skan 444169689Skan/* 445169689Skan * Division algorithms: 446169689Skan */ 447169689Skan 448169689Skan#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ 449169689Skan do { \ 450169689Skan _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ 451169689Skan if (_FP_FRAC_GT_2(X, Y)) \ 452169689Skan { \ 453169689Skan _n_f2 = X##_f1 >> 1; \ 454169689Skan _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ 455169689Skan _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ 456169689Skan } \ 457169689Skan else \ 458169689Skan { \ 459169689Skan R##_e--; \ 460169689Skan _n_f2 = X##_f1; \ 461169689Skan _n_f1 = X##_f0; \ 462169689Skan _n_f0 = 0; \ 463169689Skan } \ 464169689Skan \ 465169689Skan /* Normalize, i.e. make the most significant bit of the \ 466169689Skan denominator set. */ \ 467169689Skan _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \ 468169689Skan \ 469169689Skan udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ 470169689Skan umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \ 471169689Skan _r_f0 = _n_f0; \ 472169689Skan if (_FP_FRAC_GT_2(_m, _r)) \ 473169689Skan { \ 474169689Skan R##_f1--; \ 475169689Skan _FP_FRAC_ADD_2(_r, Y, _r); \ 476169689Skan if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 477169689Skan { \ 478169689Skan R##_f1--; \ 479169689Skan _FP_FRAC_ADD_2(_r, Y, _r); \ 480169689Skan } \ 481169689Skan } \ 482169689Skan _FP_FRAC_DEC_2(_r, _m); \ 483169689Skan \ 484169689Skan if (_r_f1 == Y##_f1) \ 485169689Skan { \ 486169689Skan /* This is a special case, not an optimization \ 487169689Skan (_r/Y##_f1 would not fit into UWtype). \ 488169689Skan As _r is guaranteed to be < Y, R##_f0 can be either \ 489169689Skan (UWtype)-1 or (UWtype)-2. But as we know what kind \ 490169689Skan of bits it is (sticky, guard, round), we don't care. \ 491169689Skan We also don't care what the reminder is, because the \ 492169689Skan guard bit will be set anyway. -jj */ \ 493169689Skan R##_f0 = -1; \ 494169689Skan } \ 495169689Skan else \ 496169689Skan { \ 497169689Skan udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ 498169689Skan umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \ 499169689Skan _r_f0 = 0; \ 500169689Skan if (_FP_FRAC_GT_2(_m, _r)) \ 501169689Skan { \ 502169689Skan R##_f0--; \ 503169689Skan _FP_FRAC_ADD_2(_r, Y, _r); \ 504169689Skan if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 505169689Skan { \ 506169689Skan R##_f0--; \ 507169689Skan _FP_FRAC_ADD_2(_r, Y, _r); \ 508169689Skan } \ 509169689Skan } \ 510169689Skan if (!_FP_FRAC_EQ_2(_r, _m)) \ 511169689Skan R##_f0 |= _FP_WORK_STICKY; \ 512169689Skan } \ 513169689Skan } while (0) 514169689Skan 515169689Skan 516169689Skan#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ 517169689Skan do { \ 518169689Skan _FP_W_TYPE _x[4], _y[2], _z[4]; \ 519169689Skan _y[0] = Y##_f0; _y[1] = Y##_f1; \ 520169689Skan _x[0] = _x[3] = 0; \ 521169689Skan if (_FP_FRAC_GT_2(X, Y)) \ 522169689Skan { \ 523169689Skan R##_e++; \ 524169689Skan _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \ 525169689Skan X##_f1 >> (_FP_W_TYPE_SIZE - \ 526169689Skan (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \ 527169689Skan _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \ 528169689Skan } \ 529169689Skan else \ 530169689Skan { \ 531169689Skan _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \ 532169689Skan X##_f1 >> (_FP_W_TYPE_SIZE - \ 533169689Skan (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \ 534169689Skan _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \ 535169689Skan } \ 536169689Skan \ 537169689Skan (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ 538169689Skan R##_f1 = _z[1]; \ 539169689Skan R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ 540169689Skan } while (0) 541169689Skan 542169689Skan 543169689Skan/* 544169689Skan * Square root algorithms: 545169689Skan * We have just one right now, maybe Newton approximation 546169689Skan * should be added for those machines where division is fast. 547169689Skan */ 548169689Skan 549169689Skan#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ 550169689Skan do { \ 551169689Skan while (q) \ 552169689Skan { \ 553169689Skan T##_f1 = S##_f1 + q; \ 554169689Skan if (T##_f1 <= X##_f1) \ 555169689Skan { \ 556169689Skan S##_f1 = T##_f1 + q; \ 557169689Skan X##_f1 -= T##_f1; \ 558169689Skan R##_f1 += q; \ 559169689Skan } \ 560169689Skan _FP_FRAC_SLL_2(X, 1); \ 561169689Skan q >>= 1; \ 562169689Skan } \ 563169689Skan q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 564169689Skan while (q != _FP_WORK_ROUND) \ 565169689Skan { \ 566169689Skan T##_f0 = S##_f0 + q; \ 567169689Skan T##_f1 = S##_f1; \ 568169689Skan if (T##_f1 < X##_f1 || \ 569169689Skan (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ 570169689Skan { \ 571169689Skan S##_f0 = T##_f0 + q; \ 572169689Skan S##_f1 += (T##_f0 > S##_f0); \ 573169689Skan _FP_FRAC_DEC_2(X, T); \ 574169689Skan R##_f0 += q; \ 575169689Skan } \ 576169689Skan _FP_FRAC_SLL_2(X, 1); \ 577169689Skan q >>= 1; \ 578169689Skan } \ 579169689Skan if (X##_f0 | X##_f1) \ 580169689Skan { \ 581169689Skan if (S##_f1 < X##_f1 || \ 582169689Skan (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \ 583169689Skan R##_f0 |= _FP_WORK_ROUND; \ 584169689Skan R##_f0 |= _FP_WORK_STICKY; \ 585169689Skan } \ 586169689Skan } while (0) 587169689Skan 588169689Skan 589169689Skan/* 590169689Skan * Assembly/disassembly for converting to/from integral types. 591169689Skan * No shifting or overflow handled here. 592169689Skan */ 593169689Skan 594169689Skan#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ 595169689Skan(void)((rsize <= _FP_W_TYPE_SIZE) \ 596169689Skan ? ({ r = X##_f0; }) \ 597169689Skan : ({ \ 598169689Skan r = X##_f1; \ 599169689Skan r <<= _FP_W_TYPE_SIZE; \ 600169689Skan r += X##_f0; \ 601169689Skan })) 602169689Skan 603169689Skan#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ 604169689Skan do { \ 605169689Skan X##_f0 = r; \ 606169689Skan X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 607169689Skan } while (0) 608169689Skan 609169689Skan/* 610169689Skan * Convert FP values between word sizes 611169689Skan */ 612169689Skan 613169689Skan#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0) 614169689Skan 615169689Skan#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0)) 616171825Skan 617171825Skan#define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S) 618