1/* Software floating-point emulation. 2 Basic two-word fraction declaration and manipulation. 3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 Contributed by Richard Henderson (rth@cygnus.com), 6 Jakub Jelinek (jj@ultra.linux.cz), 7 David S. Miller (davem@redhat.com) and 8 Peter Maydell (pmaydell@chiark.greenend.org.uk). 9 10 The GNU C Library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Library General Public License as 12 published by the Free Software Foundation; either version 2 of the 13 License, or (at your option) any later version. 14 15 The GNU C Library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Library General Public License for more details. 19 20 You should have received a copy of the GNU Library General Public 21 License along with the GNU C Library; see the file COPYING.LIB. If 22 not, write to the Free Software Foundation, Inc., 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 25#ifndef __MATH_EMU_OP_2_H__ 26#define __MATH_EMU_OP_2_H__ 27 28#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0 29#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) 30#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) 31#define _FP_FRAC_HIGH_2(X) (X##_f1) 32#define _FP_FRAC_LOW_2(X) (X##_f0) 33#define _FP_FRAC_WORD_2(X,w) (X##_f##w) 34 35#define _FP_FRAC_SLL_2(X,N) \ 36 do { \ 37 if ((N) < _FP_W_TYPE_SIZE) \ 38 { \ 39 if (__builtin_constant_p(N) && (N) == 1) \ 40 { \ 41 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ 42 X##_f0 += X##_f0; \ 43 } \ 44 else \ 45 { \ 46 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ 47 X##_f0 <<= (N); \ 48 } \ 49 } \ 50 else \ 51 { \ 52 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 53 X##_f0 = 0; \ 54 } \ 55 } while (0) 56 57#define _FP_FRAC_SRL_2(X,N) \ 58 do { \ 59 if ((N) < _FP_W_TYPE_SIZE) \ 60 { \ 61 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 62 X##_f1 >>= (N); \ 63 } \ 64 else \ 65 { \ 66 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 67 X##_f1 = 0; \ 68 } \ 69 } while (0) 70 71/* Right shift with sticky-lsb. */ 72#define _FP_FRAC_SRS_2(X,N,sz) \ 73 do { \ 74 if ((N) < _FP_W_TYPE_SIZE) \ 75 { \ 76 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ 77 (__builtin_constant_p(N) && (N) == 1 \ 78 ? X##_f0 & 1 \ 79 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 80 X##_f1 >>= (N); \ 81 } \ 82 else \ 83 { \ 84 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ 85 (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \ 86 X##_f1 = 0; \ 87 } \ 88 } while (0) 89 90#define _FP_FRAC_ADDI_2(X,I) \ 91 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 92 93#define _FP_FRAC_ADD_2(R,X,Y) \ 94 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 95 96#define _FP_FRAC_SUB_2(R,X,Y) \ 97 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 98 99#define _FP_FRAC_DEC_2(X,Y) \ 100 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0) 101 102#define _FP_FRAC_CLZ_2(R,X) \ 103 do { \ 104 if (X##_f1) \ 105 __FP_CLZ(R,X##_f1); \ 106 else \ 107 { \ 108 __FP_CLZ(R,X##_f0); \ 109 R += _FP_W_TYPE_SIZE; \ 110 } \ 111 } while(0) 112 113/* Predicates */ 114#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) 115#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) 116#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 117#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 118#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) 119#define _FP_FRAC_GT_2(X, Y) \ 120 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) 121#define _FP_FRAC_GE_2(X, Y) \ 122 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) 123 124#define _FP_ZEROFRAC_2 0, 0 125#define _FP_MINFRAC_2 0, 1 126#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 127 128/* 129 * Internals 130 */ 131 132#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) 133 134#define __FP_CLZ_2(R, xh, xl) \ 135 do { \ 136 if (xh) \ 137 __FP_CLZ(R,xh); \ 138 else \ 139 { \ 140 __FP_CLZ(R,xl); \ 141 R += _FP_W_TYPE_SIZE; \ 142 } \ 143 } while(0) 144 145 146#undef __FP_FRAC_ADDI_2 147#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) 148#undef __FP_FRAC_ADD_2 149#define __FP_FRAC_ADD_2 add_ssaaaa 150#undef __FP_FRAC_SUB_2 151#define __FP_FRAC_SUB_2 sub_ddmmss 152#undef __FP_FRAC_DEC_2 153#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl) 154 155/* 156 * Unpack the raw bits of a native fp value. Do not classify or 157 * normalize the data. 158 */ 159 160#define _FP_UNPACK_RAW_2(fs, X, val) \ 161 do { \ 162 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 163 \ 164 X##_f0 = _flo.bits.frac0; \ 165 X##_f1 = _flo.bits.frac1; \ 166 X##_e = _flo.bits.exp; \ 167 X##_s = _flo.bits.sign; \ 168 } while (0) 169 170#define _FP_UNPACK_RAW_2_P(fs, X, val) \ 171 do { \ 172 union _FP_UNION_##fs *_flo = \ 173 (union _FP_UNION_##fs *)(val); \ 174 \ 175 X##_f0 = _flo->bits.frac0; \ 176 X##_f1 = _flo->bits.frac1; \ 177 X##_e = _flo->bits.exp; \ 178 X##_s = _flo->bits.sign; \ 179 } while (0) 180 181 182/* 183 * Repack the raw bits of a native fp value. 184 */ 185 186#define _FP_PACK_RAW_2(fs, val, X) \ 187 do { \ 188 union _FP_UNION_##fs _flo; \ 189 \ 190 _flo.bits.frac0 = X##_f0; \ 191 _flo.bits.frac1 = X##_f1; \ 192 _flo.bits.exp = X##_e; \ 193 _flo.bits.sign = X##_s; \ 194 \ 195 (val) = _flo.flt; \ 196 } while (0) 197 198#define _FP_PACK_RAW_2_P(fs, val, X) \ 199 do { \ 200 union _FP_UNION_##fs *_flo = \ 201 (union _FP_UNION_##fs *)(val); \ 202 \ 203 _flo->bits.frac0 = X##_f0; \ 204 _flo->bits.frac1 = X##_f1; \ 205 _flo->bits.exp = X##_e; \ 206 _flo->bits.sign = X##_s; \ 207 } while (0) 208 209 210/* 211 * Multiplication algorithms: 212 */ 213 214/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 215 216#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ 217 do { \ 218 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 219 \ 220 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 221 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ 222 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ 223 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ 224 \ 225 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 226 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \ 227 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 228 _FP_FRAC_WORD_4(_z,1)); \ 229 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 230 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \ 231 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 232 _FP_FRAC_WORD_4(_z,1)); \ 233 \ 234 /* Normalize since we know where the msb of the multiplicands \ 235 were (bit B), we know that the msb of the of the product is \ 236 at either 2B or 2B-1. */ \ 237 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 238 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 239 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 240 } while (0) 241 242/* Given a 1W * 1W => 2W primitive, do the extended multiplication. 243 Do only 3 multiplications instead of four. This one is for machines 244 where multiplication is much more expensive than subtraction. */ 245 246#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ 247 do { \ 248 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 249 _FP_W_TYPE _d; \ 250 int _c1, _c2; \ 251 \ 252 _b_f0 = X##_f0 + X##_f1; \ 253 _c1 = _b_f0 < X##_f0; \ 254 _b_f1 = Y##_f0 + Y##_f1; \ 255 _c2 = _b_f1 < Y##_f0; \ 256 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 257 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \ 258 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \ 259 \ 260 _b_f0 &= -_c2; \ 261 _b_f1 &= -_c1; \ 262 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 263 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \ 264 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \ 265 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 266 _b_f0); \ 267 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 268 _b_f1); \ 269 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 270 _FP_FRAC_WORD_4(_z,1), \ 271 0, _d, _FP_FRAC_WORD_4(_z,0)); \ 272 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 273 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \ 274 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \ 275 _c_f1, _c_f0, \ 276 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \ 277 \ 278 /* Normalize since we know where the msb of the multiplicands \ 279 were (bit B), we know that the msb of the of the product is \ 280 at either 2B or 2B-1. */ \ 281 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 282 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 283 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 284 } while (0) 285 286#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ 287 do { \ 288 _FP_FRAC_DECL_4(_z); \ 289 _FP_W_TYPE _x[2], _y[2]; \ 290 _x[0] = X##_f0; _x[1] = X##_f1; \ 291 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 292 \ 293 mpn_mul_n(_z_f, _x, _y, 2); \ 294 \ 295 /* Normalize since we know where the msb of the multiplicands \ 296 were (bit B), we know that the msb of the of the product is \ 297 at either 2B or 2B-1. */ \ 298 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 299 R##_f0 = _z_f[0]; \ 300 R##_f1 = _z_f[1]; \ 301 } while (0) 302 303/* Do at most 120x120=240 bits multiplication using double floating 304 point multiplication. This is useful if floating point 305 multiplication has much bigger throughput than integer multiply. 306 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits 307 between 106 and 120 only. 308 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set. 309 SETFETZ is a macro which will disable all FPU exceptions and set rounding 310 towards zero, RESETFE should optionally reset it back. */ 311 312#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \ 313 do { \ 314 static const double _const[] = { \ 315 /* 2^-24 */ 5.9604644775390625e-08, \ 316 /* 2^-48 */ 3.5527136788005009e-15, \ 317 /* 2^-72 */ 2.1175823681357508e-22, \ 318 /* 2^-96 */ 1.2621774483536189e-29, \ 319 /* 2^28 */ 2.68435456e+08, \ 320 /* 2^4 */ 1.600000e+01, \ 321 /* 2^-20 */ 9.5367431640625e-07, \ 322 /* 2^-44 */ 5.6843418860808015e-14, \ 323 /* 2^-68 */ 3.3881317890172014e-21, \ 324 /* 2^-92 */ 2.0194839173657902e-28, \ 325 /* 2^-116 */ 1.2037062152420224e-35}; \ 326 double _a240, _b240, _c240, _d240, _e240, _f240, \ 327 _g240, _h240, _i240, _j240, _k240; \ 328 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \ 329 _p240, _q240, _r240, _s240; \ 330 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ 331 \ 332 if (wfracbits < 106 || wfracbits > 120) \ 333 abort(); \ 334 \ 335 setfetz; \ 336 \ 337 _e240 = (double)(long)(X##_f0 & 0xffffff); \ 338 _j240 = (double)(long)(Y##_f0 & 0xffffff); \ 339 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \ 340 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \ 341 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \ 342 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \ 343 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \ 344 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \ 345 _a240 = (double)(long)(X##_f1 >> 32); \ 346 _f240 = (double)(long)(Y##_f1 >> 32); \ 347 _e240 *= _const[3]; \ 348 _j240 *= _const[3]; \ 349 _d240 *= _const[2]; \ 350 _i240 *= _const[2]; \ 351 _c240 *= _const[1]; \ 352 _h240 *= _const[1]; \ 353 _b240 *= _const[0]; \ 354 _g240 *= _const[0]; \ 355 _s240.d = _e240*_j240;\ 356 _r240.d = _d240*_j240 + _e240*_i240;\ 357 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\ 358 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\ 359 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\ 360 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \ 361 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \ 362 _l240.d = _a240*_g240 + _b240*_f240; \ 363 _k240 = _a240*_f240; \ 364 _r240.d += _s240.d; \ 365 _q240.d += _r240.d; \ 366 _p240.d += _q240.d; \ 367 _o240.d += _p240.d; \ 368 _n240.d += _o240.d; \ 369 _m240.d += _n240.d; \ 370 _l240.d += _m240.d; \ 371 _k240 += _l240.d; \ 372 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \ 373 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \ 374 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \ 375 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \ 376 _o240.d += _const[7]; \ 377 _n240.d += _const[6]; \ 378 _m240.d += _const[5]; \ 379 _l240.d += _const[4]; \ 380 if (_s240.d != 0.0) _y240 = 1; \ 381 if (_r240.d != 0.0) _y240 = 1; \ 382 if (_q240.d != 0.0) _y240 = 1; \ 383 if (_p240.d != 0.0) _y240 = 1; \ 384 _t240 = (DItype)_k240; \ 385 _u240 = _l240.i; \ 386 _v240 = _m240.i; \ 387 _w240 = _n240.i; \ 388 _x240 = _o240.i; \ 389 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \ 390 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \ 391 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \ 392 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \ 393 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \ 394 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \ 395 | _y240; \ 396 resetfe; \ 397 } while (0) 398 399/* 400 * Division algorithms: 401 */ 402 403#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ 404 do { \ 405 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ 406 if (_FP_FRAC_GT_2(X, Y)) \ 407 { \ 408 _n_f2 = X##_f1 >> 1; \ 409 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ 410 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ 411 } \ 412 else \ 413 { \ 414 R##_e--; \ 415 _n_f2 = X##_f1; \ 416 _n_f1 = X##_f0; \ 417 _n_f0 = 0; \ 418 } \ 419 \ 420 /* Normalize, i.e. make the most significant bit of the \ 421 denominator set. */ \ 422 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \ 423 \ 424 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ 425 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \ 426 _r_f0 = _n_f0; \ 427 if (_FP_FRAC_GT_2(_m, _r)) \ 428 { \ 429 R##_f1--; \ 430 _FP_FRAC_ADD_2(_r, Y, _r); \ 431 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 432 { \ 433 R##_f1--; \ 434 _FP_FRAC_ADD_2(_r, Y, _r); \ 435 } \ 436 } \ 437 _FP_FRAC_DEC_2(_r, _m); \ 438 \ 439 if (_r_f1 == Y##_f1) \ 440 { \ 441 /* This is a special case, not an optimization \ 442 (_r/Y##_f1 would not fit into UWtype). \ 443 As _r is guaranteed to be < Y, R##_f0 can be either \ 444 (UWtype)-1 or (UWtype)-2. But as we know what kind \ 445 of bits it is (sticky, guard, round), we don't care. \ 446 We also don't care what the reminder is, because the \ 447 guard bit will be set anyway. -jj */ \ 448 R##_f0 = -1; \ 449 } \ 450 else \ 451 { \ 452 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ 453 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \ 454 _r_f0 = 0; \ 455 if (_FP_FRAC_GT_2(_m, _r)) \ 456 { \ 457 R##_f0--; \ 458 _FP_FRAC_ADD_2(_r, Y, _r); \ 459 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 460 { \ 461 R##_f0--; \ 462 _FP_FRAC_ADD_2(_r, Y, _r); \ 463 } \ 464 } \ 465 if (!_FP_FRAC_EQ_2(_r, _m)) \ 466 R##_f0 |= _FP_WORK_STICKY; \ 467 } \ 468 } while (0) 469 470 471#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ 472 do { \ 473 _FP_W_TYPE _x[4], _y[2], _z[4]; \ 474 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 475 _x[0] = _x[3] = 0; \ 476 if (_FP_FRAC_GT_2(X, Y)) \ 477 { \ 478 R##_e++; \ 479 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \ 480 X##_f1 >> (_FP_W_TYPE_SIZE - \ 481 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \ 482 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \ 483 } \ 484 else \ 485 { \ 486 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \ 487 X##_f1 >> (_FP_W_TYPE_SIZE - \ 488 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \ 489 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \ 490 } \ 491 \ 492 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ 493 R##_f1 = _z[1]; \ 494 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ 495 } while (0) 496 497 498/* 499 * Square root algorithms: 500 * We have just one right now, maybe Newton approximation 501 * should be added for those machines where division is fast. 502 */ 503 504#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ 505 do { \ 506 while (q) \ 507 { \ 508 T##_f1 = S##_f1 + q; \ 509 if (T##_f1 <= X##_f1) \ 510 { \ 511 S##_f1 = T##_f1 + q; \ 512 X##_f1 -= T##_f1; \ 513 R##_f1 += q; \ 514 } \ 515 _FP_FRAC_SLL_2(X, 1); \ 516 q >>= 1; \ 517 } \ 518 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 519 while (q != _FP_WORK_ROUND) \ 520 { \ 521 T##_f0 = S##_f0 + q; \ 522 T##_f1 = S##_f1; \ 523 if (T##_f1 < X##_f1 || \ 524 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ 525 { \ 526 S##_f0 = T##_f0 + q; \ 527 S##_f1 += (T##_f0 > S##_f0); \ 528 _FP_FRAC_DEC_2(X, T); \ 529 R##_f0 += q; \ 530 } \ 531 _FP_FRAC_SLL_2(X, 1); \ 532 q >>= 1; \ 533 } \ 534 if (X##_f0 | X##_f1) \ 535 { \ 536 if (S##_f1 < X##_f1 || \ 537 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \ 538 R##_f0 |= _FP_WORK_ROUND; \ 539 R##_f0 |= _FP_WORK_STICKY; \ 540 } \ 541 } while (0) 542 543 544/* 545 * Assembly/disassembly for converting to/from integral types. 546 * No shifting or overflow handled here. 547 */ 548 549#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ 550 do { \ 551 if (rsize <= _FP_W_TYPE_SIZE) \ 552 r = X##_f0; \ 553 else \ 554 { \ 555 r = X##_f1; \ 556 r <<= _FP_W_TYPE_SIZE; \ 557 r += X##_f0; \ 558 } \ 559 } while (0) 560 561#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ 562 do { \ 563 X##_f0 = r; \ 564 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 565 } while (0) 566 567/* 568 * Convert FP values between word sizes 569 */ 570 571#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ 572 do { \ 573 if (S##_c != FP_CLS_NAN) \ 574 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 575 _FP_WFRACBITS_##sfs); \ 576 else \ 577 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 578 D##_f = S##_f0; \ 579 } while (0) 580 581#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ 582 do { \ 583 D##_f0 = S##_f; \ 584 D##_f1 = 0; \ 585 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 586 } while (0) 587 588#endif 589