1/* Software floating-point emulation. 2 Basic two-word fraction declaration and manipulation. 3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 Contributed by Richard Henderson (rth@cygnus.com), 6 Jakub Jelinek (jj@ultra.linux.cz), 7 David S. Miller (davem@redhat.com) and 8 Peter Maydell (pmaydell@chiark.greenend.org.uk). 9 10 The GNU C Library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Library General Public License as 12 published by the Free Software Foundation; either version 2 of the 13 License, or (at your option) any later version. 14 15 The GNU C Library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Library General Public License for more details. 19 20 You should have received a copy of the GNU Library General Public 21 License along with the GNU C Library; see the file COPYING.LIB. If 22 not, write to the Free Software Foundation, Inc., 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 25#ifndef __MATH_EMU_OP_2_H__ 26#define __MATH_EMU_OP_2_H__ 27 28#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1 29#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) 30#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) 31#define _FP_FRAC_HIGH_2(X) (X##_f1) 32#define _FP_FRAC_LOW_2(X) (X##_f0) 33#define _FP_FRAC_WORD_2(X,w) (X##_f##w) 34 35#define _FP_FRAC_SLL_2(X,N) \ 36 do { \ 37 if ((N) < _FP_W_TYPE_SIZE) \ 38 { \ 39 if (__builtin_constant_p(N) && (N) == 1) \ 40 { \ 41 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ 42 X##_f0 += X##_f0; \ 43 } \ 44 else \ 45 { \ 46 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ 47 X##_f0 <<= (N); \ 48 } \ 49 } \ 50 else \ 51 { \ 52 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 53 X##_f0 = 0; \ 54 } \ 55 } while (0) 56 57#define _FP_FRAC_SRL_2(X,N) \ 58 do { \ 59 if ((N) < _FP_W_TYPE_SIZE) \ 60 { \ 61 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 62 X##_f1 >>= (N); \ 63 } \ 64 else \ 65 { \ 66 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 67 X##_f1 = 0; \ 68 } \ 69 } while (0) 70 71/* Right shift with sticky-lsb. */ 72#define _FP_FRAC_SRS_2(X,N,sz) \ 73 do { \ 74 if ((N) < _FP_W_TYPE_SIZE) \ 75 { \ 76 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ 77 (__builtin_constant_p(N) && (N) == 1 \ 78 ? X##_f0 & 1 \ 79 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 80 X##_f1 >>= (N); \ 81 } \ 82 else \ 83 { \ 84 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ 85 (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \ 86 X##_f1 = 0; \ 87 } \ 88 } while (0) 89 90#define _FP_FRAC_ADDI_2(X,I) \ 91 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 92 93#define _FP_FRAC_ADD_2(R,X,Y) \ 94 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 95 96#define _FP_FRAC_SUB_2(R,X,Y) \ 97 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 98 99#define _FP_FRAC_DEC_2(X,Y) \ 100 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0) 101 102#define _FP_FRAC_CLZ_2(R,X) \ 103 do { \ 104 if (X##_f1) \ 105 __FP_CLZ(R,X##_f1); \ 106 else \ 107 { \ 108 __FP_CLZ(R,X##_f0); \ 109 R += _FP_W_TYPE_SIZE; \ 110 } \ 111 } while(0) 112 113/* Predicates */ 114#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) 115#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) 116#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 117#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 118#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) 119#define _FP_FRAC_GT_2(X, Y) \ 120 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) 121#define _FP_FRAC_GE_2(X, Y) \ 122 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) 123 124#define _FP_ZEROFRAC_2 0, 0 125#define _FP_MINFRAC_2 0, 1 126#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 127 128/* 129 * Internals 130 */ 131 132#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) 133 134#define __FP_CLZ_2(R, xh, xl) \ 135 do { \ 136 if (xh) \ 137 __FP_CLZ(R,xh); \ 138 else \ 139 { \ 140 __FP_CLZ(R,xl); \ 141 R += _FP_W_TYPE_SIZE; \ 142 } \ 143 } while(0) 144 145 146#undef __FP_FRAC_ADDI_2 147#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) 148#undef __FP_FRAC_ADD_2 149#define __FP_FRAC_ADD_2 add_ssaaaa 150#undef __FP_FRAC_SUB_2 151#define __FP_FRAC_SUB_2 sub_ddmmss 152#undef __FP_FRAC_DEC_2 153#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl) 154 155 156/* 157 * Unpack the raw bits of a native fp value. Do not classify or 158 * normalize the data. 159 */ 160 161#define _FP_UNPACK_RAW_2(fs, X, val) \ 162 do { \ 163 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 164 \ 165 X##_f0 = _flo.bits.frac0; \ 166 X##_f1 = _flo.bits.frac1; \ 167 X##_e = _flo.bits.exp; \ 168 X##_s = _flo.bits.sign; \ 169 } while (0) 170 171#define _FP_UNPACK_RAW_2_P(fs, X, val) \ 172 do { \ 173 union _FP_UNION_##fs *_flo = \ 174 (union _FP_UNION_##fs *)(val); \ 175 \ 176 X##_f0 = _flo->bits.frac0; \ 177 X##_f1 = _flo->bits.frac1; \ 178 X##_e = _flo->bits.exp; \ 179 X##_s = _flo->bits.sign; \ 180 } while (0) 181 182 183/* 184 * Repack the raw bits of a native fp value. 185 */ 186 187#define _FP_PACK_RAW_2(fs, val, X) \ 188 do { \ 189 union _FP_UNION_##fs _flo; \ 190 \ 191 _flo.bits.frac0 = X##_f0; \ 192 _flo.bits.frac1 = X##_f1; \ 193 _flo.bits.exp = X##_e; \ 194 _flo.bits.sign = X##_s; \ 195 \ 196 (val) = _flo.flt; \ 197 } while (0) 198 199#define _FP_PACK_RAW_2_P(fs, val, X) \ 200 do { \ 201 union _FP_UNION_##fs *_flo = \ 202 (union _FP_UNION_##fs *)(val); \ 203 \ 204 _flo->bits.frac0 = X##_f0; \ 205 _flo->bits.frac1 = X##_f1; \ 206 _flo->bits.exp = X##_e; \ 207 _flo->bits.sign = X##_s; \ 208 } while (0) 209 210 211/* 212 * Multiplication algorithms: 213 */ 214 215/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 216 217#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ 218 do { \ 219 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 220 \ 221 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 222 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ 223 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ 224 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ 225 \ 226 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 227 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \ 228 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 229 _FP_FRAC_WORD_4(_z,1)); \ 230 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 231 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \ 232 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 233 _FP_FRAC_WORD_4(_z,1)); \ 234 \ 235 /* Normalize since we know where the msb of the multiplicands \ 236 were (bit B), we know that the msb of the of the product is \ 237 at either 2B or 2B-1. */ \ 238 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 239 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 240 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 241 } while (0) 242 243/* Given a 1W * 1W => 2W primitive, do the extended multiplication. 244 Do only 3 multiplications instead of four. This one is for machines 245 where multiplication is much more expensive than subtraction. */ 246 247#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ 248 do { \ 249 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 250 _FP_W_TYPE _d; \ 251 int _c1, _c2; \ 252 \ 253 _b_f0 = X##_f0 + X##_f1; \ 254 _c1 = _b_f0 < X##_f0; \ 255 _b_f1 = Y##_f0 + Y##_f1; \ 256 _c2 = _b_f1 < Y##_f0; \ 257 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 258 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \ 259 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \ 260 \ 261 _b_f0 &= -_c2; \ 262 _b_f1 &= -_c1; \ 263 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 264 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \ 265 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \ 266 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 267 _b_f0); \ 268 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 269 _b_f1); \ 270 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 271 _FP_FRAC_WORD_4(_z,1), \ 272 0, _d, _FP_FRAC_WORD_4(_z,0)); \ 273 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 274 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \ 275 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \ 276 _c_f1, _c_f0, \ 277 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \ 278 \ 279 /* Normalize since we know where the msb of the multiplicands \ 280 were (bit B), we know that the msb of the of the product is \ 281 at either 2B or 2B-1. */ \ 282 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 283 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 284 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 285 } while (0) 286 287#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ 288 do { \ 289 _FP_FRAC_DECL_4(_z); \ 290 _FP_W_TYPE _x[2], _y[2]; \ 291 _x[0] = X##_f0; _x[1] = X##_f1; \ 292 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 293 \ 294 mpn_mul_n(_z_f, _x, _y, 2); \ 295 \ 296 /* Normalize since we know where the msb of the multiplicands \ 297 were (bit B), we know that the msb of the of the product is \ 298 at either 2B or 2B-1. */ \ 299 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 300 R##_f0 = _z_f[0]; \ 301 R##_f1 = _z_f[1]; \ 302 } while (0) 303 304/* Do at most 120x120=240 bits multiplication using double floating 305 point multiplication. This is useful if floating point 306 multiplication has much bigger throughput than integer multiply. 307 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits 308 between 106 and 120 only. 309 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set. 310 SETFETZ is a macro which will disable all FPU exceptions and set rounding 311 towards zero, RESETFE should optionally reset it back. */ 312 313#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \ 314 do { \ 315 static const double _const[] = { \ 316 /* 2^-24 */ 5.9604644775390625e-08, \ 317 /* 2^-48 */ 3.5527136788005009e-15, \ 318 /* 2^-72 */ 2.1175823681357508e-22, \ 319 /* 2^-96 */ 1.2621774483536189e-29, \ 320 /* 2^28 */ 2.68435456e+08, \ 321 /* 2^4 */ 1.600000e+01, \ 322 /* 2^-20 */ 9.5367431640625e-07, \ 323 /* 2^-44 */ 5.6843418860808015e-14, \ 324 /* 2^-68 */ 3.3881317890172014e-21, \ 325 /* 2^-92 */ 2.0194839173657902e-28, \ 326 /* 2^-116 */ 1.2037062152420224e-35}; \ 327 double _a240, _b240, _c240, _d240, _e240, _f240, \ 328 _g240, _h240, _i240, _j240, _k240; \ 329 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \ 330 _p240, _q240, _r240, _s240; \ 331 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ 332 \ 333 if (wfracbits < 106 || wfracbits > 120) \ 334 abort(); \ 335 \ 336 setfetz; \ 337 \ 338 _e240 = (double)(long)(X##_f0 & 0xffffff); \ 339 _j240 = (double)(long)(Y##_f0 & 0xffffff); \ 340 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \ 341 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \ 342 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \ 343 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \ 344 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \ 345 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \ 346 _a240 = (double)(long)(X##_f1 >> 32); \ 347 _f240 = (double)(long)(Y##_f1 >> 32); \ 348 _e240 *= _const[3]; \ 349 _j240 *= _const[3]; \ 350 _d240 *= _const[2]; \ 351 _i240 *= _const[2]; \ 352 _c240 *= _const[1]; \ 353 _h240 *= _const[1]; \ 354 _b240 *= _const[0]; \ 355 _g240 *= _const[0]; \ 356 _s240.d = _e240*_j240;\ 357 _r240.d = _d240*_j240 + _e240*_i240;\ 358 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\ 359 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\ 360 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\ 361 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \ 362 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \ 363 _l240.d = _a240*_g240 + _b240*_f240; \ 364 _k240 = _a240*_f240; \ 365 _r240.d += _s240.d; \ 366 _q240.d += _r240.d; \ 367 _p240.d += _q240.d; \ 368 _o240.d += _p240.d; \ 369 _n240.d += _o240.d; \ 370 _m240.d += _n240.d; \ 371 _l240.d += _m240.d; \ 372 _k240 += _l240.d; \ 373 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \ 374 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \ 375 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \ 376 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \ 377 _o240.d += _const[7]; \ 378 _n240.d += _const[6]; \ 379 _m240.d += _const[5]; \ 380 _l240.d += _const[4]; \ 381 if (_s240.d != 0.0) _y240 = 1; \ 382 if (_r240.d != 0.0) _y240 = 1; \ 383 if (_q240.d != 0.0) _y240 = 1; \ 384 if (_p240.d != 0.0) _y240 = 1; \ 385 _t240 = (DItype)_k240; \ 386 _u240 = _l240.i; \ 387 _v240 = _m240.i; \ 388 _w240 = _n240.i; \ 389 _x240 = _o240.i; \ 390 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \ 391 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \ 392 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \ 393 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \ 394 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \ 395 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \ 396 | _y240; \ 397 resetfe; \ 398 } while (0) 399 400/* 401 * Division algorithms: 402 */ 403 404#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ 405 do { \ 406 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ 407 if (_FP_FRAC_GT_2(X, Y)) \ 408 { \ 409 _n_f2 = X##_f1 >> 1; \ 410 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ 411 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ 412 } \ 413 else \ 414 { \ 415 R##_e--; \ 416 _n_f2 = X##_f1; \ 417 _n_f1 = X##_f0; \ 418 _n_f0 = 0; \ 419 } \ 420 \ 421 /* Normalize, i.e. make the most significant bit of the \ 422 denominator set. */ \ 423 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \ 424 \ 425 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ 426 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \ 427 _r_f0 = _n_f0; \ 428 if (_FP_FRAC_GT_2(_m, _r)) \ 429 { \ 430 R##_f1--; \ 431 _FP_FRAC_ADD_2(_r, Y, _r); \ 432 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 433 { \ 434 R##_f1--; \ 435 _FP_FRAC_ADD_2(_r, Y, _r); \ 436 } \ 437 } \ 438 _FP_FRAC_DEC_2(_r, _m); \ 439 \ 440 if (_r_f1 == Y##_f1) \ 441 { \ 442 /* This is a special case, not an optimization \ 443 (_r/Y##_f1 would not fit into UWtype). \ 444 As _r is guaranteed to be < Y, R##_f0 can be either \ 445 (UWtype)-1 or (UWtype)-2. But as we know what kind \ 446 of bits it is (sticky, guard, round), we don't care. \ 447 We also don't care what the reminder is, because the \ 448 guard bit will be set anyway. -jj */ \ 449 R##_f0 = -1; \ 450 } \ 451 else \ 452 { \ 453 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ 454 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \ 455 _r_f0 = 0; \ 456 if (_FP_FRAC_GT_2(_m, _r)) \ 457 { \ 458 R##_f0--; \ 459 _FP_FRAC_ADD_2(_r, Y, _r); \ 460 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 461 { \ 462 R##_f0--; \ 463 _FP_FRAC_ADD_2(_r, Y, _r); \ 464 } \ 465 } \ 466 if (!_FP_FRAC_EQ_2(_r, _m)) \ 467 R##_f0 |= _FP_WORK_STICKY; \ 468 } \ 469 } while (0) 470 471 472#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ 473 do { \ 474 _FP_W_TYPE _x[4], _y[2], _z[4]; \ 475 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 476 _x[0] = _x[3] = 0; \ 477 if (_FP_FRAC_GT_2(X, Y)) \ 478 { \ 479 R##_e++; \ 480 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \ 481 X##_f1 >> (_FP_W_TYPE_SIZE - \ 482 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \ 483 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \ 484 } \ 485 else \ 486 { \ 487 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \ 488 X##_f1 >> (_FP_W_TYPE_SIZE - \ 489 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \ 490 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \ 491 } \ 492 \ 493 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ 494 R##_f1 = _z[1]; \ 495 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ 496 } while (0) 497 498 499/* 500 * Square root algorithms: 501 * We have just one right now, maybe Newton approximation 502 * should be added for those machines where division is fast. 503 */ 504 505#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ 506 do { \ 507 while (q) \ 508 { \ 509 T##_f1 = S##_f1 + q; \ 510 if (T##_f1 <= X##_f1) \ 511 { \ 512 S##_f1 = T##_f1 + q; \ 513 X##_f1 -= T##_f1; \ 514 R##_f1 += q; \ 515 } \ 516 _FP_FRAC_SLL_2(X, 1); \ 517 q >>= 1; \ 518 } \ 519 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 520 while (q != _FP_WORK_ROUND) \ 521 { \ 522 T##_f0 = S##_f0 + q; \ 523 T##_f1 = S##_f1; \ 524 if (T##_f1 < X##_f1 || \ 525 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ 526 { \ 527 S##_f0 = T##_f0 + q; \ 528 S##_f1 += (T##_f0 > S##_f0); \ 529 _FP_FRAC_DEC_2(X, T); \ 530 R##_f0 += q; \ 531 } \ 532 _FP_FRAC_SLL_2(X, 1); \ 533 q >>= 1; \ 534 } \ 535 if (X##_f0 | X##_f1) \ 536 { \ 537 if (S##_f1 < X##_f1 || \ 538 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \ 539 R##_f0 |= _FP_WORK_ROUND; \ 540 R##_f0 |= _FP_WORK_STICKY; \ 541 } \ 542 } while (0) 543 544 545/* 546 * Assembly/disassembly for converting to/from integral types. 547 * No shifting or overflow handled here. 548 */ 549 550#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ 551 do { \ 552 if (rsize <= _FP_W_TYPE_SIZE) \ 553 r = X##_f0; \ 554 else \ 555 { \ 556 r = X##_f1; \ 557 r <<= _FP_W_TYPE_SIZE; \ 558 r += X##_f0; \ 559 } \ 560 } while (0) 561 562#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ 563 do { \ 564 X##_f0 = r; \ 565 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 566 } while (0) 567 568/* 569 * Convert FP values between word sizes 570 */ 571 572#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ 573 do { \ 574 if (S##_c != FP_CLS_NAN) \ 575 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 576 _FP_WFRACBITS_##sfs); \ 577 else \ 578 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 579 D##_f = S##_f0; \ 580 } while (0) 581 582#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ 583 do { \ 584 D##_f0 = S##_f; \ 585 D##_f1 = 0; \ 586 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 587 } while (0) 588 589#endif 590