1/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. 2 3This file is part of GCC. 4 5GCC is free software; you can redistribute it and/or modify it under 6the terms of the GNU General Public License as published by the Free 7Software Foundation; either version 3, or (at your option) any later 8version. 9 10GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11WARRANTY; without even the implied warranty of MERCHANTABILITY or 12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24/***************************************************************************** 25 * 26 * BID128 fma x * y + z 27 * 28 ****************************************************************************/ 29 30#include "bid_internal.h" 31 32static void 33rounding_correction (unsigned int rnd_mode, 34 unsigned int is_inexact_lt_midpoint, 35 unsigned int is_inexact_gt_midpoint, 36 unsigned int is_midpoint_lt_even, 37 unsigned int is_midpoint_gt_even, 38 int unbexp, 39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) { 40 // unbiased true exponent unbexp may be larger than emax 41 42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient 43 // (the exponent field is ignored, as unbexp is used instead) 44 UINT64 sign, exp; 45 UINT64 C_hi, C_lo; 46 47 // general correction from RN to RA, RM, RP, RZ 48 // Note: if the result is negative, then is_inexact_lt_midpoint, 49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even 50 // have to be considered as if determined for the absolute value of the 51 // result (so they seem to be reversed) 52 53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 54 is_midpoint_lt_even || is_midpoint_gt_even) { 55 *ptrfpsf |= INEXACT_EXCEPTION; 56 } 57 // apply correction to result calculated with unbounded exponent 58 sign = res.w[1] & MASK_SIGN; 59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax 60 C_hi = res.w[1] & MASK_COEFF; 61 C_lo = res.w[0]; 62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || 63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && 64 is_midpoint_gt_even))) || 65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || 66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && 67 is_midpoint_gt_even)))) { 68 // C = C + 1 69 C_lo = C_lo + 1; 70 if (C_lo == 0) 71 C_hi = C_hi + 1; 72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { 73 // C = 10^34 => rounding overflow 74 C_hi = 0x0000314dc6448d93ull; 75 C_lo = 0x38c15b0a00000000ull; // 10^33 76 // exp = exp + EXP_P1; 77 unbexp = unbexp + 1; 78 exp = (UINT64) (unbexp + 6176) << 49; 79 } 80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && 81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || 82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { 83 // C = C - 1 84 C_lo = C_lo - 1; 85 if (C_lo == 0xffffffffffffffffull) 86 C_hi--; 87 // check if we crossed into the lower decade 88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { 89 // C = 10^33 - 1 90 if (exp > 0) { 91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 92 C_lo = 0x378d8e63ffffffffull; 93 // exp = exp - EXP_P1; 94 unbexp = unbexp - 1; 95 exp = (UINT64) (unbexp + 6176) << 49; 96 } else { // if exp = 0 97 if (is_midpoint_lt_even || is_midpoint_lt_even || 98 is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact 99 *ptrfpsf |= UNDERFLOW_EXCEPTION; 100 } 101 } 102 } else { 103 ; // the result is already correct 104 } 105 if (unbexp > expmax) { // 6111 106 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 107 exp = 0; 108 if (!sign) { // result is positive 109 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf 110 C_hi = 0x7800000000000000ull; 111 C_lo = 0x0000000000000000ull; 112 } else { // res = +MAXFP = (10^34-1) * 10^emax 113 C_hi = 0x5fffed09bead87c0ull; 114 C_lo = 0x378d8e63ffffffffull; 115 } 116 } else { // result is negative 117 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf 118 C_hi = 0xf800000000000000ull; 119 C_lo = 0x0000000000000000ull; 120 } else { // res = -MAXFP = -(10^34-1) * 10^emax 121 C_hi = 0xdfffed09bead87c0ull; 122 C_lo = 0x378d8e63ffffffffull; 123 } 124 } 125 } 126 // assemble the result 127 res.w[1] = sign | exp | C_hi; 128 res.w[0] = C_lo; 129 *ptrres = res; 130} 131 132static void 133add256 (UINT256 x, UINT256 y, UINT256 * pz) { 134 // *z = x + yl assume the sum fits in 256 bits 135 UINT256 z; 136 z.w[0] = x.w[0] + y.w[0]; 137 if (z.w[0] < x.w[0]) { 138 x.w[1]++; 139 if (x.w[1] == 0x0000000000000000ull) { 140 x.w[2]++; 141 if (x.w[2] == 0x0000000000000000ull) { 142 x.w[3]++; 143 } 144 } 145 } 146 z.w[1] = x.w[1] + y.w[1]; 147 if (z.w[1] < x.w[1]) { 148 x.w[2]++; 149 if (x.w[2] == 0x0000000000000000ull) { 150 x.w[3]++; 151 } 152 } 153 z.w[2] = x.w[2] + y.w[2]; 154 if (z.w[2] < x.w[2]) { 155 x.w[3]++; 156 } 157 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible 158 *pz = z; 159} 160 161static void 162sub256 (UINT256 x, UINT256 y, UINT256 * pz) { 163 // *z = x - y; assume x >= y 164 UINT256 z; 165 z.w[0] = x.w[0] - y.w[0]; 166 if (z.w[0] > x.w[0]) { 167 x.w[1]--; 168 if (x.w[1] == 0xffffffffffffffffull) { 169 x.w[2]--; 170 if (x.w[2] == 0xffffffffffffffffull) { 171 x.w[3]--; 172 } 173 } 174 } 175 z.w[1] = x.w[1] - y.w[1]; 176 if (z.w[1] > x.w[1]) { 177 x.w[2]--; 178 if (x.w[2] == 0xffffffffffffffffull) { 179 x.w[3]--; 180 } 181 } 182 z.w[2] = x.w[2] - y.w[2]; 183 if (z.w[2] > x.w[2]) { 184 x.w[3]--; 185 } 186 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y 187 *pz = z; 188} 189 190 191static int 192nr_digits256 (UINT256 R256) { 193 int ind; 194 // determine the number of decimal digits in R256 195 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { 196 // between 1 and 19 digits 197 for (ind = 1; ind <= 19; ind++) { 198 if (R256.w[0] < ten2k64[ind]) { 199 break; 200 } 201 } 202 // ind digits 203 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && 204 (R256.w[1] < ten2k128[0].w[1] || 205 (R256.w[1] == ten2k128[0].w[1] 206 && R256.w[0] < ten2k128[0].w[0]))) { 207 // 20 digits 208 ind = 20; 209 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { 210 // between 21 and 38 digits 211 for (ind = 1; ind <= 18; ind++) { 212 if (R256.w[1] < ten2k128[ind].w[1] || 213 (R256.w[1] == ten2k128[ind].w[1] && 214 R256.w[0] < ten2k128[ind].w[0])) { 215 break; 216 } 217 } 218 // ind + 20 digits 219 ind = ind + 20; 220 } else if (R256.w[3] == 0x0 && 221 (R256.w[2] < ten2k256[0].w[2] || 222 (R256.w[2] == ten2k256[0].w[2] && 223 R256.w[1] < ten2k256[0].w[1]) || 224 (R256.w[2] == ten2k256[0].w[2] && 225 R256.w[1] == ten2k256[0].w[1] && 226 R256.w[0] < ten2k256[0].w[0]))) { 227 // 39 digits 228 ind = 39; 229 } else { 230 // between 40 and 68 digits 231 for (ind = 1; ind <= 29; ind++) { 232 if (R256.w[3] < ten2k256[ind].w[3] || 233 (R256.w[3] == ten2k256[ind].w[3] && 234 R256.w[2] < ten2k256[ind].w[2]) || 235 (R256.w[3] == ten2k256[ind].w[3] && 236 R256.w[2] == ten2k256[ind].w[2] && 237 R256.w[1] < ten2k256[ind].w[1]) || 238 (R256.w[3] == ten2k256[ind].w[3] && 239 R256.w[2] == ten2k256[ind].w[2] && 240 R256.w[1] == ten2k256[ind].w[1] && 241 R256.w[0] < ten2k256[ind].w[0])) { 242 break; 243 } 244 } 245 // ind + 39 digits 246 ind = ind + 39; 247 } 248 return (ind); 249} 250 251// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so 252// use the rounding information from ptr_is_* to avoid a double rounding error 253static void 254add_and_round (int q3, 255 int q4, 256 int e4, 257 int delta, 258 int p34, 259 UINT64 z_sign, 260 UINT64 p_sign, 261 UINT128 C3, 262 UINT256 C4, 263 int rnd_mode, 264 int *ptr_is_midpoint_lt_even, 265 int *ptr_is_midpoint_gt_even, 266 int *ptr_is_inexact_lt_midpoint, 267 int *ptr_is_inexact_gt_midpoint, 268 _IDEC_flags * ptrfpsf, UINT128 * ptrres) { 269 270 int scale; 271 int x0; 272 int ind; 273 UINT64 R64; 274 UINT128 P128, R128; 275 UINT192 P192, R192; 276 UINT256 R256; 277 int is_midpoint_lt_even = 0; 278 int is_midpoint_gt_even = 0; 279 int is_inexact_lt_midpoint = 0; 280 int is_inexact_gt_midpoint = 0; 281 int is_midpoint_lt_even0 = 0; 282 int is_midpoint_gt_even0 = 0; 283 int is_inexact_lt_midpoint0 = 0; 284 int is_inexact_gt_midpoint0 = 0; 285 int incr_exp = 0; 286 int is_tiny = 0; 287 int lt_half_ulp = 0; 288 int eq_half_ulp = 0; 289 // int gt_half_ulp = 0; 290 UINT128 res = *ptrres; 291 292 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 293 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this 294 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 295 296 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for 297 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) 298 if (scale == 0) { 299 R256.w[3] = 0x0ull; 300 R256.w[2] = 0x0ull; 301 R256.w[1] = C3.w[1]; 302 R256.w[0] = C3.w[0]; 303 } else if (scale <= 19) { // 10^scale fits in 64 bits 304 P128.w[1] = 0; 305 P128.w[0] = ten2k64[scale]; 306 __mul_128x128_to_256 (R256, P128, C3); 307 } else if (scale <= 38) { // 10^scale fits in 128 bits 308 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); 309 } else if (scale <= 57) { // 39 <= scale <= 57 310 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits 311 // (10^67 has 223 bits; 10^69 has 230 bits); 312 // must split the computation: 313 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 314 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 315 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits 316 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); 317 // now multiply R128 by 10^38 318 __mul_128x128_to_256 (R256, R128, ten2k128[18]); 319 } else { // 58 <= scale <= 66 320 // 10^scale takes between 193 and 220 bits, 321 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) 322 // must split the computation: 323 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 324 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 325 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits 326 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because 327 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 328 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); 329 // now calculate 10*38 * 10^(scale-38) * C3 330 __mul_128x128_to_256 (R256, R128, ten2k128[18]); 331 } 332 // C3 * 10^scale is now in R256 333 334 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least 335 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is 336 // possible 337 // add/subtract C4 and C3 * 10^scale; the exponent is e4 338 if (p_sign == z_sign) { // R256 = C4 + R256 339 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, 340 // but may require rounding 341 add256 (C4, R256, &R256); 342 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 343 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or 344 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, 345 // but may require rounding 346 347 // compare first R256 = C3 * 10^scale and C4 348 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || 349 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || 350 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && 351 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 352 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, 353 // but may require rounding 354 sub256 (R256, C4, &R256); 355 // flip p_sign too, because the result has the sign of z 356 p_sign = z_sign; 357 } else { // if C4 > C3 * 10^scale 358 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, 359 // but may require rounding 360 sub256 (C4, R256, &R256); 361 } 362 // if the result is pure zero, the sign depends on the rounding mode 363 // (x*y and z had opposite signs) 364 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && 365 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { 366 if (rnd_mode != ROUNDING_DOWN) 367 p_sign = 0x0000000000000000ull; 368 else 369 p_sign = 0x8000000000000000ull; 370 // the exponent is max (e4, expmin) 371 if (e4 < -6176) 372 e4 = expmin; 373 // assemble result 374 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); 375 res.w[0] = 0x0; 376 *ptrres = res; 377 return; 378 } 379 } 380 381 // determine the number of decimal digits in R256 382 ind = nr_digits256 (R256); 383 384 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; 385 // round to the destination precision, with unbounded exponent 386 387 if (ind <= p34) { 388 // result rounded to the destination precision with unbounded exponent 389 // is exact 390 if (ind + e4 < p34 + expmin) { 391 is_tiny = 1; // applies to all rounding modes 392 } 393 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; 394 res.w[0] = R256.w[0]; 395 // Note: res is correct only if expmin <= e4 <= expmax 396 } else { // if (ind > p34) 397 // if more than P digits, round to nearest to P digits 398 // round R256 to p34 digits 399 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 400 if (ind <= 38) { 401 P128.w[1] = R256.w[1]; 402 P128.w[0] = R256.w[0]; 403 round128_19_38 (ind, x0, P128, &R128, &incr_exp, 404 &is_midpoint_lt_even, &is_midpoint_gt_even, 405 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 406 } else if (ind <= 57) { 407 P192.w[2] = R256.w[2]; 408 P192.w[1] = R256.w[1]; 409 P192.w[0] = R256.w[0]; 410 round192_39_57 (ind, x0, P192, &R192, &incr_exp, 411 &is_midpoint_lt_even, &is_midpoint_gt_even, 412 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 413 R128.w[1] = R192.w[1]; 414 R128.w[0] = R192.w[0]; 415 } else { // if (ind <= 68) 416 round256_58_76 (ind, x0, R256, &R256, &incr_exp, 417 &is_midpoint_lt_even, &is_midpoint_gt_even, 418 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 419 R128.w[1] = R256.w[1]; 420 R128.w[0] = R256.w[0]; 421 } 422 // the rounded result has p34 = 34 digits 423 e4 = e4 + x0 + incr_exp; 424 if (rnd_mode == ROUNDING_TO_NEAREST) { 425 if (e4 < expmin) { 426 is_tiny = 1; // for other rounding modes apply correction 427 } 428 } else { 429 // for RM, RP, RZ, RA apply correction in order to determine tininess 430 // but do not save the result; apply the correction to 431 // (-1)^p_sign * significand * 10^0 432 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; 433 P128.w[0] = R128.w[0]; 434 rounding_correction (rnd_mode, 435 is_inexact_lt_midpoint, 436 is_inexact_gt_midpoint, is_midpoint_lt_even, 437 is_midpoint_gt_even, 0, &P128, ptrfpsf); 438 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 439 // the number of digits in the significand is p34 = 34 440 if (e4 + scale < expmin) { 441 is_tiny = 1; 442 } 443 } 444 ind = p34; // the number of decimal digits in the signifcand of res 445 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN 446 res.w[0] = R128.w[0]; 447 // Note: res is correct only if expmin <= e4 <= expmax 448 // set the inexact flag after rounding with bounded exponent, if any 449 } 450 // at this point we have the result rounded with unbounded exponent in 451 // res and we know its tininess: 452 // res = (-1)^p_sign * significand * 10^e4, 453 // where q (significand) = ind <= p34 454 // Note: res is correct only if expmin <= e4 <= expmax 455 456 // check for overflow if RN 457 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { 458 res.w[1] = p_sign | 0x7800000000000000ull; 459 res.w[0] = 0x0000000000000000ull; 460 *ptrres = res; 461 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 462 return; // BID_RETURN (res) 463 } // else not overflow or not RN, so continue 464 465 // if (e4 >= expmin) we have the result rounded with bounded exponent 466 if (e4 < expmin) { 467 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 468 // where the result rounded [at most] once is 469 // (-1)^p_sign * significand_res * 10^e4 470 471 // avoid double rounding error 472 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 473 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 474 is_midpoint_lt_even0 = is_midpoint_lt_even; 475 is_midpoint_gt_even0 = is_midpoint_gt_even; 476 is_inexact_lt_midpoint = 0; 477 is_inexact_gt_midpoint = 0; 478 is_midpoint_lt_even = 0; 479 is_midpoint_gt_even = 0; 480 481 if (x0 > ind) { 482 // nothing is left of res when moving the decimal point left x0 digits 483 is_inexact_lt_midpoint = 1; 484 res.w[1] = p_sign | 0x0000000000000000ull; 485 res.w[0] = 0x0000000000000000ull; 486 e4 = expmin; 487 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 488 // this is <, =, or > 1/2 ulp 489 // compare the ind-digit value in the significand of res with 490 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 491 // less than, equal to, or greater than 1/2 ulp (significand of res) 492 R128.w[1] = res.w[1] & MASK_COEFF; 493 R128.w[0] = res.w[0]; 494 if (ind <= 19) { 495 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp 496 lt_half_ulp = 1; 497 is_inexact_lt_midpoint = 1; 498 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp 499 eq_half_ulp = 1; 500 is_midpoint_gt_even = 1; 501 } else { // > 1/2 ulp 502 // gt_half_ulp = 1; 503 is_inexact_gt_midpoint = 1; 504 } 505 } else { // if (ind <= 38) { 506 if (R128.w[1] < midpoint128[ind - 20].w[1] || 507 (R128.w[1] == midpoint128[ind - 20].w[1] && 508 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp 509 lt_half_ulp = 1; 510 is_inexact_lt_midpoint = 1; 511 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 512 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp 513 eq_half_ulp = 1; 514 is_midpoint_gt_even = 1; 515 } else { // > 1/2 ulp 516 // gt_half_ulp = 1; 517 is_inexact_gt_midpoint = 1; 518 } 519 } 520 if (lt_half_ulp || eq_half_ulp) { 521 // res = +0.0 * 10^expmin 522 res.w[1] = 0x0000000000000000ull; 523 res.w[0] = 0x0000000000000000ull; 524 } else { // if (gt_half_ulp) 525 // res = +1 * 10^expmin 526 res.w[1] = 0x0000000000000000ull; 527 res.w[0] = 0x0000000000000001ull; 528 } 529 res.w[1] = p_sign | res.w[1]; 530 e4 = expmin; 531 } else { // if (1 <= x0 <= ind - 1 <= 33) 532 // round the ind-digit result to ind - x0 digits 533 534 if (ind <= 18) { // 2 <= ind <= 18 535 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 536 &is_midpoint_lt_even, &is_midpoint_gt_even, 537 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 538 res.w[1] = 0x0; 539 res.w[0] = R64; 540 } else if (ind <= 38) { 541 P128.w[1] = res.w[1] & MASK_COEFF; 542 P128.w[0] = res.w[0]; 543 round128_19_38 (ind, x0, P128, &res, &incr_exp, 544 &is_midpoint_lt_even, &is_midpoint_gt_even, 545 &is_inexact_lt_midpoint, 546 &is_inexact_gt_midpoint); 547 } 548 e4 = e4 + x0; // expmin 549 // we want the exponent to be expmin, so if incr_exp = 1 then 550 // multiply the rounded result by 10 - it will still fit in 113 bits 551 if (incr_exp) { 552 // 64 x 128 -> 128 553 P128.w[1] = res.w[1] & MASK_COEFF; 554 P128.w[0] = res.w[0]; 555 __mul_64x128_to_128 (res, ten2k64[1], P128); 556 } 557 res.w[1] = 558 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); 559 // avoid a double rounding error 560 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 561 is_midpoint_lt_even) { // double rounding error upward 562 // res = res - 1 563 res.w[0]--; 564 if (res.w[0] == 0xffffffffffffffffull) 565 res.w[1]--; 566 // Note: a double rounding error upward is not possible; for this 567 // the result after the first rounding would have to be 99...95 568 // (35 digits in all), possibly followed by a number of zeros; this 569 // is not possible in Cases (2)-(6) or (15)-(17) which may get here 570 is_midpoint_lt_even = 0; 571 is_inexact_lt_midpoint = 1; 572 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 573 is_midpoint_gt_even) { // double rounding error downward 574 // res = res + 1 575 res.w[0]++; 576 if (res.w[0] == 0) 577 res.w[1]++; 578 is_midpoint_gt_even = 0; 579 is_inexact_gt_midpoint = 1; 580 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 581 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 582 // if this second rounding was exact the result may still be 583 // inexact because of the first rounding 584 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 585 is_inexact_gt_midpoint = 1; 586 } 587 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 588 is_inexact_lt_midpoint = 1; 589 } 590 } else if (is_midpoint_gt_even && 591 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 592 // pulled up to a midpoint 593 is_inexact_lt_midpoint = 1; 594 is_inexact_gt_midpoint = 0; 595 is_midpoint_lt_even = 0; 596 is_midpoint_gt_even = 0; 597 } else if (is_midpoint_lt_even && 598 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 599 // pulled down to a midpoint 600 is_inexact_lt_midpoint = 0; 601 is_inexact_gt_midpoint = 1; 602 is_midpoint_lt_even = 0; 603 is_midpoint_gt_even = 0; 604 } else { 605 ; 606 } 607 } 608 } 609 // res contains the correct result 610 // apply correction if not rounding to nearest 611 if (rnd_mode != ROUNDING_TO_NEAREST) { 612 rounding_correction (rnd_mode, 613 is_inexact_lt_midpoint, is_inexact_gt_midpoint, 614 is_midpoint_lt_even, is_midpoint_gt_even, 615 e4, &res, ptrfpsf); 616 } 617 if (is_midpoint_lt_even || is_midpoint_gt_even || 618 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 619 // set the inexact flag 620 *ptrfpsf |= INEXACT_EXCEPTION; 621 if (is_tiny) 622 *ptrfpsf |= UNDERFLOW_EXCEPTION; 623 } 624 625 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 626 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 627 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 628 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 629 *ptrres = res; 630 return; 631} 632 633 634#if DECIMAL_CALL_BY_REFERENCE 635static void 636bid128_ext_fma (int *ptr_is_midpoint_lt_even, 637 int *ptr_is_midpoint_gt_even, 638 int *ptr_is_inexact_lt_midpoint, 639 int *ptr_is_inexact_gt_midpoint, UINT128 * pres, 640 UINT128 * px, UINT128 * py, 641 UINT128 * 642 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 643 _EXC_INFO_PARAM) { 644 UINT128 x = *px, y = *py, z = *pz; 645#if !DECIMAL_GLOBAL_ROUNDING 646 unsigned int rnd_mode = *prnd_mode; 647#endif 648#else 649static UINT128 650bid128_ext_fma (int *ptr_is_midpoint_lt_even, 651 int *ptr_is_midpoint_gt_even, 652 int *ptr_is_inexact_lt_midpoint, 653 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, 654 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM 655 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { 656#endif 657 658 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 659 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; 660 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; 661 int true_p_exp; 662 UINT128 C1, C2, C3; 663 UINT256 C4; 664 int q1 = 0, q2 = 0, q3 = 0, q4; 665 int e1, e2, e3, e4; 666 int scale, ind, delta, x0; 667 int p34 = P34; // used to modify the limit on the number of digits 668 BID_UI64DOUBLE tmp; 669 int x_nr_bits, y_nr_bits, z_nr_bits; 670 unsigned int save_fpsf; 671 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; 672 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 673 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; 674 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 675 int incr_exp = 0; 676 int lsb; 677 int lt_half_ulp = 0; 678 int eq_half_ulp = 0; 679 int gt_half_ulp = 0; 680 int is_tiny = 0; 681 UINT64 R64, tmp64; 682 UINT128 P128, R128; 683 UINT192 P192, R192; 684 UINT256 R256; 685 686 // the following are based on the table of special cases for fma; the NaN 687 // behavior is similar to that of the IA-64 Architecture fma 688 689 // identify cases where at least one operand is NaN 690 691 BID_SWAP128 (x); 692 BID_SWAP128 (y); 693 BID_SWAP128 (z); 694 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN 695 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 696 // check first for non-canonical NaN payload 697 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 698 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 699 (y.w[0] > 0x38c15b09ffffffffull))) { 700 y.w[1] = y.w[1] & 0xffffc00000000000ull; 701 y.w[0] = 0x0ull; 702 } 703 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN 704 // set invalid flag 705 *pfpsf |= INVALID_EXCEPTION; 706 // return quiet (y) 707 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 708 res.w[0] = y.w[0]; 709 } else { // y is QNaN 710 // return y 711 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 712 res.w[0] = y.w[0]; 713 // if z = SNaN or x = SNaN signal invalid exception 714 if ((z.w[1] & MASK_SNAN) == MASK_SNAN || 715 (x.w[1] & MASK_SNAN) == MASK_SNAN) { 716 // set invalid flag 717 *pfpsf |= INVALID_EXCEPTION; 718 } 719 } 720 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 721 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 722 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 723 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 724 BID_SWAP128 (res); 725 BID_RETURN (res) 726 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN 727 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 728 // check first for non-canonical NaN payload 729 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 730 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 731 (z.w[0] > 0x38c15b09ffffffffull))) { 732 z.w[1] = z.w[1] & 0xffffc00000000000ull; 733 z.w[0] = 0x0ull; 734 } 735 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 736 // set invalid flag 737 *pfpsf |= INVALID_EXCEPTION; 738 // return quiet (z) 739 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 740 res.w[0] = z.w[0]; 741 } else { // z is QNaN 742 // return z 743 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 744 res.w[0] = z.w[0]; 745 // if x = SNaN signal invalid exception 746 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { 747 // set invalid flag 748 *pfpsf |= INVALID_EXCEPTION; 749 } 750 } 751 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 752 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 753 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 754 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 755 BID_SWAP128 (res); 756 BID_RETURN (res) 757 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN 758 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 759 // check first for non-canonical NaN payload 760 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 761 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 762 (x.w[0] > 0x38c15b09ffffffffull))) { 763 x.w[1] = x.w[1] & 0xffffc00000000000ull; 764 x.w[0] = 0x0ull; 765 } 766 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 767 // set invalid flag 768 *pfpsf |= INVALID_EXCEPTION; 769 // return quiet (x) 770 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 771 res.w[0] = x.w[0]; 772 } else { // x is QNaN 773 // return x 774 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 775 res.w[0] = x.w[0]; 776 } 777 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 778 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 779 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 780 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 781 BID_SWAP128 (res); 782 BID_RETURN (res) 783 } 784 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check 785 // for non-canonical values 786 787 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 788 C1.w[1] = x.w[1] & MASK_COEFF; 789 C1.w[0] = x.w[0]; 790 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf 791 // if x is not infinity check for non-canonical values - treated as zero 792 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 793 // non-canonical 794 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 795 C1.w[1] = 0; // significand high 796 C1.w[0] = 0; // significand low 797 } else { // G0_G1 != 11 798 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits 799 if (C1.w[1] > 0x0001ed09bead87c0ull || 800 (C1.w[1] == 0x0001ed09bead87c0ull && 801 C1.w[0] > 0x378d8e63ffffffffull)) { 802 // x is non-canonical if coefficient is larger than 10^34 -1 803 C1.w[1] = 0; 804 C1.w[0] = 0; 805 } else { // canonical 806 ; 807 } 808 } 809 } 810 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 811 C2.w[1] = y.w[1] & MASK_COEFF; 812 C2.w[0] = y.w[0]; 813 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf 814 // if y is not infinity check for non-canonical values - treated as zero 815 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 816 // non-canonical 817 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 818 C2.w[1] = 0; // significand high 819 C2.w[0] = 0; // significand low 820 } else { // G0_G1 != 11 821 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits 822 if (C2.w[1] > 0x0001ed09bead87c0ull || 823 (C2.w[1] == 0x0001ed09bead87c0ull && 824 C2.w[0] > 0x378d8e63ffffffffull)) { 825 // y is non-canonical if coefficient is larger than 10^34 -1 826 C2.w[1] = 0; 827 C2.w[0] = 0; 828 } else { // canonical 829 ; 830 } 831 } 832 } 833 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 834 C3.w[1] = z.w[1] & MASK_COEFF; 835 C3.w[0] = z.w[0]; 836 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf 837 // if z is not infinity check for non-canonical values - treated as zero 838 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 839 // non-canonical 840 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 841 C3.w[1] = 0; // significand high 842 C3.w[0] = 0; // significand low 843 } else { // G0_G1 != 11 844 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits 845 if (C3.w[1] > 0x0001ed09bead87c0ull || 846 (C3.w[1] == 0x0001ed09bead87c0ull && 847 C3.w[0] > 0x378d8e63ffffffffull)) { 848 // z is non-canonical if coefficient is larger than 10^34 -1 849 C3.w[1] = 0; 850 C3.w[0] = 0; 851 } else { // canonical 852 ; 853 } 854 } 855 } 856 857 p_sign = x_sign ^ y_sign; // sign of the product 858 859 // identify cases where at least one operand is infinity 860 861 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf 862 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 863 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 864 if (p_sign == z_sign) { 865 res.w[1] = z_sign | MASK_INF; 866 res.w[0] = 0x0; 867 } else { 868 // return QNaN Indefinite 869 res.w[1] = 0x7c00000000000000ull; 870 res.w[0] = 0x0000000000000000ull; 871 // set invalid flag 872 *pfpsf |= INVALID_EXCEPTION; 873 } 874 } else { // z = 0 or z = f 875 res.w[1] = p_sign | MASK_INF; 876 res.w[0] = 0x0; 877 } 878 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f 879 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 880 if (p_sign == z_sign) { 881 res.w[1] = z_sign | MASK_INF; 882 res.w[0] = 0x0; 883 } else { 884 // return QNaN Indefinite 885 res.w[1] = 0x7c00000000000000ull; 886 res.w[0] = 0x0000000000000000ull; 887 // set invalid flag 888 *pfpsf |= INVALID_EXCEPTION; 889 } 890 } else { // z = 0 or z = f 891 res.w[1] = p_sign | MASK_INF; 892 res.w[0] = 0x0; 893 } 894 } else { // y = 0 895 // return QNaN Indefinite 896 res.w[1] = 0x7c00000000000000ull; 897 res.w[0] = 0x0000000000000000ull; 898 // set invalid flag 899 *pfpsf |= INVALID_EXCEPTION; 900 } 901 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 902 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 903 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 904 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 905 BID_SWAP128 (res); 906 BID_RETURN (res) 907 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 908 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 909 // x = f, necessarily 910 if ((p_sign != z_sign) 911 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { 912 // return QNaN Indefinite 913 res.w[1] = 0x7c00000000000000ull; 914 res.w[0] = 0x0000000000000000ull; 915 // set invalid flag 916 *pfpsf |= INVALID_EXCEPTION; 917 } else { 918 res.w[1] = z_sign | MASK_INF; 919 res.w[0] = 0x0; 920 } 921 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 922 // z = 0, f, inf 923 // return QNaN Indefinite 924 res.w[1] = 0x7c00000000000000ull; 925 res.w[0] = 0x0000000000000000ull; 926 // set invalid flag 927 *pfpsf |= INVALID_EXCEPTION; 928 } else { 929 // x = f and z = 0, f, necessarily 930 res.w[1] = p_sign | MASK_INF; 931 res.w[0] = 0x0; 932 } 933 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 934 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 935 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 936 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 937 BID_SWAP128 (res); 938 BID_RETURN (res) 939 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 940 // x = 0, f and y = 0, f, necessarily 941 res.w[1] = z_sign | MASK_INF; 942 res.w[0] = 0x0; 943 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 944 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 945 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 946 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 947 BID_SWAP128 (res); 948 BID_RETURN (res) 949 } 950 951 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; 952 if (true_p_exp < -6176) 953 p_exp = 0; // cannot be less than EXP_MIN 954 else 955 p_exp = (UINT64) (true_p_exp + 6176) << 49; 956 957 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 958 // the result is 0 959 if (p_exp < z_exp) 960 res.w[1] = p_exp; // preferred exponent 961 else 962 res.w[1] = z_exp; // preferred exponent 963 if (p_sign == z_sign) { 964 res.w[1] |= z_sign; 965 res.w[0] = 0x0; 966 } else { // x * y and z have opposite signs 967 if (rnd_mode == ROUNDING_DOWN) { 968 // res = -0.0 969 res.w[1] |= MASK_SIGN; 970 res.w[0] = 0x0; 971 } else { 972 // res = +0.0 973 // res.w[1] |= 0x0; 974 res.w[0] = 0x0; 975 } 976 } 977 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 978 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 979 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 980 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 981 BID_SWAP128 (res); 982 BID_RETURN (res) 983 } 984 // from this point on, we may need to know the number of decimal digits 985 // in the significands of x, y, z when x, y, z != 0 986 987 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) 988 // q1 = nr. of decimal digits in x 989 // determine first the nr. of bits in x 990 if (C1.w[1] == 0) { 991 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 992 // split the 64-bit value in two 32-bit halves to avoid rounding errors 993 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 994 tmp.d = (double) (C1.w[0] >> 32); // exact conversion 995 x_nr_bits = 996 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 997 } else { // x < 2^32 998 tmp.d = (double) (C1.w[0]); // exact conversion 999 x_nr_bits = 1000 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1001 } 1002 } else { // if x < 2^53 1003 tmp.d = (double) C1.w[0]; // exact conversion 1004 x_nr_bits = 1005 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1006 } 1007 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) 1008 tmp.d = (double) C1.w[1]; // exact conversion 1009 x_nr_bits = 1010 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1011 } 1012 q1 = nr_digits[x_nr_bits - 1].digits; 1013 if (q1 == 0) { 1014 q1 = nr_digits[x_nr_bits - 1].digits1; 1015 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || 1016 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && 1017 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) 1018 q1++; 1019 } 1020 } 1021 1022 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) 1023 if (C2.w[1] == 0) { 1024 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 1025 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1026 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 1027 tmp.d = (double) (C2.w[0] >> 32); // exact conversion 1028 y_nr_bits = 1029 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1030 } else { // y < 2^32 1031 tmp.d = (double) C2.w[0]; // exact conversion 1032 y_nr_bits = 1033 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1034 } 1035 } else { // if y < 2^53 1036 tmp.d = (double) C2.w[0]; // exact conversion 1037 y_nr_bits = 1038 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1039 } 1040 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) 1041 tmp.d = (double) C2.w[1]; // exact conversion 1042 y_nr_bits = 1043 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1044 } 1045 1046 q2 = nr_digits[y_nr_bits].digits; 1047 if (q2 == 0) { 1048 q2 = nr_digits[y_nr_bits].digits1; 1049 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || 1050 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && 1051 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) 1052 q2++; 1053 } 1054 } 1055 1056 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) 1057 if (C3.w[1] == 0) { 1058 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 1059 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1060 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 1061 tmp.d = (double) (C3.w[0] >> 32); // exact conversion 1062 z_nr_bits = 1063 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1064 } else { // z < 2^32 1065 tmp.d = (double) C3.w[0]; // exact conversion 1066 z_nr_bits = 1067 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1068 } 1069 } else { // if z < 2^53 1070 tmp.d = (double) C3.w[0]; // exact conversion 1071 z_nr_bits = 1072 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1073 } 1074 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1]) 1075 tmp.d = (double) C3.w[1]; // exact conversion 1076 z_nr_bits = 1077 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1078 } 1079 1080 q3 = nr_digits[z_nr_bits].digits; 1081 if (q3 == 0) { 1082 q3 = nr_digits[z_nr_bits].digits1; 1083 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || 1084 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && 1085 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) 1086 q3++; 1087 } 1088 } 1089 1090 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 1091 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { 1092 // x = 0 or y = 0 1093 // z = f, necessarily; for 0 + z return z, with the preferred exponent 1094 // the result is z, but need to get the preferred exponent 1095 if (z_exp <= p_exp) { // the preferred exponent is z_exp 1096 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; 1097 res.w[0] = C3.w[0]; 1098 } else { // if (p_exp < z_exp) the preferred exponent is p_exp 1099 // return (C3 * 10^scale) * 10^(z_exp - scale) 1100 // where scale = min (p34-q3, (z_exp-p_exp) >> 49) 1101 scale = p34 - q3; 1102 ind = (z_exp - p_exp) >> 49; 1103 if (ind < scale) 1104 scale = ind; 1105 if (scale == 0) { 1106 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant 1107 res.w[0] = z.w[0]; 1108 } else if (q3 <= 19) { // z fits in 64 bits 1109 if (scale <= 19) { // 10^scale fits in 64 bits 1110 // 64 x 64 C3.w[0] * ten2k64[scale] 1111 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1112 } else { // 10^scale fits in 128 bits 1113 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1114 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1115 } 1116 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1117 // 64 x 128 ten2k64[scale] * C3 1118 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1119 } 1120 // subtract scale from the exponent 1121 z_exp = z_exp - ((UINT64) scale << 49); 1122 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1123 } 1124 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1125 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1126 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1127 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1128 BID_SWAP128 (res); 1129 BID_RETURN (res) 1130 } else { 1131 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f 1132 } 1133 1134 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 1135 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 1136 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z 1137 e4 = e1 + e2; // unbiased exponent of the exact x * y 1138 1139 // calculate C1 * C2 and its number of decimal digits, q4 1140 1141 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits 1142 // where 2 <= q1 + q2 <= 68 1143 // calculate C4 = C1 * C2 and determine q 1144 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; 1145 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits 1146 C4.w[0] = C1.w[0] * C2.w[0]; 1147 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1148 if (C4.w[0] < ten2k64[q1 + q2 - 1]) 1149 q4 = q1 + q2 - 1; // q4 in [1, 18] 1150 else 1151 q4 = q1 + q2; // q4 in [2, 19] 1152 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1153 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits 1154 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits 1155 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); 1156 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 1157 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 1158 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1159 q4 = 19; // 19 = q1 + q2 - 1 1160 } else { 1161 // if (C4.w[1] == 0) 1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1163 // else 1164 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1165 q4 = 20; // 20 = q1 + q2 1166 } 1167 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 1168 // C4 = C1 * C2 fits in 64 or 128 bits 1169 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) 1170 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits 1171 if (q1 <= 19) { 1172 __mul_128x64_to_128 (C4, C1.w[0], C2); 1173 } else { // q2 <= 19 1174 __mul_128x64_to_128 (C4, C2.w[0], C1); 1175 } 1176 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1177 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || 1178 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && 1179 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { 1180 // if (C4.w[1] == 0) // q4 = 20, necessarily 1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1182 // else 1183 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1184 q4 = q1 + q2 - 1; // q4 in [20, 37] 1185 } else { 1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1187 q4 = q1 + q2; // q4 in [21, 38] 1188 } 1189 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits 1190 // both C1 and C2 fit in 128 bits (actually in 113 bits) 1191 // may replace this by 128x128_to192 1192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 1193 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 1194 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || 1195 (C4.w[1] == ten2k128[18].w[1] 1196 && C4.w[0] < ten2k128[18].w[0]))) { 1197 // 18 = 38 - 20 = q1+q2-1 - 20 1198 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1199 q4 = 38; // 38 = q1 + q2 - 1 1200 } else { 1201 // if (C4.w[2] == 0) 1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1203 // else 1204 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1205 q4 = 39; // 39 = q1 + q2 1206 } 1207 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 1208 // C4 = C1 * C2 fits in 128 or 192 bits 1209 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) 1210 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1211 // may fit in 64 bits 1212 if (C1.w[1] == 0) { // C1 fits in 64 bits 1213 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1214 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); 1215 } else if (C2.w[1] == 0) { // C2 fits in 64 bits 1216 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1217 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); 1218 } else { // both C1 and C2 require 128 bits 1219 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1220 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1221 } 1222 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1223 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1224 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1225 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1226 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1227 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { 1228 // if (C4.w[2] == 0) // q4 = 39, necessarily 1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1230 // else 1231 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1232 q4 = q1 + q2 - 1; // q4 in [39, 56] 1233 } else { 1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1235 q4 = q1 + q2; // q4 in [40, 57] 1236 } 1237 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits 1238 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1239 // may fit in 64 bits 1240 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits 1241 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 1242 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits 1243 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 1244 } else { // C1 * C2 will fit in 192 bits or in 256 bits 1245 __mul_128x128_to_256 (C4, C1, C2); 1246 } 1247 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 1248 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || 1249 (C4.w[2] == ten2k256[18].w[2] 1250 && (C4.w[1] < ten2k256[18].w[1] 1251 || (C4.w[1] == ten2k256[18].w[1] 1252 && C4.w[0] < ten2k256[18].w[0]))))) { 1253 // 18 = 57 - 39 = q1+q2-1 - 39 1254 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1255 q4 = 57; // 57 = q1 + q2 - 1 1256 } else { 1257 // if (C4.w[3] == 0) 1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1259 // else 1260 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1261 q4 = 58; // 58 = q1 + q2 1262 } 1263 } else { // if 59 <= q1 + q2 <= 68 1264 // C4 = C1 * C2 fits in 192 or 256 bits 1265 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) 1266 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in 1267 // 64 bits 1268 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1269 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1270 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1271 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || 1272 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && 1273 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1274 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1275 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1276 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1277 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { 1278 // if (C4.w[3] == 0) // q4 = 58, necessarily 1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1280 // else 1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1282 q4 = q1 + q2 - 1; // q4 in [58, 67] 1283 } else { 1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1285 q4 = q1 + q2; // q4 in [59, 68] 1286 } 1287 } 1288 1289 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 1290 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 1291 *pfpsf = 0; 1292 1293 if (q4 > p34) { 1294 1295 // truncate C4 to p34 digits into res 1296 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 1297 x0 = q4 - p34; 1298 if (q4 <= 38) { 1299 P128.w[1] = C4.w[1]; 1300 P128.w[0] = C4.w[0]; 1301 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1302 &is_midpoint_lt_even, &is_midpoint_gt_even, 1303 &is_inexact_lt_midpoint, 1304 &is_inexact_gt_midpoint); 1305 } else if (q4 <= 57) { // 35 <= q4 <= 57 1306 P192.w[2] = C4.w[2]; 1307 P192.w[1] = C4.w[1]; 1308 P192.w[0] = C4.w[0]; 1309 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 1310 &is_midpoint_lt_even, &is_midpoint_gt_even, 1311 &is_inexact_lt_midpoint, 1312 &is_inexact_gt_midpoint); 1313 res.w[0] = R192.w[0]; 1314 res.w[1] = R192.w[1]; 1315 } else { // if (q4 <= 68) 1316 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 1317 &is_midpoint_lt_even, &is_midpoint_gt_even, 1318 &is_inexact_lt_midpoint, 1319 &is_inexact_gt_midpoint); 1320 res.w[0] = R256.w[0]; 1321 res.w[1] = R256.w[1]; 1322 } 1323 e4 = e4 + x0; 1324 if (incr_exp) { 1325 e4 = e4 + 1; 1326 } 1327 q4 = p34; 1328 // res is now the coefficient of the result rounded to the destination 1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res) 1330 } else { // if (q4 <= p34) 1331 // C4 * 10^e4 is the result rounded to the destination precision, with 1332 // unbounded exponent (which is exact) 1333 1334 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { 1335 // e4 is too large, but can be brought within range by scaling up C4 1336 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 1337 // res = (C4 * 10^scale) * 10^expmax 1338 if (q4 <= 19) { // C4 fits in 64 bits 1339 if (scale <= 19) { // 10^scale fits in 64 bits 1340 // 64 x 64 C4.w[0] * ten2k64[scale] 1341 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); 1342 } else { // 10^scale fits in 128 bits 1343 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 1344 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); 1345 } 1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 1347 // 64 x 128 ten2k64[scale] * CC43 1348 __mul_128x64_to_128 (res, ten2k64[scale], C4); 1349 } 1350 e4 = e4 - scale; // expmax 1351 q4 = q4 + scale; 1352 } else { 1353 res.w[1] = C4.w[1]; 1354 res.w[0] = C4.w[0]; 1355 } 1356 // res is the coefficient of the result rounded to the destination 1357 // precision, with unbounded exponent (it has q4 digits); the exponent 1358 // is e4 (exact result) 1359 } 1360 1361 // check for overflow 1362 if (q4 + e4 > p34 + expmax) { 1363 if (rnd_mode == ROUNDING_TO_NEAREST) { 1364 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf 1365 res.w[0] = 0x0000000000000000ull; 1366 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1367 } else { 1368 res.w[1] = p_sign | res.w[1]; 1369 rounding_correction (rnd_mode, 1370 is_inexact_lt_midpoint, 1371 is_inexact_gt_midpoint, 1372 is_midpoint_lt_even, is_midpoint_gt_even, 1373 e4, &res, pfpsf); 1374 } 1375 *pfpsf |= save_fpsf; 1376 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1377 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1378 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1379 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1380 BID_SWAP128 (res); 1381 BID_RETURN (res) 1382 } 1383 // check for underflow 1384 if (q4 + e4 < expmin + P34) { 1385 is_tiny = 1; // the result is tiny 1386 if (e4 < expmin) { 1387 // if e4 < expmin, we must truncate more of res 1388 x0 = expmin - e4; // x0 >= 1 1389 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 1390 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 1391 is_midpoint_lt_even0 = is_midpoint_lt_even; 1392 is_midpoint_gt_even0 = is_midpoint_gt_even; 1393 is_inexact_lt_midpoint = 0; 1394 is_inexact_gt_midpoint = 0; 1395 is_midpoint_lt_even = 0; 1396 is_midpoint_gt_even = 0; 1397 // the number of decimal digits in res is q4 1398 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits 1399 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 1400 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, 1401 &is_midpoint_lt_even, &is_midpoint_gt_even, 1402 &is_inexact_lt_midpoint, 1403 &is_inexact_gt_midpoint); 1404 if (incr_exp) { 1405 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 1406 R64 = ten2k64[q4 - x0]; 1407 } 1408 // res.w[1] = 0; (from above) 1409 res.w[0] = R64; 1410 } else { // if (q4 <= 34) 1411 // 19 <= q4 <= 38 1412 P128.w[1] = res.w[1]; 1413 P128.w[0] = res.w[0]; 1414 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1415 &is_midpoint_lt_even, &is_midpoint_gt_even, 1416 &is_inexact_lt_midpoint, 1417 &is_inexact_gt_midpoint); 1418 if (incr_exp) { 1419 // increase coefficient by a factor of 10; this will be <= 10^33 1420 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 1421 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 1422 // res.w[1] = 0; 1423 res.w[0] = ten2k64[q4 - x0]; 1424 } else { // 20 <= q4 - x0 <= 37 1425 res.w[0] = ten2k128[q4 - x0 - 20].w[0]; 1426 res.w[1] = ten2k128[q4 - x0 - 20].w[1]; 1427 } 1428 } 1429 } 1430 e4 = e4 + x0; // expmin 1431 } else if (x0 == q4) { 1432 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin 1433 // determine relationship with 1/2 ulp 1434 if (q4 <= 19) { 1435 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1436 lt_half_ulp = 1; 1437 is_inexact_lt_midpoint = 1; 1438 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1439 eq_half_ulp = 1; 1440 is_midpoint_gt_even = 1; 1441 } else { // > 1/2 ulp 1442 // gt_half_ulp = 1; 1443 is_inexact_gt_midpoint = 1; 1444 } 1445 } else { // if (q4 <= 34) 1446 if (res.w[1] < midpoint128[q4 - 20].w[1] || 1447 (res.w[1] == midpoint128[q4 - 20].w[1] && 1448 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp 1449 lt_half_ulp = 1; 1450 is_inexact_lt_midpoint = 1; 1451 } else if (res.w[1] == midpoint128[q4 - 20].w[1] && 1452 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1453 eq_half_ulp = 1; 1454 is_midpoint_gt_even = 1; 1455 } else { // > 1/2 ulp 1456 // gt_half_ulp = 1; 1457 is_inexact_gt_midpoint = 1; 1458 } 1459 } 1460 if (lt_half_ulp || eq_half_ulp) { 1461 // res = +0.0 * 10^expmin 1462 res.w[1] = 0x0000000000000000ull; 1463 res.w[0] = 0x0000000000000000ull; 1464 } else { // if (gt_half_ulp) 1465 // res = +1 * 10^expmin 1466 res.w[1] = 0x0000000000000000ull; 1467 res.w[0] = 0x0000000000000001ull; 1468 } 1469 e4 = expmin; 1470 } else { // if (x0 > q4) 1471 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin 1472 res.w[1] = 0; 1473 res.w[0] = 0; 1474 e4 = expmin; 1475 is_inexact_lt_midpoint = 1; 1476 } 1477 // avoid a double rounding error 1478 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 1479 is_midpoint_lt_even) { // double rounding error upward 1480 // res = res - 1 1481 res.w[0]--; 1482 if (res.w[0] == 0xffffffffffffffffull) 1483 res.w[1]--; 1484 // Note: a double rounding error upward is not possible; for this 1485 // the result after the first rounding would have to be 99...95 1486 // (35 digits in all), possibly followed by a number of zeros; this 1487 // not possible for f * f + 0 1488 is_midpoint_lt_even = 0; 1489 is_inexact_lt_midpoint = 1; 1490 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 1491 is_midpoint_gt_even) { // double rounding error downward 1492 // res = res + 1 1493 res.w[0]++; 1494 if (res.w[0] == 0) 1495 res.w[1]++; 1496 is_midpoint_gt_even = 0; 1497 is_inexact_gt_midpoint = 1; 1498 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 1499 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 1500 // if this second rounding was exact the result may still be 1501 // inexact because of the first rounding 1502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 1503 is_inexact_gt_midpoint = 1; 1504 } 1505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 1506 is_inexact_lt_midpoint = 1; 1507 } 1508 } else if (is_midpoint_gt_even && 1509 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 1510 // pulled up to a midpoint 1511 is_inexact_lt_midpoint = 1; 1512 is_inexact_gt_midpoint = 0; 1513 is_midpoint_lt_even = 0; 1514 is_midpoint_gt_even = 0; 1515 } else if (is_midpoint_lt_even && 1516 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 1517 // pulled down to a midpoint 1518 is_inexact_lt_midpoint = 0; 1519 is_inexact_gt_midpoint = 1; 1520 is_midpoint_lt_even = 0; 1521 is_midpoint_gt_even = 0; 1522 } else { 1523 ; 1524 } 1525 } else { // if e4 >= emin then q4 < P and the result is tiny and exact 1526 if (e3 < e4) { 1527 // if (e3 < e4) the preferred exponent is e3 1528 // return (C4 * 10^scale) * 10^(e4 - scale) 1529 // where scale = min (p34-q4, (e4 - e3)) 1530 scale = p34 - q4; 1531 ind = e4 - e3; 1532 if (ind < scale) 1533 scale = ind; 1534 if (scale == 0) { 1535 ; // res and e4 are unchanged 1536 } else if (q4 <= 19) { // C4 fits in 64 bits 1537 if (scale <= 19) { // 10^scale fits in 64 bits 1538 // 64 x 64 res.w[0] * ten2k64[scale] 1539 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); 1540 } else { // 10^scale fits in 128 bits 1541 // 64 x 128 res.w[0] * ten2k128[scale - 20] 1542 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); 1543 } 1544 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits 1545 // 64 x 128 ten2k64[scale] * C3 1546 __mul_128x64_to_128 (res, ten2k64[scale], res); 1547 } 1548 // subtract scale from the exponent 1549 e4 = e4 - scale; 1550 } 1551 } 1552 1553 // check for inexact result 1554 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1555 is_midpoint_lt_even || is_midpoint_gt_even) { 1556 // set the inexact flag and the underflow flag 1557 *pfpsf |= INEXACT_EXCEPTION; 1558 *pfpsf |= UNDERFLOW_EXCEPTION; 1559 } 1560 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1561 if (rnd_mode != ROUNDING_TO_NEAREST) { 1562 rounding_correction (rnd_mode, 1563 is_inexact_lt_midpoint, 1564 is_inexact_gt_midpoint, 1565 is_midpoint_lt_even, is_midpoint_gt_even, 1566 e4, &res, pfpsf); 1567 } 1568 *pfpsf |= save_fpsf; 1569 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1570 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1571 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1572 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1573 BID_SWAP128 (res); 1574 BID_RETURN (res) 1575 } 1576 // no overflow, and no underflow for rounding to nearest 1577 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1578 1579 if (rnd_mode != ROUNDING_TO_NEAREST) { 1580 rounding_correction (rnd_mode, 1581 is_inexact_lt_midpoint, 1582 is_inexact_gt_midpoint, 1583 is_midpoint_lt_even, is_midpoint_gt_even, 1584 e4, &res, pfpsf); 1585 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) 1586 if (e4 == expmin) { 1587 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 1588 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 1589 res.w[0] < 0x38c15b0a00000000ull)) { 1590 is_tiny = 1; 1591 } 1592 } 1593 } 1594 1595 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1596 is_midpoint_lt_even || is_midpoint_gt_even) { 1597 // set the inexact flag 1598 *pfpsf |= INEXACT_EXCEPTION; 1599 if (is_tiny) 1600 *pfpsf |= UNDERFLOW_EXCEPTION; 1601 } 1602 1603 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact 1604 // need to ensure that the result has the preferred exponent 1605 p_exp = res.w[1] & MASK_EXP; 1606 if (z_exp < p_exp) { // the preferred exponent is z_exp 1607 // signficand of res in C3 1608 C3.w[1] = res.w[1] & MASK_COEFF; 1609 C3.w[0] = res.w[0]; 1610 // the number of decimal digits of x * y is q4 <= 34 1611 // Note: the coefficient fits in 128 bits 1612 1613 // return (C3 * 10^scale) * 10^(p_exp - scale) 1614 // where scale = min (p34-q4, (p_exp-z_exp) >> 49) 1615 scale = p34 - q4; 1616 ind = (p_exp - z_exp) >> 49; 1617 if (ind < scale) 1618 scale = ind; 1619 // subtract scale from the exponent 1620 p_exp = p_exp - ((UINT64) scale << 49); 1621 if (scale == 0) { 1622 ; // leave res unchanged 1623 } else if (q4 <= 19) { // x * y fits in 64 bits 1624 if (scale <= 19) { // 10^scale fits in 64 bits 1625 // 64 x 64 C3.w[0] * ten2k64[scale] 1626 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1627 } else { // 10^scale fits in 128 bits 1628 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1629 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1630 } 1631 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1632 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits 1633 // 64 x 128 ten2k64[scale] * C3 1634 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1635 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1636 } 1637 } // else leave the result as it is, because p_exp <= z_exp 1638 } 1639 *pfpsf |= save_fpsf; 1640 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1641 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1642 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1643 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1644 BID_SWAP128 (res); 1645 BID_RETURN (res) 1646 } // else we have f * f + f 1647 1648 // continue with x = f, y = f, z = f 1649 1650 delta = q3 + e3 - q4 - e4; 1651delta_ge_zero: 1652 if (delta >= 0) { 1653 1654 if (p34 <= delta - 1 || // Case (1') 1655 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) 1656 // check for overflow, which can occur only in Case (1') 1657 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { 1658 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary 1659 // condition for (q3 + e3) > (p34 + expmax) 1660 if (rnd_mode == ROUNDING_TO_NEAREST) { 1661 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 1662 res.w[0] = 0x0000000000000000ull; 1663 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1664 } else { 1665 if (p_sign == z_sign) { 1666 is_inexact_lt_midpoint = 1; 1667 } else { 1668 is_inexact_gt_midpoint = 1; 1669 } 1670 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) 1671 scale = p34 - q3; 1672 if (scale == 0) { 1673 res.w[1] = z_sign | C3.w[1]; 1674 res.w[0] = C3.w[0]; 1675 } else { 1676 if (q3 <= 19) { // C3 fits in 64 bits 1677 if (scale <= 19) { // 10^scale fits in 64 bits 1678 // 64 x 64 C3.w[0] * ten2k64[scale] 1679 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1680 } else { // 10^scale fits in 128 bits 1681 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1682 __mul_128x64_to_128 (res, C3.w[0], 1683 ten2k128[scale - 20]); 1684 } 1685 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits 1686 // 64 x 128 ten2k64[scale] * C3 1687 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1688 } 1689 // the coefficient in res has q3 + scale = p34 digits 1690 } 1691 e3 = e3 - scale; 1692 res.w[1] = z_sign | res.w[1]; 1693 rounding_correction (rnd_mode, 1694 is_inexact_lt_midpoint, 1695 is_inexact_gt_midpoint, 1696 is_midpoint_lt_even, is_midpoint_gt_even, 1697 e3, &res, pfpsf); 1698 } 1699 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1700 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1701 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1702 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1703 BID_SWAP128 (res); 1704 BID_RETURN (res) 1705 } 1706 // res = z 1707 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) 1708 // return (C3 * 10^scale) * 10^(z_exp - scale) 1709 // where scale = min (p34-q3, z_exp-EMIN) 1710 scale = p34 - q3; 1711 ind = e3 + 6176; 1712 if (ind < scale) 1713 scale = ind; 1714 if (scale == 0) { 1715 res.w[1] = C3.w[1]; 1716 res.w[0] = C3.w[0]; 1717 } else if (q3 <= 19) { // z fits in 64 bits 1718 if (scale <= 19) { // 10^scale fits in 64 bits 1719 // 64 x 64 C3.w[0] * ten2k64[scale] 1720 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1721 } else { // 10^scale fits in 128 bits 1722 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1723 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1724 } 1725 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1726 // 64 x 128 ten2k64[scale] * C3 1727 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1728 } 1729 // the coefficient in res has q3 + scale digits 1730 // subtract scale from the exponent 1731 z_exp = z_exp - ((UINT64) scale << 49); 1732 e3 = e3 - scale; 1733 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1734 if (scale + q3 < p34) 1735 *pfpsf |= UNDERFLOW_EXCEPTION; 1736 } else { 1737 scale = 0; 1738 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; 1739 res.w[0] = C3.w[0]; 1740 } 1741 1742 // use the following to avoid double rounding errors when operating on 1743 // mixed formats in rounding to nearest, and for correcting the result 1744 // if not rounding to nearest 1745 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { 1746 // there is a gap of exactly one digit between the scaled C3 and C4 1747 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case 1748 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || 1749 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || 1750 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || 1751 C3.w[0] != ten2k128[q3 - 21].w[0]))) { 1752 // C3 * 10^ scale != 10^(q3-1) 1753 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || 1754 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 1755 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value 1756 } else { // if C3 * 10^scale = 10^(q3+scale-1) 1757 // ok from above e3 = (z_exp >> 49) - 6176; 1758 // the result is always inexact 1759 if (q4 == 1) { 1760 R64 = C4.w[0]; 1761 } else { 1762 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 1763 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 1764 if (q4 <= 18) { // 2 <= q4 <= 18 1765 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 1766 &is_midpoint_lt_even, &is_midpoint_gt_even, 1767 &is_inexact_lt_midpoint, 1768 &is_inexact_gt_midpoint); 1769 } else if (q4 <= 38) { 1770 P128.w[1] = C4.w[1]; 1771 P128.w[0] = C4.w[0]; 1772 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 1773 &is_midpoint_lt_even, 1774 &is_midpoint_gt_even, 1775 &is_inexact_lt_midpoint, 1776 &is_inexact_gt_midpoint); 1777 R64 = R128.w[0]; // one decimal digit 1778 } else if (q4 <= 57) { 1779 P192.w[2] = C4.w[2]; 1780 P192.w[1] = C4.w[1]; 1781 P192.w[0] = C4.w[0]; 1782 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 1783 &is_midpoint_lt_even, 1784 &is_midpoint_gt_even, 1785 &is_inexact_lt_midpoint, 1786 &is_inexact_gt_midpoint); 1787 R64 = R192.w[0]; // one decimal digit 1788 } else { // if (q4 <= 68) 1789 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 1790 &is_midpoint_lt_even, 1791 &is_midpoint_gt_even, 1792 &is_inexact_lt_midpoint, 1793 &is_inexact_gt_midpoint); 1794 R64 = R256.w[0]; // one decimal digit 1795 } 1796 if (incr_exp) { 1797 R64 = 10; 1798 } 1799 } 1800 if (q4 == 1 && C4.w[0] == 5) { 1801 is_inexact_lt_midpoint = 0; 1802 is_inexact_gt_midpoint = 0; 1803 is_midpoint_lt_even = 1; 1804 is_midpoint_gt_even = 0; 1805 } else if ((e3 == expmin) || 1806 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { 1807 // result does not change 1808 is_inexact_lt_midpoint = 0; 1809 is_inexact_gt_midpoint = 1; 1810 is_midpoint_lt_even = 0; 1811 is_midpoint_gt_even = 0; 1812 } else { 1813 is_inexact_lt_midpoint = 1; 1814 is_inexact_gt_midpoint = 0; 1815 is_midpoint_lt_even = 0; 1816 is_midpoint_gt_even = 0; 1817 // result decremented is 10^(q3+scale) - 1 1818 if ((q3 + scale) <= 19) { 1819 res.w[1] = 0; 1820 res.w[0] = ten2k64[q3 + scale]; 1821 } else { // if ((q3 + scale + 1) <= 35) 1822 res.w[1] = ten2k128[q3 + scale - 20].w[1]; 1823 res.w[0] = ten2k128[q3 + scale - 20].w[0]; 1824 } 1825 res.w[0] = res.w[0] - 1; // borrow never occurs 1826 z_exp = z_exp - EXP_P1; 1827 e3 = e3 - 1; 1828 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 1829 } 1830 if (e3 == expmin) { 1831 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { 1832 ; // result not tiny (in round-to-nearest mode) 1833 } else { 1834 *pfpsf |= UNDERFLOW_EXCEPTION; 1835 } 1836 } 1837 } // end 10^(q3+scale-1) 1838 // set the inexact flag 1839 *pfpsf |= INEXACT_EXCEPTION; 1840 } else { 1841 if (p_sign == z_sign) { 1842 // if (z_sign), set as if for absolute value 1843 is_inexact_lt_midpoint = 1; 1844 } else { // if (p_sign != z_sign) 1845 // if (z_sign), set as if for absolute value 1846 is_inexact_gt_midpoint = 1; 1847 } 1848 *pfpsf |= INEXACT_EXCEPTION; 1849 } 1850 // the result is always inexact => set the inexact flag 1851 // Determine tininess: 1852 // if (exp > expmin) 1853 // the result is not tiny 1854 // else // if exp = emin 1855 // if (q3 + scale < p34) 1856 // the result is tiny 1857 // else // if (q3 + scale = p34) 1858 // if (C3 * 10^scale > 10^33) 1859 // the result is not tiny 1860 // else // if C3 * 10^scale = 10^33 1861 // if (xy * z > 0) 1862 // the result is not tiny 1863 // else // if (xy * z < 0) 1864 // if (z > 0) 1865 // if rnd_mode != RP 1866 // the result is tiny 1867 // else // if RP 1868 // the result is not tiny 1869 // else // if (z < 0) 1870 // if rnd_mode != RM 1871 // the result is tiny 1872 // else // if RM 1873 // the result is not tiny 1874 // endif 1875 // endif 1876 // endif 1877 // endif 1878 // endif 1879 // endif 1880 if ((e3 == expmin && (q3 + scale) < p34) || 1881 (e3 == expmin && (q3 + scale) == p34 && 1882 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high 1883 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low 1884 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || 1885 (z_sign && rnd_mode != ROUNDING_DOWN)))) { 1886 *pfpsf |= UNDERFLOW_EXCEPTION; 1887 } 1888 if (rnd_mode != ROUNDING_TO_NEAREST) { 1889 rounding_correction (rnd_mode, 1890 is_inexact_lt_midpoint, 1891 is_inexact_gt_midpoint, 1892 is_midpoint_lt_even, is_midpoint_gt_even, 1893 e3, &res, pfpsf); 1894 } 1895 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1896 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1897 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1898 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1899 BID_SWAP128 (res); 1900 BID_RETURN (res) 1901 1902 } else if (p34 == delta) { // Case (1''B) 1903 1904 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 1905 // and C3 can be scaled up to p34 digits if needed 1906 1907 // scale C3 to p34 digits if needed 1908 scale = p34 - q3; // 0 <= scale <= p34 - 1 1909 if (scale == 0) { 1910 res.w[1] = C3.w[1]; 1911 res.w[0] = C3.w[0]; 1912 } else if (q3 <= 19) { // z fits in 64 bits 1913 if (scale <= 19) { // 10^scale fits in 64 bits 1914 // 64 x 64 C3.w[0] * ten2k64[scale] 1915 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1916 } else { // 10^scale fits in 128 bits 1917 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1918 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1919 } 1920 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1921 // 64 x 128 ten2k64[scale] * C3 1922 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1923 } 1924 // subtract scale from the exponent 1925 z_exp = z_exp - ((UINT64) scale << 49); 1926 e3 = e3 - scale; 1927 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits 1928 1929 // determine whether x * y is less than, equal to, or greater than 1930 // 1/2 ulp (z) 1931 if (q4 <= 19) { 1932 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1933 lt_half_ulp = 1; 1934 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1935 eq_half_ulp = 1; 1936 } else { // > 1/2 ulp 1937 gt_half_ulp = 1; 1938 } 1939 } else if (q4 <= 38) { 1940 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || 1941 (C4.w[1] == midpoint128[q4 - 20].w[1] && 1942 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp 1943 lt_half_ulp = 1; 1944 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && 1945 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1946 eq_half_ulp = 1; 1947 } else { // > 1/2 ulp 1948 gt_half_ulp = 1; 1949 } 1950 } else if (q4 <= 58) { 1951 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || 1952 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1953 C4.w[1] < midpoint192[q4 - 39].w[1]) || 1954 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1955 C4.w[1] == midpoint192[q4 - 39].w[1] && 1956 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp 1957 lt_half_ulp = 1; 1958 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && 1959 C4.w[1] == midpoint192[q4 - 39].w[1] && 1960 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp 1961 eq_half_ulp = 1; 1962 } else { // > 1/2 ulp 1963 gt_half_ulp = 1; 1964 } 1965 } else { 1966 if (C4.w[3] < midpoint256[q4 - 59].w[3] || 1967 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1968 C4.w[2] < midpoint256[q4 - 59].w[2]) || 1969 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1970 C4.w[2] == midpoint256[q4 - 59].w[2] && 1971 C4.w[1] < midpoint256[q4 - 59].w[1]) || 1972 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1973 C4.w[2] == midpoint256[q4 - 59].w[2] && 1974 C4.w[1] == midpoint256[q4 - 59].w[1] && 1975 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp 1976 lt_half_ulp = 1; 1977 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && 1978 C4.w[2] == midpoint256[q4 - 59].w[2] && 1979 C4.w[1] == midpoint256[q4 - 59].w[1] && 1980 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp 1981 eq_half_ulp = 1; 1982 } else { // > 1/2 ulp 1983 gt_half_ulp = 1; 1984 } 1985 } 1986 1987 if (p_sign == z_sign) { 1988 if (lt_half_ulp) { 1989 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1990 // use the following to avoid double rounding errors when operating on 1991 // mixed formats in rounding to nearest 1992 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value 1993 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 1994 // add 1 ulp to the significand 1995 res.w[0]++; 1996 if (res.w[0] == 0x0ull) 1997 res.w[1]++; 1998 // check for rounding overflow, when coeff == 10^34 1999 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && 2000 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 2001 e3 = e3 + 1; 2002 // coeff = 10^33 2003 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; 2004 res.w[1] = 0x0000314dc6448d93ull; 2005 res.w[0] = 0x38c15b0a00000000ull; 2006 } 2007 // end add 1 ulp 2008 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2009 if (eq_half_ulp) { 2010 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2011 } else { 2012 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2013 } 2014 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2015 // leave unchanged 2016 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2017 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2018 } 2019 // the result is always inexact, and never tiny 2020 // set the inexact flag 2021 *pfpsf |= INEXACT_EXCEPTION; 2022 // check for overflow 2023 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { 2024 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2025 res.w[0] = 0x0000000000000000ull; 2026 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2027 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2028 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2029 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2030 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2031 BID_SWAP128 (res); 2032 BID_RETURN (res) 2033 } 2034 if (rnd_mode != ROUNDING_TO_NEAREST) { 2035 rounding_correction (rnd_mode, 2036 is_inexact_lt_midpoint, 2037 is_inexact_gt_midpoint, 2038 is_midpoint_lt_even, is_midpoint_gt_even, 2039 e3, &res, pfpsf); 2040 z_exp = res.w[1] & MASK_EXP; 2041 } 2042 } else { // if (p_sign != z_sign) 2043 // consider two cases, because C3 * 10^scale = 10^33 is a special case 2044 if (res.w[1] != 0x0000314dc6448d93ull || 2045 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 2046 if (lt_half_ulp) { 2047 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2048 // use the following to avoid double rounding errors when operating 2049 // on mixed formats in rounding to nearest 2050 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2051 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 2052 // subtract 1 ulp from the significand 2053 res.w[0]--; 2054 if (res.w[0] == 0xffffffffffffffffull) 2055 res.w[1]--; 2056 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2057 if (eq_half_ulp) { 2058 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2059 } else { 2060 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2061 } 2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2063 // leave unchanged 2064 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2065 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2066 } 2067 // the result is always inexact, and never tiny 2068 // check for overflow for RN 2069 if (e3 > expmax) { 2070 if (rnd_mode == ROUNDING_TO_NEAREST) { 2071 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2072 res.w[0] = 0x0000000000000000ull; 2073 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2074 } else { 2075 rounding_correction (rnd_mode, 2076 is_inexact_lt_midpoint, 2077 is_inexact_gt_midpoint, 2078 is_midpoint_lt_even, 2079 is_midpoint_gt_even, e3, &res, 2080 pfpsf); 2081 } 2082 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2083 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2084 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2085 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2086 BID_SWAP128 (res); 2087 BID_RETURN (res) 2088 } 2089 // set the inexact flag 2090 *pfpsf |= INEXACT_EXCEPTION; 2091 if (rnd_mode != ROUNDING_TO_NEAREST) { 2092 rounding_correction (rnd_mode, 2093 is_inexact_lt_midpoint, 2094 is_inexact_gt_midpoint, 2095 is_midpoint_lt_even, 2096 is_midpoint_gt_even, e3, &res, pfpsf); 2097 } 2098 z_exp = res.w[1] & MASK_EXP; 2099 } else { // if C3 * 10^scale = 10^33 2100 e3 = (z_exp >> 49) - 6176; 2101 if (e3 > expmin) { 2102 // the result is exact if exp > expmin and C4 = d*10^(q4-1), 2103 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact 2104 if (q4 == 1) { 2105 // if q4 = 1 the result is exact 2106 // result coefficient = 10^34 - C4 2107 res.w[1] = 0x0001ed09bead87c0ull; 2108 res.w[0] = 0x378d8e6400000000ull - C4.w[0]; 2109 z_exp = z_exp - EXP_P1; 2110 e3 = e3 - 1; 2111 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2112 } else { 2113 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 2114 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 2115 if (q4 <= 18) { // 2 <= q4 <= 18 2116 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 2117 &is_midpoint_lt_even, 2118 &is_midpoint_gt_even, 2119 &is_inexact_lt_midpoint, 2120 &is_inexact_gt_midpoint); 2121 } else if (q4 <= 38) { 2122 P128.w[1] = C4.w[1]; 2123 P128.w[0] = C4.w[0]; 2124 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 2125 &is_midpoint_lt_even, 2126 &is_midpoint_gt_even, 2127 &is_inexact_lt_midpoint, 2128 &is_inexact_gt_midpoint); 2129 R64 = R128.w[0]; // one decimal digit 2130 } else if (q4 <= 57) { 2131 P192.w[2] = C4.w[2]; 2132 P192.w[1] = C4.w[1]; 2133 P192.w[0] = C4.w[0]; 2134 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 2135 &is_midpoint_lt_even, 2136 &is_midpoint_gt_even, 2137 &is_inexact_lt_midpoint, 2138 &is_inexact_gt_midpoint); 2139 R64 = R192.w[0]; // one decimal digit 2140 } else { // if (q4 <= 68) 2141 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 2142 &is_midpoint_lt_even, 2143 &is_midpoint_gt_even, 2144 &is_inexact_lt_midpoint, 2145 &is_inexact_gt_midpoint); 2146 R64 = R256.w[0]; // one decimal digit 2147 } 2148 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2149 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2150 // the result is exact: 10^34 - R64 2151 // incr_exp = 0 with certainty 2152 z_exp = z_exp - EXP_P1; 2153 e3 = e3 - 1; 2154 res.w[1] = 2155 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; 2156 res.w[0] = 0x378d8e6400000000ull - R64; 2157 } else { 2158 // We want R64 to be the top digit of C4, but we actually 2159 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, 2160 // because the top digit is (C4 * 10^(-q4+1))RZ 2161 // however, if incr_exp = 1 then R64 = 10 with certainty 2162 if (incr_exp) { 2163 R64 = 10; 2164 } 2165 // the result is inexact as C4 has more than 1 significant digit 2166 // and C3 * 10^scale = 10^33 2167 // example of case that is treated here: 2168 // 100...0 * 10^e3 - 0.41 * 10^e3 = 2169 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) 2170 // note that (e3 > expmin} 2171 // in order to round, subtract R64 from 10^34 and then compare 2172 // C4 - R64 * 10^(q4-1) with 1/2 ulp 2173 // calculate 10^34 - R64 2174 res.w[1] = 0x0001ed09bead87c0ull; 2175 res.w[0] = 0x378d8e6400000000ull - R64; 2176 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand 2177 // calculate C4 - R64 * 10^(q4-1); this is a rare case and 2178 // R64 is small, 1 <= R64 <= 9 2179 e3 = e3 - 1; 2180 if (is_inexact_lt_midpoint) { 2181 is_inexact_lt_midpoint = 0; 2182 is_inexact_gt_midpoint = 1; 2183 } else if (is_inexact_gt_midpoint) { 2184 is_inexact_gt_midpoint = 0; 2185 is_inexact_lt_midpoint = 1; 2186 } else if (is_midpoint_lt_even) { 2187 is_midpoint_lt_even = 0; 2188 is_midpoint_gt_even = 1; 2189 } else if (is_midpoint_gt_even) { 2190 is_midpoint_gt_even = 0; 2191 is_midpoint_lt_even = 1; 2192 } else { 2193 ; 2194 } 2195 // the result is always inexact, and never tiny 2196 // check for overflow for RN 2197 if (e3 > expmax) { 2198 if (rnd_mode == ROUNDING_TO_NEAREST) { 2199 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2200 res.w[0] = 0x0000000000000000ull; 2201 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2202 } else { 2203 rounding_correction (rnd_mode, 2204 is_inexact_lt_midpoint, 2205 is_inexact_gt_midpoint, 2206 is_midpoint_lt_even, 2207 is_midpoint_gt_even, e3, &res, 2208 pfpsf); 2209 } 2210 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2211 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2212 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2213 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2214 BID_SWAP128 (res); 2215 BID_RETURN (res) 2216 } 2217 // set the inexact flag 2218 *pfpsf |= INEXACT_EXCEPTION; 2219 res.w[1] = 2220 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 2221 if (rnd_mode != ROUNDING_TO_NEAREST) { 2222 rounding_correction (rnd_mode, 2223 is_inexact_lt_midpoint, 2224 is_inexact_gt_midpoint, 2225 is_midpoint_lt_even, 2226 is_midpoint_gt_even, e3, &res, 2227 pfpsf); 2228 } 2229 z_exp = res.w[1] & MASK_EXP; 2230 } // end result is inexact 2231 } // end q4 > 1 2232 } else { // if (e3 = emin) 2233 // if e3 = expmin the result is also tiny (the condition for 2234 // tininess is C4 > 050...0 [q4 digits] which is met because 2235 // the msd of C4 is not zero) 2236 // the result is tiny and inexact in all rounding modes; 2237 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 2238 // gt_half_ulp to calculate) 2239 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged 2240 2241 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp 2242 if (gt_half_ulp) { // res = 10^33 - 1 2243 res.w[1] = 0x0000314dc6448d93ull; 2244 res.w[0] = 0x38c15b09ffffffffull; 2245 } else { 2246 res.w[1] = 0x0000314dc6448d93ull; 2247 res.w[0] = 0x38c15b0a00000000ull; 2248 } 2249 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2250 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later 2251 2252 if (eq_half_ulp) { 2253 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2254 } else if (lt_half_ulp) { 2255 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value 2256 } else { // if (gt_half_ulp) 2257 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2258 } 2259 2260 if (rnd_mode != ROUNDING_TO_NEAREST) { 2261 rounding_correction (rnd_mode, 2262 is_inexact_lt_midpoint, 2263 is_inexact_gt_midpoint, 2264 is_midpoint_lt_even, 2265 is_midpoint_gt_even, e3, &res, 2266 pfpsf); 2267 z_exp = res.w[1] & MASK_EXP; 2268 } 2269 } // end e3 = emin 2270 // set the inexact flag (if the result was not exact) 2271 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2272 is_midpoint_lt_even || is_midpoint_gt_even) 2273 *pfpsf |= INEXACT_EXCEPTION; 2274 } // end 10^33 2275 } // end if (p_sign != z_sign) 2276 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2277 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2278 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2279 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2280 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2281 BID_SWAP128 (res); 2282 BID_RETURN (res) 2283 2284 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2285 (q3 <= delta && delta + q4 <= p34) || // Case (3) 2286 (delta < q3 && p34 < delta + q4) || // Case (4) 2287 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) 2288 (delta + q4 < q3)) && // Case (6) 2289 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) 2290 2291 // the result has the sign of z 2292 2293 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2294 (delta < q3 && p34 < delta + q4)) { // Case (4) 2295 // round first the sum x * y + z with unbounded exponent 2296 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 2297 // 1 <= scale <= 33 2298 // calculate res = C3 * 10^scale 2299 scale = p34 - q3; 2300 x0 = delta + q4 - p34; 2301 } else if (delta + q4 < q3) { // Case (6) 2302 // make Case (6) look like Case (3) or Case (5) with scale = 0 2303 // by scaling up C4 by 10^(q3 - delta - q4) 2304 scale = q3 - delta - q4; // 1 <= scale <= 33 2305 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits 2306 if (scale <= 19) { // 10^scale fits in 64 bits 2307 // 64 x 64 C4.w[0] * ten2k64[scale] 2308 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); 2309 } else { // 10^scale fits in 128 bits 2310 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 2311 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); 2312 } 2313 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 2314 // 64 x 128 ten2k64[scale] * C4 2315 __mul_128x64_to_128 (P128, ten2k64[scale], C4); 2316 } 2317 C4.w[0] = P128.w[0]; 2318 C4.w[1] = P128.w[1]; 2319 // e4 does not need adjustment, as it is not used from this point on 2320 scale = 0; 2321 x0 = 0; 2322 // now Case (6) looks like Case (3) or Case (5) with scale = 0 2323 } else { // if Case (3) or Case (5) 2324 // Note: Case (3) is similar to Case (2), but scale differs and the 2325 // result is exact, unless it is tiny (so x0 = 0 when calculating the 2326 // result with unbounded exponent) 2327 2328 // calculate first the sum x * y + z with unbounded exponent (exact) 2329 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, 2330 // 1 <= scale <= 33 2331 // calculate res = C3 * 10^scale 2332 scale = delta + q4 - q3; 2333 x0 = 0; 2334 // Note: the comments which follow refer [mainly] to Case (2)] 2335 } 2336 2337 case2_repeat: 2338 if (scale == 0) { // this could happen e.g. if we return to case2_repeat 2339 // or in Case (4) 2340 res.w[1] = C3.w[1]; 2341 res.w[0] = C3.w[0]; 2342 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits 2343 if (scale <= 19) { // 10^scale fits in 64 bits 2344 // 64 x 64 C3.w[0] * ten2k64[scale] 2345 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 2346 } else { // 10^scale fits in 128 bits 2347 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 2348 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 2349 } 2350 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 2351 // 64 x 128 ten2k64[scale] * C3 2352 __mul_128x64_to_128 (res, ten2k64[scale], C3); 2353 } 2354 // e3 is already calculated 2355 e3 = e3 - scale; 2356 // now res = C3 * 10^scale and e3 = e3 - scale 2357 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat 2358 // because the result was too small 2359 2360 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, 2361 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) 2362 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of 2363 // the rounding fits in 128 bits!) 2364 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) 2365 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 2366 if (x0 == 0) { // this could happen only if we return to case2_repeat, or 2367 // for Case (3) or Case (6) 2368 R128.w[1] = C4.w[1]; 2369 R128.w[0] = C4.w[0]; 2370 } else if (q4 <= 18) { 2371 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 2372 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, 2373 &is_midpoint_lt_even, &is_midpoint_gt_even, 2374 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 2375 if (incr_exp) { 2376 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 2377 R64 = ten2k64[q4 - x0]; 2378 } 2379 R128.w[1] = 0; 2380 R128.w[0] = R64; 2381 } else if (q4 <= 38) { 2382 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 2383 P128.w[1] = C4.w[1]; 2384 P128.w[0] = C4.w[0]; 2385 round128_19_38 (q4, x0, P128, &R128, &incr_exp, 2386 &is_midpoint_lt_even, &is_midpoint_gt_even, 2387 &is_inexact_lt_midpoint, 2388 &is_inexact_gt_midpoint); 2389 if (incr_exp) { 2390 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 2391 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2392 R128.w[0] = ten2k64[q4 - x0]; 2393 // R128.w[1] stays 0 2394 } else { // 20 <= q4 - x0 <= 37 2395 R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2396 R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2397 } 2398 } 2399 } else if (q4 <= 57) { 2400 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 2401 P192.w[2] = C4.w[2]; 2402 P192.w[1] = C4.w[1]; 2403 P192.w[0] = C4.w[0]; 2404 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2405 &is_midpoint_lt_even, &is_midpoint_gt_even, 2406 &is_inexact_lt_midpoint, 2407 &is_inexact_gt_midpoint); 2408 // R192.w[2] is always 0 2409 if (incr_exp) { 2410 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 2411 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2412 R192.w[0] = ten2k64[q4 - x0]; 2413 // R192.w[1] stays 0 2414 // R192.w[2] stays 0 2415 } else { // 20 <= q4 - x0 <= 33 2416 R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2417 R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2418 // R192.w[2] stays 0 2419 } 2420 } 2421 R128.w[1] = R192.w[1]; 2422 R128.w[0] = R192.w[0]; 2423 } else { 2424 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 2425 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2426 &is_midpoint_lt_even, &is_midpoint_gt_even, 2427 &is_inexact_lt_midpoint, 2428 &is_inexact_gt_midpoint); 2429 // R256.w[3] and R256.w[2] are always 0 2430 if (incr_exp) { 2431 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 2432 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2433 R256.w[0] = ten2k64[q4 - x0]; 2434 // R256.w[1] stays 0 2435 // R256.w[2] stays 0 2436 // R256.w[3] stays 0 2437 } else { // 20 <= q4 - x0 <= 33 2438 R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2439 R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2440 // R256.w[2] stays 0 2441 // R256.w[3] stays 0 2442 } 2443 } 2444 R128.w[1] = R256.w[1]; 2445 R128.w[0] = R256.w[0]; 2446 } 2447 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, 2448 // rounded to nearest, which were copied into R128 2449 if (z_sign == p_sign) { 2450 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale 2451 // the sum can result in [up to] p34 or p34 + 1 digits 2452 res.w[0] = res.w[0] + R128.w[0]; 2453 res.w[1] = res.w[1] + R128.w[1]; 2454 if (res.w[0] < R128.w[0]) 2455 res.w[1]++; // carry 2456 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 2457 if (res.w[1] > 0x0001ed09bead87c0ull || 2458 (res.w[1] == 0x0001ed09bead87c0ull && 2459 res.w[0] > 0x378d8e63ffffffffull)) { 2460 // avoid double rounding error 2461 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2462 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2463 is_midpoint_lt_even0 = is_midpoint_lt_even; 2464 is_midpoint_gt_even0 = is_midpoint_gt_even; 2465 is_inexact_lt_midpoint = 0; 2466 is_inexact_gt_midpoint = 0; 2467 is_midpoint_lt_even = 0; 2468 is_midpoint_gt_even = 0; 2469 P128.w[1] = res.w[1]; 2470 P128.w[0] = res.w[0]; 2471 round128_19_38 (35, 1, P128, &res, &incr_exp, 2472 &is_midpoint_lt_even, &is_midpoint_gt_even, 2473 &is_inexact_lt_midpoint, 2474 &is_inexact_gt_midpoint); 2475 // incr_exp is 0 with certainty in this case 2476 // avoid a double rounding error 2477 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2478 is_midpoint_lt_even) { // double rounding error upward 2479 // res = res - 1 2480 res.w[0]--; 2481 if (res.w[0] == 0xffffffffffffffffull) 2482 res.w[1]--; 2483 // Note: a double rounding error upward is not possible; for this 2484 // the result after the first rounding would have to be 99...95 2485 // (35 digits in all), possibly followed by a number of zeros; this 2486 // not possible in Cases (2)-(6) or (15)-(17) which may get here 2487 is_midpoint_lt_even = 0; 2488 is_inexact_lt_midpoint = 1; 2489 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2490 is_midpoint_gt_even) { // double rounding error downward 2491 // res = res + 1 2492 res.w[0]++; 2493 if (res.w[0] == 0) 2494 res.w[1]++; 2495 is_midpoint_gt_even = 0; 2496 is_inexact_gt_midpoint = 1; 2497 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2498 !is_inexact_lt_midpoint 2499 && !is_inexact_gt_midpoint) { 2500 // if this second rounding was exact the result may still be 2501 // inexact because of the first rounding 2502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2503 is_inexact_gt_midpoint = 1; 2504 } 2505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2506 is_inexact_lt_midpoint = 1; 2507 } 2508 } else if (is_midpoint_gt_even && 2509 (is_inexact_gt_midpoint0 2510 || is_midpoint_lt_even0)) { 2511 // pulled up to a midpoint 2512 is_inexact_lt_midpoint = 1; 2513 is_inexact_gt_midpoint = 0; 2514 is_midpoint_lt_even = 0; 2515 is_midpoint_gt_even = 0; 2516 } else if (is_midpoint_lt_even && 2517 (is_inexact_lt_midpoint0 2518 || is_midpoint_gt_even0)) { 2519 // pulled down to a midpoint 2520 is_inexact_lt_midpoint = 0; 2521 is_inexact_gt_midpoint = 1; 2522 is_midpoint_lt_even = 0; 2523 is_midpoint_gt_even = 0; 2524 } else { 2525 ; 2526 } 2527 // adjust exponent 2528 e3 = e3 + 1; 2529 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2530 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2531 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2532 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2533 is_inexact_lt_midpoint = 1; 2534 } 2535 } 2536 } else { 2537 // this is the result rounded with unbounded exponent, unless a 2538 // correction is needed 2539 res.w[1] = res.w[1] & MASK_COEFF; 2540 if (lsb == 1) { 2541 if (is_midpoint_gt_even) { 2542 // res = res + 1 2543 is_midpoint_gt_even = 0; 2544 is_midpoint_lt_even = 1; 2545 res.w[0]++; 2546 if (res.w[0] == 0x0) 2547 res.w[1]++; 2548 // check for rounding overflow 2549 if (res.w[1] == 0x0001ed09bead87c0ull && 2550 res.w[0] == 0x378d8e6400000000ull) { 2551 // res = 10^34 => rounding overflow 2552 res.w[1] = 0x0000314dc6448d93ull; 2553 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2554 e3++; 2555 } 2556 } else if (is_midpoint_lt_even) { 2557 // res = res - 1 2558 is_midpoint_lt_even = 0; 2559 is_midpoint_gt_even = 1; 2560 res.w[0]--; 2561 if (res.w[0] == 0xffffffffffffffffull) 2562 res.w[1]--; 2563 // if the result is pure zero, the sign depends on the rounding 2564 // mode (x*y and z had opposite signs) 2565 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2566 if (rnd_mode != ROUNDING_DOWN) 2567 z_sign = 0x0000000000000000ull; 2568 else 2569 z_sign = 0x8000000000000000ull; 2570 // the exponent is max (e3, expmin) 2571 res.w[1] = 0x0; 2572 res.w[0] = 0x0; 2573 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2574 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2575 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2576 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2577 BID_SWAP128 (res); 2578 BID_RETURN (res) 2579 } 2580 } else { 2581 ; 2582 } 2583 } 2584 } 2585 } else { // if (z_sign != p_sign) 2586 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 2587 // used to swap rounding indicators if p_sign != z_sign 2588 // the sum can result in [up to] p34 or p34 - 1 digits 2589 tmp64 = res.w[0]; 2590 res.w[0] = res.w[0] - R128.w[0]; 2591 res.w[1] = res.w[1] - R128.w[1]; 2592 if (res.w[0] > tmp64) 2593 res.w[1]--; // borrow 2594 // if res < 10^33 and exp > expmin need to decrease x0 and 2595 // increase scale by 1 2596 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 2597 (res.w[1] == 0x0000314dc6448d93ull && 2598 res.w[0] < 0x38c15b0a00000000ull)) || 2599 (is_inexact_lt_midpoint 2600 && res.w[1] == 0x0000314dc6448d93ull 2601 && res.w[0] == 0x38c15b0a00000000ull)) 2602 && x0 >= 1) { 2603 x0 = x0 - 1; 2604 // first restore e3, otherwise it will be too small 2605 e3 = e3 + scale; 2606 scale = scale + 1; 2607 is_inexact_lt_midpoint = 0; 2608 is_inexact_gt_midpoint = 0; 2609 is_midpoint_lt_even = 0; 2610 is_midpoint_gt_even = 0; 2611 incr_exp = 0; 2612 goto case2_repeat; 2613 } 2614 // else this is the result rounded with unbounded exponent; 2615 // because the result has opposite sign to that of C4 which was 2616 // rounded, need to change the rounding indicators 2617 if (is_inexact_lt_midpoint) { 2618 is_inexact_lt_midpoint = 0; 2619 is_inexact_gt_midpoint = 1; 2620 } else if (is_inexact_gt_midpoint) { 2621 is_inexact_gt_midpoint = 0; 2622 is_inexact_lt_midpoint = 1; 2623 } else if (lsb == 0) { 2624 if (is_midpoint_lt_even) { 2625 is_midpoint_lt_even = 0; 2626 is_midpoint_gt_even = 1; 2627 } else if (is_midpoint_gt_even) { 2628 is_midpoint_gt_even = 0; 2629 is_midpoint_lt_even = 1; 2630 } else { 2631 ; 2632 } 2633 } else if (lsb == 1) { 2634 if (is_midpoint_lt_even) { 2635 // res = res + 1 2636 res.w[0]++; 2637 if (res.w[0] == 0x0) 2638 res.w[1]++; 2639 // check for rounding overflow 2640 if (res.w[1] == 0x0001ed09bead87c0ull && 2641 res.w[0] == 0x378d8e6400000000ull) { 2642 // res = 10^34 => rounding overflow 2643 res.w[1] = 0x0000314dc6448d93ull; 2644 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2645 e3++; 2646 } 2647 } else if (is_midpoint_gt_even) { 2648 // res = res - 1 2649 res.w[0]--; 2650 if (res.w[0] == 0xffffffffffffffffull) 2651 res.w[1]--; 2652 // if the result is pure zero, the sign depends on the rounding 2653 // mode (x*y and z had opposite signs) 2654 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2655 if (rnd_mode != ROUNDING_DOWN) 2656 z_sign = 0x0000000000000000ull; 2657 else 2658 z_sign = 0x8000000000000000ull; 2659 // the exponent is max (e3, expmin) 2660 res.w[1] = 0x0; 2661 res.w[0] = 0x0; 2662 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2663 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2664 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2665 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2666 BID_SWAP128 (res); 2667 BID_RETURN (res) 2668 } 2669 } else { 2670 ; 2671 } 2672 } else { 2673 ; 2674 } 2675 } 2676 // check for underflow 2677 if (e3 == expmin) { // and if significand < 10^33 => result is tiny 2678 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 2679 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 2680 res.w[0] < 0x38c15b0a00000000ull)) { 2681 is_tiny = 1; 2682 } 2683 } else if (e3 < expmin) { 2684 // the result is tiny, so we must truncate more of res 2685 is_tiny = 1; 2686 x0 = expmin - e3; 2687 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2688 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2689 is_midpoint_lt_even0 = is_midpoint_lt_even; 2690 is_midpoint_gt_even0 = is_midpoint_gt_even; 2691 is_inexact_lt_midpoint = 0; 2692 is_inexact_gt_midpoint = 0; 2693 is_midpoint_lt_even = 0; 2694 is_midpoint_gt_even = 0; 2695 // determine the number of decimal digits in res 2696 if (res.w[1] == 0x0) { 2697 // between 1 and 19 digits 2698 for (ind = 1; ind <= 19; ind++) { 2699 if (res.w[0] < ten2k64[ind]) { 2700 break; 2701 } 2702 } 2703 // ind digits 2704 } else if (res.w[1] < ten2k128[0].w[1] || 2705 (res.w[1] == ten2k128[0].w[1] 2706 && res.w[0] < ten2k128[0].w[0])) { 2707 // 20 digits 2708 ind = 20; 2709 } else { // between 21 and 38 digits 2710 for (ind = 1; ind <= 18; ind++) { 2711 if (res.w[1] < ten2k128[ind].w[1] || 2712 (res.w[1] == ten2k128[ind].w[1] && 2713 res.w[0] < ten2k128[ind].w[0])) { 2714 break; 2715 } 2716 } 2717 // ind + 20 digits 2718 ind = ind + 20; 2719 } 2720 2721 // at this point ind >= x0; because delta >= 2 on this path, the case 2722 // ind = x0 can occur only in Case (2) or case (3), when C3 has one 2723 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 2724 // the signs of x * y and z are opposite, and through cancellation 2725 // the most significant decimal digit in res has the weight 2726 // 10^(emin-1); however, it is clear that in this case the most 2727 // significant digit is 9, so the result before rounding is 2728 // 0.9... * 10^emin 2729 // Otherwise, ind > x0 because there are non-zero decimal digits in the 2730 // result with weight of at least 10^emin, and correction for underflow 2731 // can be carried out using the round*_*_2_* () routines 2732 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin 2733 res.w[1] = 0x0; 2734 res.w[0] = 0x1; 2735 is_inexact_gt_midpoint = 1; 2736 } else if (ind <= 18) { // check that 2 <= ind 2737 // 2 <= ind <= 18, 1 <= x0 <= 17 2738 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 2739 &is_midpoint_lt_even, &is_midpoint_gt_even, 2740 &is_inexact_lt_midpoint, 2741 &is_inexact_gt_midpoint); 2742 if (incr_exp) { 2743 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 2744 R64 = ten2k64[ind - x0]; 2745 } 2746 res.w[1] = 0; 2747 res.w[0] = R64; 2748 } else if (ind <= 38) { 2749 // 19 <= ind <= 38 2750 P128.w[1] = res.w[1]; 2751 P128.w[0] = res.w[0]; 2752 round128_19_38 (ind, x0, P128, &res, &incr_exp, 2753 &is_midpoint_lt_even, &is_midpoint_gt_even, 2754 &is_inexact_lt_midpoint, 2755 &is_inexact_gt_midpoint); 2756 if (incr_exp) { 2757 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 2758 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 2759 res.w[0] = ten2k64[ind - x0]; 2760 // res.w[1] stays 0 2761 } else { // 20 <= ind - x0 <= 37 2762 res.w[0] = ten2k128[ind - x0 - 20].w[0]; 2763 res.w[1] = ten2k128[ind - x0 - 20].w[1]; 2764 } 2765 } 2766 } 2767 // avoid a double rounding error 2768 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2769 is_midpoint_lt_even) { // double rounding error upward 2770 // res = res - 1 2771 res.w[0]--; 2772 if (res.w[0] == 0xffffffffffffffffull) 2773 res.w[1]--; 2774 // Note: a double rounding error upward is not possible; for this 2775 // the result after the first rounding would have to be 99...95 2776 // (35 digits in all), possibly followed by a number of zeros; this 2777 // not possible in Cases (2)-(6) which may get here 2778 is_midpoint_lt_even = 0; 2779 is_inexact_lt_midpoint = 1; 2780 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2781 is_midpoint_gt_even) { // double rounding error downward 2782 // res = res + 1 2783 res.w[0]++; 2784 if (res.w[0] == 0) 2785 res.w[1]++; 2786 is_midpoint_gt_even = 0; 2787 is_inexact_gt_midpoint = 1; 2788 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2789 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2790 // if this second rounding was exact the result may still be 2791 // inexact because of the first rounding 2792 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2793 is_inexact_gt_midpoint = 1; 2794 } 2795 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2796 is_inexact_lt_midpoint = 1; 2797 } 2798 } else if (is_midpoint_gt_even && 2799 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 2800 // pulled up to a midpoint 2801 is_inexact_lt_midpoint = 1; 2802 is_inexact_gt_midpoint = 0; 2803 is_midpoint_lt_even = 0; 2804 is_midpoint_gt_even = 0; 2805 } else if (is_midpoint_lt_even && 2806 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 2807 // pulled down to a midpoint 2808 is_inexact_lt_midpoint = 0; 2809 is_inexact_gt_midpoint = 1; 2810 is_midpoint_lt_even = 0; 2811 is_midpoint_gt_even = 0; 2812 } else { 2813 ; 2814 } 2815 // adjust exponent 2816 e3 = e3 + x0; 2817 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2818 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2819 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2820 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2821 is_inexact_lt_midpoint = 1; 2822 } 2823 } 2824 } else { 2825 ; // not underflow 2826 } 2827 // check for inexact result 2828 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2829 is_midpoint_lt_even || is_midpoint_gt_even) { 2830 // set the inexact flag 2831 *pfpsf |= INEXACT_EXCEPTION; 2832 if (is_tiny) 2833 *pfpsf |= UNDERFLOW_EXCEPTION; 2834 } 2835 // now check for significand = 10^34 (may have resulted from going 2836 // back to case2_repeat) 2837 if (res.w[1] == 0x0001ed09bead87c0ull && 2838 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 2839 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 2840 res.w[0] = 0x38c15b0a00000000ull; 2841 e3 = e3 + 1; 2842 } 2843 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 2844 // check for overflow 2845 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { 2846 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2847 res.w[0] = 0x0000000000000000ull; 2848 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2849 } 2850 if (rnd_mode != ROUNDING_TO_NEAREST) { 2851 rounding_correction (rnd_mode, 2852 is_inexact_lt_midpoint, 2853 is_inexact_gt_midpoint, 2854 is_midpoint_lt_even, is_midpoint_gt_even, 2855 e3, &res, pfpsf); 2856 } 2857 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2858 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2859 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2860 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2861 BID_SWAP128 (res); 2862 BID_RETURN (res) 2863 2864 } else { 2865 2866 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and 2867 // the signs of x*y and z are opposite; in these cases massive 2868 // cancellation can occur, so it is better to scale either C3 or C4 and 2869 // to perform the subtraction before rounding; rounding is performed 2870 // next, depending on the number of decimal digits in the result and on 2871 // the exponent value 2872 // Note: overlow is not possible in this case 2873 // this is similar to Cases (15), (16), and (17) 2874 2875 if (delta + q4 < q3) { // from Case (6) 2876 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 2877 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) 2878 // and call add_and_round; delta stays positive 2879 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 2880 P128.w[1] = C3.w[1]; 2881 P128.w[0] = C3.w[0]; 2882 C3.w[1] = C4.w[1]; 2883 C3.w[0] = C4.w[0]; 2884 C4.w[1] = P128.w[1]; 2885 C4.w[0] = P128.w[0]; 2886 ind = q3; 2887 q3 = q4; 2888 q4 = ind; 2889 ind = e3; 2890 e3 = e4; 2891 e4 = ind; 2892 tmp_sign = z_sign; 2893 z_sign = p_sign; 2894 p_sign = tmp_sign; 2895 } else { // from Cases (2), (3), (4), (5) 2896 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be 2897 // scaled up by q4 + delta - q3; this is the same as in Cases (15), 2898 // (16), and (17) if we just change the sign of delta 2899 delta = -delta; 2900 } 2901 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 2902 rnd_mode, &is_midpoint_lt_even, 2903 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 2904 &is_inexact_gt_midpoint, pfpsf, &res); 2905 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2906 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2907 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2908 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2909 BID_SWAP128 (res); 2910 BID_RETURN (res) 2911 2912 } 2913 2914 } else { // if delta < 0 2915 2916 delta = -delta; 2917 2918 if (p34 < q4 && q4 <= delta) { // Case (7) 2919 2920 // truncate C4 to p34 digits into res 2921 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 2922 x0 = q4 - p34; 2923 if (q4 <= 38) { 2924 P128.w[1] = C4.w[1]; 2925 P128.w[0] = C4.w[0]; 2926 round128_19_38 (q4, x0, P128, &res, &incr_exp, 2927 &is_midpoint_lt_even, &is_midpoint_gt_even, 2928 &is_inexact_lt_midpoint, 2929 &is_inexact_gt_midpoint); 2930 } else if (q4 <= 57) { // 35 <= q4 <= 57 2931 P192.w[2] = C4.w[2]; 2932 P192.w[1] = C4.w[1]; 2933 P192.w[0] = C4.w[0]; 2934 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2935 &is_midpoint_lt_even, &is_midpoint_gt_even, 2936 &is_inexact_lt_midpoint, 2937 &is_inexact_gt_midpoint); 2938 res.w[0] = R192.w[0]; 2939 res.w[1] = R192.w[1]; 2940 } else { // if (q4 <= 68) 2941 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2942 &is_midpoint_lt_even, &is_midpoint_gt_even, 2943 &is_inexact_lt_midpoint, 2944 &is_inexact_gt_midpoint); 2945 res.w[0] = R256.w[0]; 2946 res.w[1] = R256.w[1]; 2947 } 2948 e4 = e4 + x0; 2949 if (incr_exp) { 2950 e4 = e4 + 1; 2951 } 2952 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2953 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2954 // if C4 rounded to p34 digits is exact then the result is inexact, 2955 // in a way that depends on the signs of x * y and z 2956 if (p_sign == z_sign) { 2957 is_inexact_lt_midpoint = 1; 2958 } else { // if (p_sign != z_sign) 2959 if (res.w[1] != 0x0000314dc6448d93ull || 2960 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 2961 is_inexact_gt_midpoint = 1; 2962 } else { // res = 10^33 and exact is a special case 2963 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 2964 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 2965 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 2966 // Note: ulp is really ulp/10 (after borrow which propagates to msd) 2967 if (delta > p34 + 1) { // C3 < 1/2 2968 // res = 10^33, unchanged 2969 is_inexact_gt_midpoint = 1; 2970 } else { // if (delta == p34 + 1) 2971 if (q3 <= 19) { 2972 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp 2973 // res = 10^33, unchanged 2974 is_inexact_gt_midpoint = 1; 2975 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp 2976 // res = 10^33, unchanged 2977 is_midpoint_lt_even = 1; 2978 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp 2979 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2980 res.w[0] = 0x378d8e63ffffffffull; 2981 e4 = e4 - 1; 2982 is_inexact_lt_midpoint = 1; 2983 } 2984 } else { // if (20 <= q3 <=34) 2985 if (C3.w[1] < midpoint128[q3 - 20].w[1] || 2986 (C3.w[1] == midpoint128[q3 - 20].w[1] && 2987 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp 2988 // res = 10^33, unchanged 2989 is_inexact_gt_midpoint = 1; 2990 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && 2991 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp 2992 // res = 10^33, unchanged 2993 is_midpoint_lt_even = 1; 2994 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp 2995 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2996 res.w[0] = 0x378d8e63ffffffffull; 2997 e4 = e4 - 1; 2998 is_inexact_lt_midpoint = 1; 2999 } 3000 } 3001 } 3002 } 3003 } 3004 } else if (is_midpoint_lt_even) { 3005 if (z_sign != p_sign) { 3006 // needs correction: res = res - 1 3007 res.w[0] = res.w[0] - 1; 3008 if (res.w[0] == 0xffffffffffffffffull) 3009 res.w[1]--; 3010 // if it is (10^33-1)*10^e4 then the corect result is 3011 // (10^34-1)*10(e4-1) 3012 if (res.w[1] == 0x0000314dc6448d93ull && 3013 res.w[0] == 0x38c15b09ffffffffull) { 3014 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3015 res.w[0] = 0x378d8e63ffffffffull; 3016 e4 = e4 - 1; 3017 } 3018 is_midpoint_lt_even = 0; 3019 is_inexact_lt_midpoint = 1; 3020 } else { // if (z_sign == p_sign) 3021 is_midpoint_lt_even = 0; 3022 is_inexact_gt_midpoint = 1; 3023 } 3024 } else if (is_midpoint_gt_even) { 3025 if (z_sign == p_sign) { 3026 // needs correction: res = res + 1 (cannot cross in the next binade) 3027 res.w[0] = res.w[0] + 1; 3028 if (res.w[0] == 0x0000000000000000ull) 3029 res.w[1]++; 3030 is_midpoint_gt_even = 0; 3031 is_inexact_gt_midpoint = 1; 3032 } else { // if (z_sign != p_sign) 3033 is_midpoint_gt_even = 0; 3034 is_inexact_lt_midpoint = 1; 3035 } 3036 } else { 3037 ; // the rounded result is already correct 3038 } 3039 // check for overflow 3040 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { 3041 res.w[1] = p_sign | 0x7800000000000000ull; 3042 res.w[0] = 0x0000000000000000ull; 3043 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); 3044 } else { // no overflow or not RN 3045 p_exp = ((UINT64) (e4 + 6176) << 49); 3046 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 3047 } 3048 if (rnd_mode != ROUNDING_TO_NEAREST) { 3049 rounding_correction (rnd_mode, 3050 is_inexact_lt_midpoint, 3051 is_inexact_gt_midpoint, 3052 is_midpoint_lt_even, is_midpoint_gt_even, 3053 e4, &res, pfpsf); 3054 } 3055 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 3056 is_midpoint_lt_even || is_midpoint_gt_even) { 3057 // set the inexact flag 3058 *pfpsf |= INEXACT_EXCEPTION; 3059 } 3060 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3061 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3062 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3063 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3064 BID_SWAP128 (res); 3065 BID_RETURN (res) 3066 3067 } else if ((q4 <= p34 && p34 <= delta) || // Case (8) 3068 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) 3069 (q4 <= delta && delta + q3 <= p34) || // Case (10) 3070 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) 3071 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) 3072 (delta + q3 < q4 && q4 <= p34)) { // Case (18) 3073 3074 // Case (8) is similar to Case (1), with C3 and C4 swapped 3075 // Case (9) is similar to Case (2), with C3 and C4 swapped 3076 // Case (10) is similar to Case (3), with C3 and C4 swapped 3077 // Case (13) is similar to Case (4), with C3 and C4 swapped 3078 // Case (14) is similar to Case (5), with C3 and C4 swapped 3079 // Case (18) is similar to Case (6), with C3 and C4 swapped 3080 3081 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) 3082 // and go back to delta_ge_zero 3083 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 3084 P128.w[1] = C3.w[1]; 3085 P128.w[0] = C3.w[0]; 3086 C3.w[1] = C4.w[1]; 3087 C3.w[0] = C4.w[0]; 3088 C4.w[1] = P128.w[1]; 3089 C4.w[0] = P128.w[0]; 3090 ind = q3; 3091 q3 = q4; 3092 q4 = ind; 3093 ind = e3; 3094 e3 = e4; 3095 e4 = ind; 3096 tmp_sign = z_sign; 3097 z_sign = p_sign; 3098 p_sign = tmp_sign; 3099 tmp.ui64 = z_exp; 3100 z_exp = p_exp; 3101 p_exp = tmp.ui64; 3102 goto delta_ge_zero; 3103 3104 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) 3105 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) 3106 3107 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, 3108 // 1 <= x0 <= q3 - 1 <= p34 - 1 3109 x0 = e4 - e3; // or x0 = delta + q3 - q4 3110 if (q3 <= 18) { // 2 <= q3 <= 18 3111 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, 3112 &is_midpoint_lt_even, &is_midpoint_gt_even, 3113 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 3114 // C3.w[1] = 0; 3115 C3.w[0] = R64; 3116 } else if (q3 <= 38) { 3117 round128_19_38 (q3, x0, C3, &R128, &incr_exp, 3118 &is_midpoint_lt_even, &is_midpoint_gt_even, 3119 &is_inexact_lt_midpoint, 3120 &is_inexact_gt_midpoint); 3121 C3.w[1] = R128.w[1]; 3122 C3.w[0] = R128.w[0]; 3123 } 3124 // the rounded result has q3 - x0 digits 3125 // we want the exponent to be e4, so if incr_exp = 1 then 3126 // multiply the rounded result by 10 - it will still fit in 113 bits 3127 if (incr_exp) { 3128 // 64 x 128 -> 128 3129 P128.w[1] = C3.w[1]; 3130 P128.w[0] = C3.w[0]; 3131 __mul_64x128_to_128 (C3, ten2k64[1], P128); 3132 } 3133 e3 = e3 + x0; // this is e4 3134 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 3135 // the result will have the sign of x * y; the exponent is e4 3136 R256.w[3] = 0; 3137 R256.w[2] = 0; 3138 R256.w[1] = C3.w[1]; 3139 R256.w[0] = C3.w[0]; 3140 if (p_sign == z_sign) { // R256 = C4 + R256 3141 add256 (C4, R256, &R256); 3142 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 3143 sub256 (C4, R256, &R256); // the result cannot be pure zero 3144 // because the result has opposite sign to that of R256 which was 3145 // rounded, need to change the rounding indicators 3146 lsb = C4.w[0] & 0x01; 3147 if (is_inexact_lt_midpoint) { 3148 is_inexact_lt_midpoint = 0; 3149 is_inexact_gt_midpoint = 1; 3150 } else if (is_inexact_gt_midpoint) { 3151 is_inexact_gt_midpoint = 0; 3152 is_inexact_lt_midpoint = 1; 3153 } else if (lsb == 0) { 3154 if (is_midpoint_lt_even) { 3155 is_midpoint_lt_even = 0; 3156 is_midpoint_gt_even = 1; 3157 } else if (is_midpoint_gt_even) { 3158 is_midpoint_gt_even = 0; 3159 is_midpoint_lt_even = 1; 3160 } else { 3161 ; 3162 } 3163 } else if (lsb == 1) { 3164 if (is_midpoint_lt_even) { 3165 // res = res + 1 3166 R256.w[0]++; 3167 if (R256.w[0] == 0x0) { 3168 R256.w[1]++; 3169 if (R256.w[1] == 0x0) { 3170 R256.w[2]++; 3171 if (R256.w[2] == 0x0) { 3172 R256.w[3]++; 3173 } 3174 } 3175 } 3176 // no check for rounding overflow - R256 was a difference 3177 } else if (is_midpoint_gt_even) { 3178 // res = res - 1 3179 R256.w[0]--; 3180 if (R256.w[0] == 0xffffffffffffffffull) { 3181 R256.w[1]--; 3182 if (R256.w[1] == 0xffffffffffffffffull) { 3183 R256.w[2]--; 3184 if (R256.w[2] == 0xffffffffffffffffull) { 3185 R256.w[3]--; 3186 } 3187 } 3188 } 3189 } else { 3190 ; 3191 } 3192 } else { 3193 ; 3194 } 3195 } 3196 // determine the number of decimal digits in R256 3197 ind = nr_digits256 (R256); // ind >= p34 3198 // if R256 is sum, then ind > p34; if R256 is a difference, then 3199 // ind >= p34; this means that we can calculate the result rounded to 3200 // the destination precision, with unbounded exponent, starting from R256 3201 // and using the indicators from the rounding of C3 to avoid a double 3202 // rounding error 3203 3204 if (ind < p34) { 3205 ; 3206 } else if (ind == p34) { 3207 // the result rounded to the destination precision with 3208 // unbounded exponent 3209 // is (-1)^p_sign * R256 * 10^e4 3210 res.w[1] = R256.w[1]; 3211 res.w[0] = R256.w[0]; 3212 } else { // if (ind > p34) 3213 // if more than P digits, round to nearest to P digits 3214 // round R256 to p34 digits 3215 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 3216 // save C3 rounding indicators to help avoid double rounding error 3217 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3218 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3219 is_midpoint_lt_even0 = is_midpoint_lt_even; 3220 is_midpoint_gt_even0 = is_midpoint_gt_even; 3221 // initialize rounding indicators 3222 is_inexact_lt_midpoint = 0; 3223 is_inexact_gt_midpoint = 0; 3224 is_midpoint_lt_even = 0; 3225 is_midpoint_gt_even = 0; 3226 // round to p34 digits; the result fits in 113 bits 3227 if (ind <= 38) { 3228 P128.w[1] = R256.w[1]; 3229 P128.w[0] = R256.w[0]; 3230 round128_19_38 (ind, x0, P128, &R128, &incr_exp, 3231 &is_midpoint_lt_even, &is_midpoint_gt_even, 3232 &is_inexact_lt_midpoint, 3233 &is_inexact_gt_midpoint); 3234 } else if (ind <= 57) { 3235 P192.w[2] = R256.w[2]; 3236 P192.w[1] = R256.w[1]; 3237 P192.w[0] = R256.w[0]; 3238 round192_39_57 (ind, x0, P192, &R192, &incr_exp, 3239 &is_midpoint_lt_even, &is_midpoint_gt_even, 3240 &is_inexact_lt_midpoint, 3241 &is_inexact_gt_midpoint); 3242 R128.w[1] = R192.w[1]; 3243 R128.w[0] = R192.w[0]; 3244 } else { // if (ind <= 68) 3245 round256_58_76 (ind, x0, R256, &R256, &incr_exp, 3246 &is_midpoint_lt_even, &is_midpoint_gt_even, 3247 &is_inexact_lt_midpoint, 3248 &is_inexact_gt_midpoint); 3249 R128.w[1] = R256.w[1]; 3250 R128.w[0] = R256.w[0]; 3251 } 3252 // the rounded result has p34 = 34 digits 3253 e4 = e4 + x0 + incr_exp; 3254 3255 res.w[1] = R128.w[1]; 3256 res.w[0] = R128.w[0]; 3257 3258 // avoid a double rounding error 3259 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3260 is_midpoint_lt_even) { // double rounding error upward 3261 // res = res - 1 3262 res.w[0]--; 3263 if (res.w[0] == 0xffffffffffffffffull) 3264 res.w[1]--; 3265 is_midpoint_lt_even = 0; 3266 is_inexact_lt_midpoint = 1; 3267 // Note: a double rounding error upward is not possible; for this 3268 // the result after the first rounding would have to be 99...95 3269 // (35 digits in all), possibly followed by a number of zeros; this 3270 // not possible in Cases (2)-(6) or (15)-(17) which may get here 3271 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent 3272 if (res.w[1] == 0x0000314dc6448d93ull && 3273 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 3274 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3275 res.w[0] = 0x378d8e63ffffffffull; 3276 e4--; 3277 } 3278 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3279 is_midpoint_gt_even) { // double rounding error downward 3280 // res = res + 1 3281 res.w[0]++; 3282 if (res.w[0] == 0) 3283 res.w[1]++; 3284 is_midpoint_gt_even = 0; 3285 is_inexact_gt_midpoint = 1; 3286 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3287 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 3288 // if this second rounding was exact the result may still be 3289 // inexact because of the first rounding 3290 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3291 is_inexact_gt_midpoint = 1; 3292 } 3293 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3294 is_inexact_lt_midpoint = 1; 3295 } 3296 } else if (is_midpoint_gt_even && 3297 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 3298 // pulled up to a midpoint 3299 is_inexact_lt_midpoint = 1; 3300 is_inexact_gt_midpoint = 0; 3301 is_midpoint_lt_even = 0; 3302 is_midpoint_gt_even = 0; 3303 } else if (is_midpoint_lt_even && 3304 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 3305 // pulled down to a midpoint 3306 is_inexact_lt_midpoint = 0; 3307 is_inexact_gt_midpoint = 1; 3308 is_midpoint_lt_even = 0; 3309 is_midpoint_gt_even = 0; 3310 } else { 3311 ; 3312 } 3313 } 3314 3315 // determine tininess 3316 if (rnd_mode == ROUNDING_TO_NEAREST) { 3317 if (e4 < expmin) { 3318 is_tiny = 1; // for other rounding modes apply correction 3319 } 3320 } else { 3321 // for RM, RP, RZ, RA apply correction in order to determine tininess 3322 // but do not save the result; apply the correction to 3323 // (-1)^p_sign * res * 10^0 3324 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; 3325 P128.w[0] = res.w[0]; 3326 rounding_correction (rnd_mode, 3327 is_inexact_lt_midpoint, 3328 is_inexact_gt_midpoint, 3329 is_midpoint_lt_even, is_midpoint_gt_even, 3330 0, &P128, pfpsf); 3331 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 3332 // the number of digits in the significand is p34 = 34 3333 if (e4 + scale < expmin) { 3334 is_tiny = 1; 3335 } 3336 } 3337 3338 // the result rounded to the destination precision with unbounded exponent 3339 // is (-1)^p_sign * res * 10^e4 3340 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN 3341 // res.w[0] unchanged; 3342 // Note: res is correct only if expmin <= e4 <= expmax 3343 ind = p34; // the number of decimal digits in the signifcand of res 3344 3345 // at this point we have the result rounded with unbounded exponent in 3346 // res and we know its tininess: 3347 // res = (-1)^p_sign * significand * 10^e4, 3348 // where q (significand) = ind = p34 3349 // Note: res is correct only if expmin <= e4 <= expmax 3350 3351 // check for overflow if RN 3352 if (rnd_mode == ROUNDING_TO_NEAREST 3353 && (ind + e4) > (p34 + expmax)) { 3354 res.w[1] = p_sign | 0x7800000000000000ull; 3355 res.w[0] = 0x0000000000000000ull; 3356 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 3357 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3358 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3359 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3360 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3361 BID_SWAP128 (res); 3362 BID_RETURN (res) 3363 } // else not overflow or not RN, so continue 3364 3365 // from this point on this is similar to the last part of the computation 3366 // for Cases (15), (16), (17) 3367 3368 // if (e4 >= expmin) we have the result rounded with bounded exponent 3369 if (e4 < expmin) { 3370 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 3371 // where the result rounded [at most] once is 3372 // (-1)^p_sign * significand_res * 10^e4 3373 3374 // avoid double rounding error 3375 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3376 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3377 is_midpoint_lt_even0 = is_midpoint_lt_even; 3378 is_midpoint_gt_even0 = is_midpoint_gt_even; 3379 is_inexact_lt_midpoint = 0; 3380 is_inexact_gt_midpoint = 0; 3381 is_midpoint_lt_even = 0; 3382 is_midpoint_gt_even = 0; 3383 3384 if (x0 > ind) { 3385 // nothing is left of res when moving the decimal point left x0 digits 3386 is_inexact_lt_midpoint = 1; 3387 res.w[1] = p_sign | 0x0000000000000000ull; 3388 res.w[0] = 0x0000000000000000ull; 3389 e4 = expmin; 3390 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 3391 // this is <, =, or > 1/2 ulp 3392 // compare the ind-digit value in the significand of res with 3393 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 3394 // less than, equal to, or greater than 1/2 ulp (significand of res) 3395 R128.w[1] = res.w[1] & MASK_COEFF; 3396 R128.w[0] = res.w[0]; 3397 if (ind <= 19) { 3398 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp 3399 lt_half_ulp = 1; 3400 is_inexact_lt_midpoint = 1; 3401 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp 3402 eq_half_ulp = 1; 3403 is_midpoint_gt_even = 1; 3404 } else { // > 1/2 ulp 3405 gt_half_ulp = 1; 3406 is_inexact_gt_midpoint = 1; 3407 } 3408 } else { // if (ind <= 38) 3409 if (R128.w[1] < midpoint128[ind - 20].w[1] || 3410 (R128.w[1] == midpoint128[ind - 20].w[1] && 3411 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp 3412 lt_half_ulp = 1; 3413 is_inexact_lt_midpoint = 1; 3414 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 3415 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp 3416 eq_half_ulp = 1; 3417 is_midpoint_gt_even = 1; 3418 } else { // > 1/2 ulp 3419 gt_half_ulp = 1; 3420 is_inexact_gt_midpoint = 1; 3421 } 3422 } 3423 if (lt_half_ulp || eq_half_ulp) { 3424 // res = +0.0 * 10^expmin 3425 res.w[1] = 0x0000000000000000ull; 3426 res.w[0] = 0x0000000000000000ull; 3427 } else { // if (gt_half_ulp) 3428 // res = +1 * 10^expmin 3429 res.w[1] = 0x0000000000000000ull; 3430 res.w[0] = 0x0000000000000001ull; 3431 } 3432 res.w[1] = p_sign | res.w[1]; 3433 e4 = expmin; 3434 } else { // if (1 <= x0 <= ind - 1 <= 33) 3435 // round the ind-digit result to ind - x0 digits 3436 3437 if (ind <= 18) { // 2 <= ind <= 18 3438 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 3439 &is_midpoint_lt_even, &is_midpoint_gt_even, 3440 &is_inexact_lt_midpoint, 3441 &is_inexact_gt_midpoint); 3442 res.w[1] = 0x0; 3443 res.w[0] = R64; 3444 } else if (ind <= 38) { 3445 P128.w[1] = res.w[1] & MASK_COEFF; 3446 P128.w[0] = res.w[0]; 3447 round128_19_38 (ind, x0, P128, &res, &incr_exp, 3448 &is_midpoint_lt_even, &is_midpoint_gt_even, 3449 &is_inexact_lt_midpoint, 3450 &is_inexact_gt_midpoint); 3451 } 3452 e4 = e4 + x0; // expmin 3453 // we want the exponent to be expmin, so if incr_exp = 1 then 3454 // multiply the rounded result by 10 - it will still fit in 113 bits 3455 if (incr_exp) { 3456 // 64 x 128 -> 128 3457 P128.w[1] = res.w[1] & MASK_COEFF; 3458 P128.w[0] = res.w[0]; 3459 __mul_64x128_to_128 (res, ten2k64[1], P128); 3460 } 3461 res.w[1] = 3462 p_sign | ((UINT64) (e4 + 6176) << 49) | (res. 3463 w[1] & MASK_COEFF); 3464 // avoid a double rounding error 3465 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3466 is_midpoint_lt_even) { // double rounding error upward 3467 // res = res - 1 3468 res.w[0]--; 3469 if (res.w[0] == 0xffffffffffffffffull) 3470 res.w[1]--; 3471 // Note: a double rounding error upward is not possible; for this 3472 // the result after the first rounding would have to be 99...95 3473 // (35 digits in all), possibly followed by a number of zeros; this 3474 // not possible in this underflow case 3475 is_midpoint_lt_even = 0; 3476 is_inexact_lt_midpoint = 1; 3477 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3478 is_midpoint_gt_even) { // double rounding error downward 3479 // res = res + 1 3480 res.w[0]++; 3481 if (res.w[0] == 0) 3482 res.w[1]++; 3483 is_midpoint_gt_even = 0; 3484 is_inexact_gt_midpoint = 1; 3485 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3486 !is_inexact_lt_midpoint 3487 && !is_inexact_gt_midpoint) { 3488 // if this second rounding was exact the result may still be 3489 // inexact because of the first rounding 3490 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3491 is_inexact_gt_midpoint = 1; 3492 } 3493 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3494 is_inexact_lt_midpoint = 1; 3495 } 3496 } else if (is_midpoint_gt_even && 3497 (is_inexact_gt_midpoint0 3498 || is_midpoint_lt_even0)) { 3499 // pulled up to a midpoint 3500 is_inexact_lt_midpoint = 1; 3501 is_inexact_gt_midpoint = 0; 3502 is_midpoint_lt_even = 0; 3503 is_midpoint_gt_even = 0; 3504 } else if (is_midpoint_lt_even && 3505 (is_inexact_lt_midpoint0 3506 || is_midpoint_gt_even0)) { 3507 // pulled down to a midpoint 3508 is_inexact_lt_midpoint = 0; 3509 is_inexact_gt_midpoint = 1; 3510 is_midpoint_lt_even = 0; 3511 is_midpoint_gt_even = 0; 3512 } else { 3513 ; 3514 } 3515 } 3516 } 3517 // res contains the correct result 3518 // apply correction if not rounding to nearest 3519 if (rnd_mode != ROUNDING_TO_NEAREST) { 3520 rounding_correction (rnd_mode, 3521 is_inexact_lt_midpoint, 3522 is_inexact_gt_midpoint, 3523 is_midpoint_lt_even, is_midpoint_gt_even, 3524 e4, &res, pfpsf); 3525 } 3526 if (is_midpoint_lt_even || is_midpoint_gt_even || 3527 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 3528 // set the inexact flag 3529 *pfpsf |= INEXACT_EXCEPTION; 3530 if (is_tiny) 3531 *pfpsf |= UNDERFLOW_EXCEPTION; 3532 } 3533 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3534 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3535 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3536 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3537 BID_SWAP128 (res); 3538 BID_RETURN (res) 3539 3540 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) 3541 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) 3542 (delta + q3 <= p34 && p34 < q4)) { // Case (17) 3543 3544 // calculate first the result rounded to the destination precision, with 3545 // unbounded exponent 3546 3547 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 3548 rnd_mode, &is_midpoint_lt_even, 3549 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 3550 &is_inexact_gt_midpoint, pfpsf, &res); 3551 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3552 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3553 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3554 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3555 BID_SWAP128 (res); 3556 BID_RETURN (res) 3557 3558 } else { 3559 ; 3560 } 3561 3562 } // end if delta < 0 3563 3564 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3565 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3566 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3567 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3568 BID_SWAP128 (res); 3569 BID_RETURN (res) 3570 3571} 3572 3573 3574#if DECIMAL_CALL_BY_REFERENCE 3575void 3576bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz 3577 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3578 _EXC_INFO_PARAM) { 3579 UINT128 x = *px, y = *py, z = *pz; 3580#if !DECIMAL_GLOBAL_ROUNDING 3581 unsigned int rnd_mode = *prnd_mode; 3582#endif 3583#else 3584UINT128 3585bid128_fma (UINT128 x, UINT128 y, UINT128 z 3586 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3587 _EXC_INFO_PARAM) { 3588#endif 3589 int is_midpoint_lt_even, is_midpoint_gt_even, 3590 is_inexact_lt_midpoint, is_inexact_gt_midpoint; 3591 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3592 3593#if DECIMAL_CALL_BY_REFERENCE 3594 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3595 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3596 &res, &x, &y, &z 3597 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3598 _EXC_INFO_ARG); 3599#else 3600 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3601 &is_inexact_lt_midpoint, 3602 &is_inexact_gt_midpoint, x, y, 3603 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3604 _EXC_INFO_ARG); 3605#endif 3606 BID_RETURN (res); 3607} 3608 3609 3610#if DECIMAL_CALL_BY_REFERENCE 3611void 3612bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz 3613 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3614 _EXC_INFO_PARAM) { 3615 UINT64 x = *px, y = *py, z = *pz; 3616#if !DECIMAL_GLOBAL_ROUNDING 3617 unsigned int rnd_mode = *prnd_mode; 3618#endif 3619#else 3620UINT128 3621bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z 3622 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3623 _EXC_INFO_PARAM) { 3624#endif 3625 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3626 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3627 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3628 UINT128 x1, y1, z1; 3629 3630#if DECIMAL_CALL_BY_REFERENCE 3631 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3632 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3633 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3634 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3635 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3636 &res, &x1, &y1, &z1 3637 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3638 _EXC_INFO_ARG); 3639#else 3640 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3641 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3642 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3643 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3644 &is_inexact_lt_midpoint, 3645 &is_inexact_gt_midpoint, x1, y1, 3646 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3647 _EXC_INFO_ARG); 3648#endif 3649 BID_RETURN (res); 3650} 3651 3652 3653#if DECIMAL_CALL_BY_REFERENCE 3654void 3655bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz 3656 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3657 _EXC_INFO_PARAM) { 3658 UINT64 x = *px, y = *py; 3659 UINT128 z = *pz; 3660#if !DECIMAL_GLOBAL_ROUNDING 3661 unsigned int rnd_mode = *prnd_mode; 3662#endif 3663#else 3664UINT128 3665bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z 3666 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3667 _EXC_INFO_PARAM) { 3668#endif 3669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3670 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3671 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3672 UINT128 x1, y1; 3673 3674#if DECIMAL_CALL_BY_REFERENCE 3675 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3676 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3677 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3678 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3679 &res, &x1, &y1, &z 3680 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3681 _EXC_INFO_ARG); 3682#else 3683 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3684 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3685 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3686 &is_inexact_lt_midpoint, 3687 &is_inexact_gt_midpoint, x1, y1, 3688 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3689 _EXC_INFO_ARG); 3690#endif 3691 BID_RETURN (res); 3692} 3693 3694 3695#if DECIMAL_CALL_BY_REFERENCE 3696void 3697bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz 3698 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3699 _EXC_INFO_PARAM) { 3700 UINT64 x = *px, z = *pz; 3701#if !DECIMAL_GLOBAL_ROUNDING 3702 unsigned int rnd_mode = *prnd_mode; 3703#endif 3704#else 3705UINT128 3706bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z 3707 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3708 _EXC_INFO_PARAM) { 3709#endif 3710 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3711 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3712 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3713 UINT128 x1, z1; 3714 3715#if DECIMAL_CALL_BY_REFERENCE 3716 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3717 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3718 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3719 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3720 &res, &x1, py, &z1 3721 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3722 _EXC_INFO_ARG); 3723#else 3724 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3725 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3726 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3727 &is_inexact_lt_midpoint, 3728 &is_inexact_gt_midpoint, x1, y, 3729 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3730 _EXC_INFO_ARG); 3731#endif 3732 BID_RETURN (res); 3733} 3734 3735 3736#if DECIMAL_CALL_BY_REFERENCE 3737void 3738bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz 3739 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3740 _EXC_INFO_PARAM) { 3741 UINT64 x = *px; 3742#if !DECIMAL_GLOBAL_ROUNDING 3743 unsigned int rnd_mode = *prnd_mode; 3744#endif 3745#else 3746UINT128 3747bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z 3748 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3749 _EXC_INFO_PARAM) { 3750#endif 3751 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3752 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3753 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3754 UINT128 x1; 3755 3756#if DECIMAL_CALL_BY_REFERENCE 3757 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3758 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3759 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3760 &res, &x1, py, pz 3761 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3762 _EXC_INFO_ARG); 3763#else 3764 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3765 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3766 &is_inexact_lt_midpoint, 3767 &is_inexact_gt_midpoint, x1, y, 3768 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3769 _EXC_INFO_ARG); 3770#endif 3771 BID_RETURN (res); 3772} 3773 3774 3775#if DECIMAL_CALL_BY_REFERENCE 3776void 3777bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz 3778 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3779 _EXC_INFO_PARAM) { 3780 UINT64 y = *py, z = *pz; 3781#if !DECIMAL_GLOBAL_ROUNDING 3782 unsigned int rnd_mode = *prnd_mode; 3783#endif 3784#else 3785UINT128 3786bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z 3787 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3788 _EXC_INFO_PARAM) { 3789#endif 3790 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3791 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3792 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3793 UINT128 y1, z1; 3794 3795#if DECIMAL_CALL_BY_REFERENCE 3796 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3797 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3798 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3799 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3800 &res, px, &y1, &z1 3801 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3802 _EXC_INFO_ARG); 3803#else 3804 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3805 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3806 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3807 &is_inexact_lt_midpoint, 3808 &is_inexact_gt_midpoint, x, y1, 3809 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3810 _EXC_INFO_ARG); 3811#endif 3812 BID_RETURN (res); 3813} 3814 3815 3816#if DECIMAL_CALL_BY_REFERENCE 3817void 3818bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz 3819 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3820 _EXC_INFO_PARAM) { 3821 UINT64 y = *py; 3822#if !DECIMAL_GLOBAL_ROUNDING 3823 unsigned int rnd_mode = *prnd_mode; 3824#endif 3825#else 3826UINT128 3827bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z 3828 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3829 _EXC_INFO_PARAM) { 3830#endif 3831 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3832 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3833 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3834 UINT128 y1; 3835 3836#if DECIMAL_CALL_BY_REFERENCE 3837 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3838 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3839 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3840 &res, px, &y1, pz 3841 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3842 _EXC_INFO_ARG); 3843#else 3844 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3845 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3846 &is_inexact_lt_midpoint, 3847 &is_inexact_gt_midpoint, x, y1, 3848 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3849 _EXC_INFO_ARG); 3850#endif 3851 BID_RETURN (res); 3852} 3853 3854 3855#if DECIMAL_CALL_BY_REFERENCE 3856void 3857bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz 3858 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3859 _EXC_INFO_PARAM) { 3860 UINT64 z = *pz; 3861#if !DECIMAL_GLOBAL_ROUNDING 3862 unsigned int rnd_mode = *prnd_mode; 3863#endif 3864#else 3865UINT128 3866bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z 3867 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3868 _EXC_INFO_PARAM) { 3869#endif 3870 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3871 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3872 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3873 UINT128 z1; 3874 3875#if DECIMAL_CALL_BY_REFERENCE 3876 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3877 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3878 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3879 &res, px, py, &z1 3880 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3881 _EXC_INFO_ARG); 3882#else 3883 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3884 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3885 &is_inexact_lt_midpoint, 3886 &is_inexact_gt_midpoint, x, y, 3887 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3888 _EXC_INFO_ARG); 3889#endif 3890 BID_RETURN (res); 3891} 3892 3893// Note: bid128qqq_fma is represented by bid128_fma 3894 3895// Note: bid64ddd_fma is represented by bid64_fma 3896 3897#if DECIMAL_CALL_BY_REFERENCE 3898void 3899bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz 3900 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3901 _EXC_INFO_PARAM) { 3902 UINT64 x = *px, y = *py; 3903#if !DECIMAL_GLOBAL_ROUNDING 3904 unsigned int rnd_mode = *prnd_mode; 3905#endif 3906#else 3907UINT64 3908bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z 3909 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3910 _EXC_INFO_PARAM) { 3911#endif 3912 UINT64 res1 = 0xbaddbaddbaddbaddull; 3913 UINT128 x1, y1; 3914 3915#if DECIMAL_CALL_BY_REFERENCE 3916 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3917 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3918 bid64qqq_fma (&res1, &x1, &y1, pz 3919 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3920 _EXC_INFO_ARG); 3921#else 3922 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3923 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3924 res1 = bid64qqq_fma (x1, y1, z 3925 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3926 _EXC_INFO_ARG); 3927#endif 3928 BID_RETURN (res1); 3929} 3930 3931 3932#if DECIMAL_CALL_BY_REFERENCE 3933void 3934bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz 3935 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3936 _EXC_INFO_PARAM) { 3937 UINT64 x = *px, z = *pz; 3938#if !DECIMAL_GLOBAL_ROUNDING 3939 unsigned int rnd_mode = *prnd_mode; 3940#endif 3941#else 3942UINT64 3943bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z 3944 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3945 _EXC_INFO_PARAM) { 3946#endif 3947 UINT64 res1 = 0xbaddbaddbaddbaddull; 3948 UINT128 x1, z1; 3949 3950#if DECIMAL_CALL_BY_REFERENCE 3951 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3952 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3953 bid64qqq_fma (&res1, &x1, py, &z1 3954 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3955 _EXC_INFO_ARG); 3956#else 3957 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3958 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3959 res1 = bid64qqq_fma (x1, y, z1 3960 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3961 _EXC_INFO_ARG); 3962#endif 3963 BID_RETURN (res1); 3964} 3965 3966 3967#if DECIMAL_CALL_BY_REFERENCE 3968void 3969bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz 3970 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3971 _EXC_INFO_PARAM) { 3972 UINT64 x = *px; 3973#if !DECIMAL_GLOBAL_ROUNDING 3974 unsigned int rnd_mode = *prnd_mode; 3975#endif 3976#else 3977UINT64 3978bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z 3979 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3980 _EXC_INFO_PARAM) { 3981#endif 3982 UINT64 res1 = 0xbaddbaddbaddbaddull; 3983 UINT128 x1; 3984 3985#if DECIMAL_CALL_BY_REFERENCE 3986 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3987 bid64qqq_fma (&res1, &x1, py, pz 3988 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3989 _EXC_INFO_ARG); 3990#else 3991 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3992 res1 = bid64qqq_fma (x1, y, z 3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3994 _EXC_INFO_ARG); 3995#endif 3996 BID_RETURN (res1); 3997} 3998 3999 4000#if DECIMAL_CALL_BY_REFERENCE 4001void 4002bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz 4003 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4004 _EXC_INFO_PARAM) { 4005 UINT64 y = *py, z = *pz; 4006#if !DECIMAL_GLOBAL_ROUNDING 4007 unsigned int rnd_mode = *prnd_mode; 4008#endif 4009#else 4010UINT64 4011bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z 4012 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4013 _EXC_INFO_PARAM) { 4014#endif 4015 UINT64 res1 = 0xbaddbaddbaddbaddull; 4016 UINT128 y1, z1; 4017 4018#if DECIMAL_CALL_BY_REFERENCE 4019 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4020 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4021 bid64qqq_fma (&res1, px, &y1, &z1 4022 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4023 _EXC_INFO_ARG); 4024#else 4025 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4026 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4027 res1 = bid64qqq_fma (x, y1, z1 4028 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4029 _EXC_INFO_ARG); 4030#endif 4031 BID_RETURN (res1); 4032} 4033 4034 4035#if DECIMAL_CALL_BY_REFERENCE 4036void 4037bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz 4038 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4039 _EXC_INFO_PARAM) { 4040 UINT64 y = *py; 4041#if !DECIMAL_GLOBAL_ROUNDING 4042 unsigned int rnd_mode = *prnd_mode; 4043#endif 4044#else 4045UINT64 4046bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z 4047 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4048 _EXC_INFO_PARAM) { 4049#endif 4050 UINT64 res1 = 0xbaddbaddbaddbaddull; 4051 UINT128 y1; 4052 4053#if DECIMAL_CALL_BY_REFERENCE 4054 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4055 bid64qqq_fma (&res1, px, &y1, pz 4056 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4057 _EXC_INFO_ARG); 4058#else 4059 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4060 res1 = bid64qqq_fma (x, y1, z 4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4062 _EXC_INFO_ARG); 4063#endif 4064 BID_RETURN (res1); 4065} 4066 4067 4068#if DECIMAL_CALL_BY_REFERENCE 4069void 4070bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz 4071 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4072 _EXC_INFO_PARAM) { 4073 UINT64 z = *pz; 4074#if !DECIMAL_GLOBAL_ROUNDING 4075 unsigned int rnd_mode = *prnd_mode; 4076#endif 4077#else 4078UINT64 4079bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z 4080 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4081 _EXC_INFO_PARAM) { 4082#endif 4083 UINT64 res1 = 0xbaddbaddbaddbaddull; 4084 UINT128 z1; 4085 4086#if DECIMAL_CALL_BY_REFERENCE 4087 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4088 bid64qqq_fma (&res1, px, py, &z1 4089 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4090 _EXC_INFO_ARG); 4091#else 4092 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4093 res1 = bid64qqq_fma (x, y, z1 4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4095 _EXC_INFO_ARG); 4096#endif 4097 BID_RETURN (res1); 4098} 4099 4100 4101#if DECIMAL_CALL_BY_REFERENCE 4102void 4103bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz 4104 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4105 _EXC_INFO_PARAM) { 4106 UINT128 x = *px, y = *py, z = *pz; 4107#if !DECIMAL_GLOBAL_ROUNDING 4108 unsigned int rnd_mode = *prnd_mode; 4109#endif 4110#else 4111UINT64 4112bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z 4113 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4114 _EXC_INFO_PARAM) { 4115#endif 4116 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0, 4117 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 4118 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 4119 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 4120 int incr_exp; 4121 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 4122 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 4123 UINT64 res1 = 0xbaddbaddbaddbaddull; 4124 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma 4125 UINT64 sign; 4126 UINT64 exp; 4127 int unbexp; 4128 UINT128 C; 4129 BID_UI64DOUBLE tmp; 4130 int nr_bits; 4131 int q, x0; 4132 int scale; 4133 int lt_half_ulp = 0, eq_half_ulp = 0; 4134 4135 // Note: for rounding modes other than RN or RA, the result can be obtained 4136 // by rounding first to BID128 and then to BID64 4137 4138 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 4139 *pfpsf = 0; 4140 4141#if DECIMAL_CALL_BY_REFERENCE 4142 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, 4143 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0, 4144 &res, &x, &y, &z 4145 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4146 _EXC_INFO_ARG); 4147#else 4148 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, 4149 &is_inexact_lt_midpoint0, 4150 &is_inexact_gt_midpoint0, x, y, 4151 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4152 _EXC_INFO_ARG); 4153#endif 4154 4155 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) || 4156 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible 4157 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN) 4158 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity 4159#if DECIMAL_CALL_BY_REFERENCE 4160 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); 4161#else 4162 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); 4163#endif 4164 // determine the unbiased exponent of the result 4165 unbexp = ((res1 >> 53) & 0x3ff) - 398; 4166 4167 // if subnormal, res1 must have exp = -398 4168 // if tiny and inexact set underflow and inexact status flags 4169 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN 4170 (unbexp == -398) 4171 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull) 4172 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 4173 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) { 4174 // set the inexact flag and the underflow flag 4175 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); 4176 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || 4177 is_midpoint_lt_even0 || is_midpoint_gt_even0) { 4178 // set the inexact flag and the underflow flag 4179 *pfpsf |= INEXACT_EXCEPTION; 4180 } 4181 4182 *pfpsf |= save_fpsf; 4183 BID_RETURN (res1); 4184 } // else continue, and use rounding to nearest to round to 16 digits 4185 4186 // at this point the result is rounded to nearest (even or away) to 34 digits 4187 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal 4188 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 4189 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits 4190 unbexp = (exp >> 49) - 6176; 4191 C.w[1] = res.w[HIGH_128W] & MASK_COEFF; 4192 C.w[0] = res.w[LOW_128W]; 4193 4194 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero 4195 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { 4196 // clear under/overflow 4197#if DECIMAL_CALL_BY_REFERENCE 4198 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); 4199#else 4200 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); 4201#endif 4202 *pfpsf |= save_fpsf; 4203 BID_RETURN (res1); 4204 } // else continue 4205 4206 // -398 - 34 <= unbexp <= 369 + 15 4207 if (rnd_mode == ROUNDING_TIES_AWAY) { 4208 // apply correction, if needed, to make the result rounded to nearest-even 4209 if (is_midpoint_gt_even) { 4210 // res = res - 1 4211 res1--; // res1 is now even 4212 } // else the result is already correctly rounded to nearest-even 4213 } 4214 // at this point the result is finite, non-zero canonical normal or subnormal, 4215 // and in most cases overflow or underflow will not occur 4216 4217 // determine the number of digits q in the result 4218 // q = nr. of decimal digits in x 4219 // determine first the nr. of bits in x 4220 if (C.w[1] == 0) { 4221 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53 4222 // split the 64-bit value in two 32-bit halves to avoid rounding errors 4223 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32 4224 tmp.d = (double) (C.w[0] >> 32); // exact conversion 4225 nr_bits = 4226 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4227 } else { // x < 2^32 4228 tmp.d = (double) (C.w[0]); // exact conversion 4229 nr_bits = 4230 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4231 } 4232 } else { // if x < 2^53 4233 tmp.d = (double) C.w[0]; // exact conversion 4234 nr_bits = 4235 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4236 } 4237 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1]) 4238 tmp.d = (double) C.w[1]; // exact conversion 4239 nr_bits = 4240 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4241 } 4242 q = nr_digits[nr_bits - 1].digits; 4243 if (q == 0) { 4244 q = nr_digits[nr_bits - 1].digits1; 4245 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi || 4246 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi && 4247 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo)) 4248 q++; 4249 } 4250 // if q > 16, round to nearest even to 16 digits (but for underflow it may 4251 // have to be truncated even more) 4252 if (q > 16) { 4253 x0 = q - 16; 4254 if (q <= 18) { 4255 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp, 4256 &is_midpoint_lt_even, &is_midpoint_gt_even, 4257 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4258 } else { // 19 <= q <= 34 4259 round128_19_38 (q, x0, C, &res128, &incr_exp, 4260 &is_midpoint_lt_even, &is_midpoint_gt_even, 4261 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4262 res1 = res128.w[0]; // the result fits in 64 bits 4263 } 4264 unbexp = unbexp + x0; 4265 if (incr_exp) 4266 unbexp++; 4267 q = 16; // need to set in case denormalization is necessary 4268 } else { 4269 // the result does not require a second rounding (and it must have 4270 // been exact in the first rounding, since q <= 16) 4271 res1 = C.w[0]; 4272 } 4273 4274 // avoid a double rounding error 4275 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 4276 is_midpoint_lt_even) { // double rounding error upward 4277 // res = res - 1 4278 res1--; // res1 becomes odd 4279 is_midpoint_lt_even = 0; 4280 is_inexact_lt_midpoint = 1; 4281 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1 4282 res1 = 0x002386f26fc0ffffull; // 10^16 - 1 4283 unbexp--; 4284 } 4285 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 4286 is_midpoint_gt_even) { // double rounding error downward 4287 // res = res + 1 4288 res1++; // res1 becomes odd (so it cannot be 10^16) 4289 is_midpoint_gt_even = 0; 4290 is_inexact_gt_midpoint = 1; 4291 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 4292 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 4293 // if this second rounding was exact the result may still be 4294 // inexact because of the first rounding 4295 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 4296 is_inexact_gt_midpoint = 1; 4297 } 4298 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 4299 is_inexact_lt_midpoint = 1; 4300 } 4301 } else if (is_midpoint_gt_even && 4302 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 4303 // pulled up to a midpoint 4304 is_inexact_lt_midpoint = 1; 4305 is_inexact_gt_midpoint = 0; 4306 is_midpoint_lt_even = 0; 4307 is_midpoint_gt_even = 0; 4308 } else if (is_midpoint_lt_even && 4309 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 4310 // pulled down to a midpoint 4311 is_inexact_lt_midpoint = 0; 4312 is_inexact_gt_midpoint = 1; 4313 is_midpoint_lt_even = 0; 4314 is_midpoint_gt_even = 0; 4315 } else { 4316 ; 4317 } 4318 // this is the result rounded correctly to nearest even, with unbounded exp. 4319 4320 // check for overflow 4321 if (q + unbexp > P16 + expmax16) { 4322 res1 = sign | 0x7800000000000000ull; 4323 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 4324 *pfpsf |= save_fpsf; 4325 BID_RETURN (res1) 4326 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16 4327 // not overflow; the result must be exact, and we can multiply res1 by 4328 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits 4329 scale = unbexp - expmax16; 4330 res1 = res1 * ten2k64[scale]; // res1 * 10^scale 4331 unbexp = expmax16; // unbexp - scale 4332 } else { 4333 ; // continue 4334 } 4335 4336 // check for underflow 4337 if (q + unbexp < P16 + expmin16) { 4338 if (unbexp < expmin16) { 4339 // we must truncate more of res 4340 x0 = expmin16 - unbexp; // x0 >= 1 4341 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 4342 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 4343 is_midpoint_lt_even0 = is_midpoint_lt_even; 4344 is_midpoint_gt_even0 = is_midpoint_gt_even; 4345 is_inexact_lt_midpoint = 0; 4346 is_inexact_gt_midpoint = 0; 4347 is_midpoint_lt_even = 0; 4348 is_midpoint_gt_even = 0; 4349 // the number of decimal digits in res1 is q 4350 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits 4351 // 2 <= q <= 16, 1 <= x0 <= 15 4352 round64_2_18 (q, x0, res1, &res1, &incr_exp, 4353 &is_midpoint_lt_even, &is_midpoint_gt_even, 4354 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4355 if (incr_exp) { 4356 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15 4357 res1 = ten2k64[q - x0]; 4358 } 4359 unbexp = unbexp + x0; // expmin16 4360 } else if (x0 == q) { 4361 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin 4362 // determine relationship with 1/2 ulp 4363 // q <= 16 4364 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp 4365 lt_half_ulp = 1; 4366 is_inexact_lt_midpoint = 1; 4367 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp 4368 eq_half_ulp = 1; 4369 is_midpoint_gt_even = 1; 4370 } else { // > 1/2 ulp 4371 // gt_half_ulp = 1; 4372 is_inexact_gt_midpoint = 1; 4373 } 4374 if (lt_half_ulp || eq_half_ulp) { 4375 // res = +0.0 * 10^expmin16 4376 res1 = 0x0000000000000000ull; 4377 } else { // if (gt_half_ulp) 4378 // res = +1 * 10^expmin16 4379 res1 = 0x0000000000000001ull; 4380 } 4381 unbexp = expmin16; 4382 } else { // if (x0 > q) 4383 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin 4384 res1 = 0x0000000000000000ull; 4385 unbexp = expmin16; 4386 is_inexact_lt_midpoint = 1; 4387 } 4388 // avoid a double rounding error 4389 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 4390 is_midpoint_lt_even) { // double rounding error upward 4391 // res = res - 1 4392 res1--; // res1 becomes odd 4393 is_midpoint_lt_even = 0; 4394 is_inexact_lt_midpoint = 1; 4395 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 4396 is_midpoint_gt_even) { // double rounding error downward 4397 // res = res + 1 4398 res1++; // res1 becomes odd 4399 is_midpoint_gt_even = 0; 4400 is_inexact_gt_midpoint = 1; 4401 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 4402 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 4403 // if this rounding was exact the result may still be 4404 // inexact because of the previous roundings 4405 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 4406 is_inexact_gt_midpoint = 1; 4407 } 4408 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 4409 is_inexact_lt_midpoint = 1; 4410 } 4411 } else if (is_midpoint_gt_even && 4412 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 4413 // pulled up to a midpoint 4414 is_inexact_lt_midpoint = 1; 4415 is_inexact_gt_midpoint = 0; 4416 is_midpoint_lt_even = 0; 4417 is_midpoint_gt_even = 0; 4418 } else if (is_midpoint_lt_even && 4419 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 4420 // pulled down to a midpoint 4421 is_inexact_lt_midpoint = 0; 4422 is_inexact_gt_midpoint = 1; 4423 is_midpoint_lt_even = 0; 4424 is_midpoint_gt_even = 0; 4425 } else { 4426 ; 4427 } 4428 } 4429 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16) 4430 // and the result is tiny and exact 4431 4432 // check for inexact result 4433 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 4434 is_midpoint_lt_even || is_midpoint_gt_even || 4435 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || 4436 is_midpoint_lt_even0 || is_midpoint_gt_even0) { 4437 // set the inexact flag and the underflow flag 4438 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); 4439 } 4440 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 4441 is_midpoint_lt_even || is_midpoint_gt_even) { 4442 *pfpsf |= INEXACT_EXCEPTION; 4443 } 4444 // this is the result rounded correctly to nearest, with bounded exponent 4445 4446 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction 4447 // res = res + 1 4448 res1++; // res1 is now odd 4449 } // else the result is already correct 4450 4451 // assemble the result 4452 if (res1 < 0x0020000000000000ull) { // res < 2^53 4453 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1; 4454 } else { // res1 >= 2^53 4455 res1 = sign | MASK_STEERING_BITS | 4456 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2); 4457 } 4458 *pfpsf |= save_fpsf; 4459 BID_RETURN (res1); 4460} 4461