1/* Copyright (C) 2007-2020 Free Software Foundation, Inc. 2 3This file is part of GCC. 4 5GCC is free software; you can redistribute it and/or modify it under 6the terms of the GNU General Public License as published by the Free 7Software Foundation; either version 3, or (at your option) any later 8version. 9 10GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11WARRANTY; without even the implied warranty of MERCHANTABILITY or 12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24#define BID_128RES 25#include "bid_internal.h" 26#include "bid_sqrt_macros.h" 27#ifdef UNCHANGED_BINARY_STATUS_FLAGS 28#include <fenv.h> 29 30#define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT 31#endif 32 33BID128_FUNCTION_ARG1 (bid128_sqrt, x) 34 35 UINT256 M256, C256, C4, C8; 36 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; 37 UINT64 sign_x, Carry; 38 SINT64 D; 39 int_float fx, f64; 40 int exponent_x, bin_expon_cx; 41 int digits, scale, exponent_q; 42#ifdef UNCHANGED_BINARY_STATUS_FLAGS 43 fexcept_t binaryflags = 0; 44#endif 45 46 // unpack arguments, check for NaN or Infinity 47if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { 48res.w[1] = CX.w[1]; 49res.w[0] = CX.w[0]; 50 // NaN ? 51if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 52#ifdef SET_STATUS_FLAGS 53 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN 54 __set_status_flags (pfpsf, INVALID_EXCEPTION); 55#endif 56 res.w[1] = CX.w[1] & QUIET_MASK64; 57 BID_RETURN (res); 58} 59 // x is Infinity? 60if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 61 res.w[1] = CX.w[1]; 62 if (sign_x) { 63 // -Inf, return NaN 64 res.w[1] = 0x7c00000000000000ull; 65#ifdef SET_STATUS_FLAGS 66 __set_status_flags (pfpsf, INVALID_EXCEPTION); 67#endif 68 } 69 BID_RETURN (res); 70} 71 // x is 0 otherwise 72 73res.w[1] = 74 sign_x | 75 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49); 76res.w[0] = 0; 77BID_RETURN (res); 78} 79if (sign_x) { 80 res.w[1] = 0x7c00000000000000ull; 81 res.w[0] = 0; 82#ifdef SET_STATUS_FLAGS 83 __set_status_flags (pfpsf, INVALID_EXCEPTION); 84#endif 85 BID_RETURN (res); 86} 87#ifdef UNCHANGED_BINARY_STATUS_FLAGS 88(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 89#endif 90 // 2^64 91f64.i = 0x5f800000; 92 93 // fx ~ CX 94fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 95bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; 96digits = estimate_decimal_digits[bin_expon_cx]; 97 98A10 = CX; 99if (exponent_x & 1) { 100 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); 101 A10.w[0] = CX.w[0] << 3; 102 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); 103 CX2.w[0] = CX.w[0] << 1; 104 __add_128_128 (A10, A10, CX2); 105} 106 107CS.w[0] = short_sqrt128 (A10); 108CS.w[1] = 0; 109 // check for exact result 110if (CS.w[0] * CS.w[0] == A10.w[0]) { 111 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); 112 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) 113 { 114 get_BID128_very_fast (&res, 0, 115 (exponent_x + 116 DECIMAL_EXPONENT_BIAS_128) >> 1, CS); 117#ifdef UNCHANGED_BINARY_STATUS_FLAGS 118 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 119#endif 120 BID_RETURN (res); 121 } 122} 123 // get number of digits in CX 124D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; 125if (D > 0 126 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) 127 digits++; 128 129 // if exponent is odd, scale coefficient by 10 130scale = 67 - digits; 131exponent_q = exponent_x - scale; 132scale += (exponent_q & 1); // exp. bias is even 133 134if (scale > 38) { 135 T128 = power10_table_128[scale - 37]; 136 __mul_128x128_low (CX1, CX, T128); 137 138 TP128 = power10_table_128[37]; 139 __mul_128x128_to_256 (C256, CX1, TP128); 140} else { 141 T128 = power10_table_128[scale]; 142 __mul_128x128_to_256 (C256, CX, T128); 143} 144 145 146 // 4*C256 147C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); 148C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); 149C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); 150C4.w[0] = C256.w[0] << 2; 151 152long_sqrt128 (&CS, C256); 153 154#ifndef IEEE_ROUND_NEAREST 155#ifndef IEEE_ROUND_NEAREST_TIES_AWAY 156if (!((rnd_mode) & 3)) { 157#endif 158#endif 159 // compare to midpoints 160 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 161 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; 162 // CSM^2 163 //__mul_128x128_to_256(M256, CSM, CSM); 164 __sqr128_to_256 (M256, CSM); 165 166 if (C4.w[3] > M256.w[3] 167 || (C4.w[3] == M256.w[3] 168 && (C4.w[2] > M256.w[2] 169 || (C4.w[2] == M256.w[2] 170 && (C4.w[1] > M256.w[1] 171 || (C4.w[1] == M256.w[1] 172 && C4.w[0] > M256.w[0])))))) { 173 // round up 174 CS.w[0]++; 175 if (!CS.w[0]) 176 CS.w[1]++; 177 } else { 178 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); 179 C8.w[0] = CS.w[0] << 3; 180 // M256 - 8*CSM 181 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 182 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 183 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 184 M256.w[3] = M256.w[3] - Carry; 185 186 // if CSM' > C256, round up 187 if (M256.w[3] > C4.w[3] 188 || (M256.w[3] == C4.w[3] 189 && (M256.w[2] > C4.w[2] 190 || (M256.w[2] == C4.w[2] 191 && (M256.w[1] > C4.w[1] 192 || (M256.w[1] == C4.w[1] 193 && M256.w[0] > C4.w[0])))))) { 194 // round down 195 if (!CS.w[0]) 196 CS.w[1]--; 197 CS.w[0]--; 198 } 199 } 200#ifndef IEEE_ROUND_NEAREST 201#ifndef IEEE_ROUND_NEAREST_TIES_AWAY 202} else { 203 __sqr128_to_256 (M256, CS); 204 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 205 C8.w[0] = CS.w[0] << 1; 206 if (M256.w[3] > C256.w[3] 207 || (M256.w[3] == C256.w[3] 208 && (M256.w[2] > C256.w[2] 209 || (M256.w[2] == C256.w[2] 210 && (M256.w[1] > C256.w[1] 211 || (M256.w[1] == C256.w[1] 212 && M256.w[0] > C256.w[0])))))) { 213 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 214 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 215 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 216 M256.w[3] = M256.w[3] - Carry; 217 M256.w[0]++; 218 if (!M256.w[0]) { 219 M256.w[1]++; 220 if (!M256.w[1]) { 221 M256.w[2]++; 222 if (!M256.w[2]) 223 M256.w[3]++; 224 } 225 } 226 227 if (!CS.w[0]) 228 CS.w[1]--; 229 CS.w[0]--; 230 231 if (M256.w[3] > C256.w[3] 232 || (M256.w[3] == C256.w[3] 233 && (M256.w[2] > C256.w[2] 234 || (M256.w[2] == C256.w[2] 235 && (M256.w[1] > C256.w[1] 236 || (M256.w[1] == C256.w[1] 237 && M256.w[0] > C256.w[0])))))) { 238 239 if (!CS.w[0]) 240 CS.w[1]--; 241 CS.w[0]--; 242 } 243 } 244 245 else { 246 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 247 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 248 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 249 M256.w[3] = M256.w[3] + Carry; 250 M256.w[0]++; 251 if (!M256.w[0]) { 252 M256.w[1]++; 253 if (!M256.w[1]) { 254 M256.w[2]++; 255 if (!M256.w[2]) 256 M256.w[3]++; 257 } 258 } 259 if (M256.w[3] < C256.w[3] 260 || (M256.w[3] == C256.w[3] 261 && (M256.w[2] < C256.w[2] 262 || (M256.w[2] == C256.w[2] 263 && (M256.w[1] < C256.w[1] 264 || (M256.w[1] == C256.w[1] 265 && M256.w[0] <= C256.w[0])))))) { 266 267 CS.w[0]++; 268 if (!CS.w[0]) 269 CS.w[1]++; 270 } 271 } 272 // RU? 273 if ((rnd_mode) == ROUNDING_UP) { 274 CS.w[0]++; 275 if (!CS.w[0]) 276 CS.w[1]++; 277 } 278 279} 280#endif 281#endif 282 283#ifdef SET_STATUS_FLAGS 284__set_status_flags (pfpsf, INEXACT_EXCEPTION); 285#endif 286get_BID128_fast (&res, 0, 287 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS); 288#ifdef UNCHANGED_BINARY_STATUS_FLAGS 289(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 290#endif 291BID_RETURN (res); 292} 293 294 295 296BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x) 297 298 UINT256 M256, C256, C4, C8; 299 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; 300 UINT64 sign_x, Carry; 301 SINT64 D; 302 int_float fx, f64; 303 int exponent_x, bin_expon_cx; 304 int digits, scale, exponent_q; 305#ifdef UNCHANGED_BINARY_STATUS_FLAGS 306 fexcept_t binaryflags = 0; 307#endif 308 309 // unpack arguments, check for NaN or Infinity 310 // unpack arguments, check for NaN or Infinity 311CX.w[1] = 0; 312if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { 313res.w[1] = CX.w[0]; 314res.w[0] = 0; 315 // NaN ? 316if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 317#ifdef SET_STATUS_FLAGS 318 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN 319 __set_status_flags (pfpsf, INVALID_EXCEPTION); 320#endif 321 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); 322 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 323 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); 324 BID_RETURN (res); 325} 326 // x is Infinity? 327if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { 328 if (sign_x) { 329 // -Inf, return NaN 330 res.w[1] = 0x7c00000000000000ull; 331#ifdef SET_STATUS_FLAGS 332 __set_status_flags (pfpsf, INVALID_EXCEPTION); 333#endif 334 } 335 BID_RETURN (res); 336} 337 // x is 0 otherwise 338 339exponent_x = 340 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; 341res.w[1] = 342 sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) 343 << 49); 344res.w[0] = 0; 345BID_RETURN (res); 346} 347if (sign_x) { 348 res.w[1] = 0x7c00000000000000ull; 349 res.w[0] = 0; 350#ifdef SET_STATUS_FLAGS 351 __set_status_flags (pfpsf, INVALID_EXCEPTION); 352#endif 353 BID_RETURN (res); 354} 355#ifdef UNCHANGED_BINARY_STATUS_FLAGS 356(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 357#endif 358exponent_x = 359 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; 360 361 // 2^64 362f64.i = 0x5f800000; 363 364 // fx ~ CX 365fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 366bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; 367digits = estimate_decimal_digits[bin_expon_cx]; 368 369A10 = CX; 370if (exponent_x & 1) { 371 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); 372 A10.w[0] = CX.w[0] << 3; 373 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); 374 CX2.w[0] = CX.w[0] << 1; 375 __add_128_128 (A10, A10, CX2); 376} 377 378CS.w[0] = short_sqrt128 (A10); 379CS.w[1] = 0; 380 // check for exact result 381if (CS.w[0] * CS.w[0] == A10.w[0]) { 382 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); 383 if (S2.w[1] == A10.w[1]) { 384 get_BID128_very_fast (&res, 0, 385 (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1, 386 CS); 387#ifdef UNCHANGED_BINARY_STATUS_FLAGS 388 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 389#endif 390 BID_RETURN (res); 391 } 392} 393 // get number of digits in CX 394D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; 395if (D > 0 396 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) 397 digits++; 398 399 // if exponent is odd, scale coefficient by 10 400scale = 67 - digits; 401exponent_q = exponent_x - scale; 402scale += (exponent_q & 1); // exp. bias is even 403 404if (scale > 38) { 405 T128 = power10_table_128[scale - 37]; 406 __mul_128x128_low (CX1, CX, T128); 407 408 TP128 = power10_table_128[37]; 409 __mul_128x128_to_256 (C256, CX1, TP128); 410} else { 411 T128 = power10_table_128[scale]; 412 __mul_128x128_to_256 (C256, CX, T128); 413} 414 415 416 // 4*C256 417C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); 418C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); 419C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); 420C4.w[0] = C256.w[0] << 2; 421 422long_sqrt128 (&CS, C256); 423 424#ifndef IEEE_ROUND_NEAREST 425#ifndef IEEE_ROUND_NEAREST_TIES_AWAY 426if (!((rnd_mode) & 3)) { 427#endif 428#endif 429 // compare to midpoints 430 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 431 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; 432 // CSM^2 433 //__mul_128x128_to_256(M256, CSM, CSM); 434 __sqr128_to_256 (M256, CSM); 435 436 if (C4.w[3] > M256.w[3] 437 || (C4.w[3] == M256.w[3] 438 && (C4.w[2] > M256.w[2] 439 || (C4.w[2] == M256.w[2] 440 && (C4.w[1] > M256.w[1] 441 || (C4.w[1] == M256.w[1] 442 && C4.w[0] > M256.w[0])))))) { 443 // round up 444 CS.w[0]++; 445 if (!CS.w[0]) 446 CS.w[1]++; 447 } else { 448 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); 449 C8.w[0] = CS.w[0] << 3; 450 // M256 - 8*CSM 451 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 452 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 453 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 454 M256.w[3] = M256.w[3] - Carry; 455 456 // if CSM' > C256, round up 457 if (M256.w[3] > C4.w[3] 458 || (M256.w[3] == C4.w[3] 459 && (M256.w[2] > C4.w[2] 460 || (M256.w[2] == C4.w[2] 461 && (M256.w[1] > C4.w[1] 462 || (M256.w[1] == C4.w[1] 463 && M256.w[0] > C4.w[0])))))) { 464 // round down 465 if (!CS.w[0]) 466 CS.w[1]--; 467 CS.w[0]--; 468 } 469 } 470#ifndef IEEE_ROUND_NEAREST 471#ifndef IEEE_ROUND_NEAREST_TIES_AWAY 472} else { 473 __sqr128_to_256 (M256, CS); 474 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 475 C8.w[0] = CS.w[0] << 1; 476 if (M256.w[3] > C256.w[3] 477 || (M256.w[3] == C256.w[3] 478 && (M256.w[2] > C256.w[2] 479 || (M256.w[2] == C256.w[2] 480 && (M256.w[1] > C256.w[1] 481 || (M256.w[1] == C256.w[1] 482 && M256.w[0] > C256.w[0])))))) { 483 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 484 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 485 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 486 M256.w[3] = M256.w[3] - Carry; 487 M256.w[0]++; 488 if (!M256.w[0]) { 489 M256.w[1]++; 490 if (!M256.w[1]) { 491 M256.w[2]++; 492 if (!M256.w[2]) 493 M256.w[3]++; 494 } 495 } 496 497 if (!CS.w[0]) 498 CS.w[1]--; 499 CS.w[0]--; 500 501 if (M256.w[3] > C256.w[3] 502 || (M256.w[3] == C256.w[3] 503 && (M256.w[2] > C256.w[2] 504 || (M256.w[2] == C256.w[2] 505 && (M256.w[1] > C256.w[1] 506 || (M256.w[1] == C256.w[1] 507 && M256.w[0] > C256.w[0])))))) { 508 509 if (!CS.w[0]) 510 CS.w[1]--; 511 CS.w[0]--; 512 } 513 } 514 515 else { 516 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 517 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 518 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 519 M256.w[3] = M256.w[3] + Carry; 520 M256.w[0]++; 521 if (!M256.w[0]) { 522 M256.w[1]++; 523 if (!M256.w[1]) { 524 M256.w[2]++; 525 if (!M256.w[2]) 526 M256.w[3]++; 527 } 528 } 529 if (M256.w[3] < C256.w[3] 530 || (M256.w[3] == C256.w[3] 531 && (M256.w[2] < C256.w[2] 532 || (M256.w[2] == C256.w[2] 533 && (M256.w[1] < C256.w[1] 534 || (M256.w[1] == C256.w[1] 535 && M256.w[0] <= C256.w[0])))))) { 536 537 CS.w[0]++; 538 if (!CS.w[0]) 539 CS.w[1]++; 540 } 541 } 542 // RU? 543 if ((rnd_mode) == ROUNDING_UP) { 544 CS.w[0]++; 545 if (!CS.w[0]) 546 CS.w[1]++; 547 } 548 549} 550#endif 551#endif 552 553#ifdef SET_STATUS_FLAGS 554__set_status_flags (pfpsf, INEXACT_EXCEPTION); 555#endif 556get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, 557 CS); 558#ifdef UNCHANGED_BINARY_STATUS_FLAGS 559(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 560#endif 561BID_RETURN (res); 562 563 564} 565