1/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
2
3This file is part of GCC.
4
5GCC is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
7Software Foundation; either version 3, or (at your option) any later
8version.
9
10GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13for more details.
14
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22<http://www.gnu.org/licenses/>.  */
23
24#define BID_128RES
25#include "bid_internal.h"
26#include "bid_sqrt_macros.h"
27#ifdef UNCHANGED_BINARY_STATUS_FLAGS
28#include <fenv.h>
29
30#define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
31#endif
32
33BID128_FUNCTION_ARG1 (bid128_sqrt, x)
34
35     UINT256 M256, C256, C4, C8;
36     UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
37     UINT64 sign_x, Carry;
38     SINT64 D;
39     int_float fx, f64;
40     int exponent_x, bin_expon_cx;
41     int digits, scale, exponent_q;
42#ifdef UNCHANGED_BINARY_STATUS_FLAGS
43     fexcept_t binaryflags = 0;
44#endif
45
46  // unpack arguments, check for NaN or Infinity
47if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
48res.w[1] = CX.w[1];
49res.w[0] = CX.w[0];
50    // NaN ?
51if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
52#ifdef SET_STATUS_FLAGS
53  if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)	// sNaN
54    __set_status_flags (pfpsf, INVALID_EXCEPTION);
55#endif
56  res.w[1] = CX.w[1] & QUIET_MASK64;
57  BID_RETURN (res);
58}
59    // x is Infinity?
60if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
61  res.w[1] = CX.w[1];
62  if (sign_x) {
63    // -Inf, return NaN
64    res.w[1] = 0x7c00000000000000ull;
65#ifdef SET_STATUS_FLAGS
66    __set_status_flags (pfpsf, INVALID_EXCEPTION);
67#endif
68  }
69  BID_RETURN (res);
70}
71    // x is 0 otherwise
72
73res.w[1] =
74  sign_x |
75  ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
76res.w[0] = 0;
77BID_RETURN (res);
78}
79if (sign_x) {
80  res.w[1] = 0x7c00000000000000ull;
81  res.w[0] = 0;
82#ifdef SET_STATUS_FLAGS
83  __set_status_flags (pfpsf, INVALID_EXCEPTION);
84#endif
85  BID_RETURN (res);
86}
87#ifdef UNCHANGED_BINARY_STATUS_FLAGS
88(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
89#endif
90  // 2^64
91f64.i = 0x5f800000;
92
93  // fx ~ CX
94fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
95bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
96digits = estimate_decimal_digits[bin_expon_cx];
97
98A10 = CX;
99if (exponent_x & 1) {
100  A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
101  A10.w[0] = CX.w[0] << 3;
102  CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
103  CX2.w[0] = CX.w[0] << 1;
104  __add_128_128 (A10, A10, CX2);
105}
106
107CS.w[0] = short_sqrt128 (A10);
108CS.w[1] = 0;
109  // check for exact result
110if (CS.w[0] * CS.w[0] == A10.w[0]) {
111  __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
112  if (S2.w[1] == A10.w[1])	// && S2.w[0]==A10.w[0])
113  {
114    get_BID128_very_fast (&res, 0,
115			  (exponent_x +
116			   DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
117#ifdef UNCHANGED_BINARY_STATUS_FLAGS
118    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
119#endif
120    BID_RETURN (res);
121  }
122}
123  // get number of digits in CX
124D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
125if (D > 0
126    || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
127  digits++;
128
129  // if exponent is odd, scale coefficient by 10
130scale = 67 - digits;
131exponent_q = exponent_x - scale;
132scale += (exponent_q & 1);	// exp. bias is even
133
134if (scale > 38) {
135  T128 = power10_table_128[scale - 37];
136  __mul_128x128_low (CX1, CX, T128);
137
138  TP128 = power10_table_128[37];
139  __mul_128x128_to_256 (C256, CX1, TP128);
140} else {
141  T128 = power10_table_128[scale];
142  __mul_128x128_to_256 (C256, CX, T128);
143}
144
145
146  // 4*C256
147C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
148C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
149C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
150C4.w[0] = C256.w[0] << 2;
151
152long_sqrt128 (&CS, C256);
153
154#ifndef IEEE_ROUND_NEAREST
155#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
156if (!((rnd_mode) & 3)) {
157#endif
158#endif
159  // compare to midpoints
160  CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
161  CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
162  // CSM^2
163  //__mul_128x128_to_256(M256, CSM, CSM);
164  __sqr128_to_256 (M256, CSM);
165
166  if (C4.w[3] > M256.w[3]
167      || (C4.w[3] == M256.w[3]
168	  && (C4.w[2] > M256.w[2]
169	      || (C4.w[2] == M256.w[2]
170		  && (C4.w[1] > M256.w[1]
171		      || (C4.w[1] == M256.w[1]
172			  && C4.w[0] > M256.w[0])))))) {
173    // round up
174    CS.w[0]++;
175    if (!CS.w[0])
176      CS.w[1]++;
177  } else {
178    C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
179    C8.w[0] = CS.w[0] << 3;
180    // M256 - 8*CSM
181    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
182    __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
183    __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
184    M256.w[3] = M256.w[3] - Carry;
185
186    // if CSM' > C256, round up
187    if (M256.w[3] > C4.w[3]
188	|| (M256.w[3] == C4.w[3]
189	    && (M256.w[2] > C4.w[2]
190		|| (M256.w[2] == C4.w[2]
191		    && (M256.w[1] > C4.w[1]
192			|| (M256.w[1] == C4.w[1]
193			    && M256.w[0] > C4.w[0])))))) {
194      // round down
195      if (!CS.w[0])
196	CS.w[1]--;
197      CS.w[0]--;
198    }
199  }
200#ifndef IEEE_ROUND_NEAREST
201#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
202} else {
203  __sqr128_to_256 (M256, CS);
204  C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
205  C8.w[0] = CS.w[0] << 1;
206  if (M256.w[3] > C256.w[3]
207      || (M256.w[3] == C256.w[3]
208	  && (M256.w[2] > C256.w[2]
209	      || (M256.w[2] == C256.w[2]
210		  && (M256.w[1] > C256.w[1]
211		      || (M256.w[1] == C256.w[1]
212			  && M256.w[0] > C256.w[0])))))) {
213    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
214    __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
215    __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
216    M256.w[3] = M256.w[3] - Carry;
217    M256.w[0]++;
218    if (!M256.w[0]) {
219      M256.w[1]++;
220      if (!M256.w[1]) {
221	M256.w[2]++;
222	if (!M256.w[2])
223	  M256.w[3]++;
224      }
225    }
226
227    if (!CS.w[0])
228      CS.w[1]--;
229    CS.w[0]--;
230
231    if (M256.w[3] > C256.w[3]
232	|| (M256.w[3] == C256.w[3]
233	    && (M256.w[2] > C256.w[2]
234		|| (M256.w[2] == C256.w[2]
235		    && (M256.w[1] > C256.w[1]
236			|| (M256.w[1] == C256.w[1]
237			    && M256.w[0] > C256.w[0])))))) {
238
239      if (!CS.w[0])
240	CS.w[1]--;
241      CS.w[0]--;
242    }
243  }
244
245  else {
246    __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
247    __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
248    __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
249    M256.w[3] = M256.w[3] + Carry;
250    M256.w[0]++;
251    if (!M256.w[0]) {
252      M256.w[1]++;
253      if (!M256.w[1]) {
254	M256.w[2]++;
255	if (!M256.w[2])
256	  M256.w[3]++;
257      }
258    }
259    if (M256.w[3] < C256.w[3]
260	|| (M256.w[3] == C256.w[3]
261	    && (M256.w[2] < C256.w[2]
262		|| (M256.w[2] == C256.w[2]
263		    && (M256.w[1] < C256.w[1]
264			|| (M256.w[1] == C256.w[1]
265			    && M256.w[0] <= C256.w[0])))))) {
266
267      CS.w[0]++;
268      if (!CS.w[0])
269	CS.w[1]++;
270    }
271  }
272  // RU?
273  if ((rnd_mode) == ROUNDING_UP) {
274    CS.w[0]++;
275    if (!CS.w[0])
276      CS.w[1]++;
277  }
278
279}
280#endif
281#endif
282
283#ifdef SET_STATUS_FLAGS
284__set_status_flags (pfpsf, INEXACT_EXCEPTION);
285#endif
286get_BID128_fast (&res, 0,
287		 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
288#ifdef UNCHANGED_BINARY_STATUS_FLAGS
289(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
290#endif
291BID_RETURN (res);
292}
293
294
295
296BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x)
297
298     UINT256 M256, C256, C4, C8;
299     UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
300     UINT64 sign_x, Carry;
301     SINT64 D;
302     int_float fx, f64;
303     int exponent_x, bin_expon_cx;
304     int digits, scale, exponent_q;
305#ifdef UNCHANGED_BINARY_STATUS_FLAGS
306     fexcept_t binaryflags = 0;
307#endif
308
309	// unpack arguments, check for NaN or Infinity
310   // unpack arguments, check for NaN or Infinity
311CX.w[1] = 0;
312if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
313res.w[1] = CX.w[0];
314res.w[0] = 0;
315	   // NaN ?
316if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
317#ifdef SET_STATUS_FLAGS
318  if ((x & SNAN_MASK64) == SNAN_MASK64)	// sNaN
319    __set_status_flags (pfpsf, INVALID_EXCEPTION);
320#endif
321  res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
322  __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
323  res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
324  BID_RETURN (res);
325}
326	   // x is Infinity?
327if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
328  if (sign_x) {
329    // -Inf, return NaN
330    res.w[1] = 0x7c00000000000000ull;
331#ifdef SET_STATUS_FLAGS
332    __set_status_flags (pfpsf, INVALID_EXCEPTION);
333#endif
334  }
335  BID_RETURN (res);
336}
337	   // x is 0 otherwise
338
339exponent_x =
340  exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
341res.w[1] =
342  sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1)
343	    << 49);
344res.w[0] = 0;
345BID_RETURN (res);
346}
347if (sign_x) {
348  res.w[1] = 0x7c00000000000000ull;
349  res.w[0] = 0;
350#ifdef SET_STATUS_FLAGS
351  __set_status_flags (pfpsf, INVALID_EXCEPTION);
352#endif
353  BID_RETURN (res);
354}
355#ifdef UNCHANGED_BINARY_STATUS_FLAGS
356(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
357#endif
358exponent_x =
359  exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
360
361	   // 2^64
362f64.i = 0x5f800000;
363
364	   // fx ~ CX
365fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
366bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
367digits = estimate_decimal_digits[bin_expon_cx];
368
369A10 = CX;
370if (exponent_x & 1) {
371  A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
372  A10.w[0] = CX.w[0] << 3;
373  CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
374  CX2.w[0] = CX.w[0] << 1;
375  __add_128_128 (A10, A10, CX2);
376}
377
378CS.w[0] = short_sqrt128 (A10);
379CS.w[1] = 0;
380	   // check for exact result
381if (CS.w[0] * CS.w[0] == A10.w[0]) {
382  __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
383  if (S2.w[1] == A10.w[1]) {
384    get_BID128_very_fast (&res, 0,
385			  (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
386			  CS);
387#ifdef UNCHANGED_BINARY_STATUS_FLAGS
388    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
389#endif
390    BID_RETURN (res);
391  }
392}
393	   // get number of digits in CX
394D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
395if (D > 0
396    || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
397  digits++;
398
399		// if exponent is odd, scale coefficient by 10
400scale = 67 - digits;
401exponent_q = exponent_x - scale;
402scale += (exponent_q & 1);	// exp. bias is even
403
404if (scale > 38) {
405  T128 = power10_table_128[scale - 37];
406  __mul_128x128_low (CX1, CX, T128);
407
408  TP128 = power10_table_128[37];
409  __mul_128x128_to_256 (C256, CX1, TP128);
410} else {
411  T128 = power10_table_128[scale];
412  __mul_128x128_to_256 (C256, CX, T128);
413}
414
415
416	   // 4*C256
417C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
418C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
419C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
420C4.w[0] = C256.w[0] << 2;
421
422long_sqrt128 (&CS, C256);
423
424#ifndef IEEE_ROUND_NEAREST
425#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
426if (!((rnd_mode) & 3)) {
427#endif
428#endif
429  // compare to midpoints
430  CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
431  CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
432  // CSM^2
433  //__mul_128x128_to_256(M256, CSM, CSM);
434  __sqr128_to_256 (M256, CSM);
435
436  if (C4.w[3] > M256.w[3]
437      || (C4.w[3] == M256.w[3]
438	  && (C4.w[2] > M256.w[2]
439	      || (C4.w[2] == M256.w[2]
440		  && (C4.w[1] > M256.w[1]
441		      || (C4.w[1] == M256.w[1]
442			  && C4.w[0] > M256.w[0])))))) {
443    // round up
444    CS.w[0]++;
445    if (!CS.w[0])
446      CS.w[1]++;
447  } else {
448    C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
449    C8.w[0] = CS.w[0] << 3;
450    // M256 - 8*CSM
451    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
452    __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
453    __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
454    M256.w[3] = M256.w[3] - Carry;
455
456    // if CSM' > C256, round up
457    if (M256.w[3] > C4.w[3]
458	|| (M256.w[3] == C4.w[3]
459	    && (M256.w[2] > C4.w[2]
460		|| (M256.w[2] == C4.w[2]
461		    && (M256.w[1] > C4.w[1]
462			|| (M256.w[1] == C4.w[1]
463			    && M256.w[0] > C4.w[0])))))) {
464      // round down
465      if (!CS.w[0])
466	CS.w[1]--;
467      CS.w[0]--;
468    }
469  }
470#ifndef IEEE_ROUND_NEAREST
471#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
472} else {
473  __sqr128_to_256 (M256, CS);
474  C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
475  C8.w[0] = CS.w[0] << 1;
476  if (M256.w[3] > C256.w[3]
477      || (M256.w[3] == C256.w[3]
478	  && (M256.w[2] > C256.w[2]
479	      || (M256.w[2] == C256.w[2]
480		  && (M256.w[1] > C256.w[1]
481		      || (M256.w[1] == C256.w[1]
482			  && M256.w[0] > C256.w[0])))))) {
483    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
484    __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
485    __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
486    M256.w[3] = M256.w[3] - Carry;
487    M256.w[0]++;
488    if (!M256.w[0]) {
489      M256.w[1]++;
490      if (!M256.w[1]) {
491	M256.w[2]++;
492	if (!M256.w[2])
493	  M256.w[3]++;
494      }
495    }
496
497    if (!CS.w[0])
498      CS.w[1]--;
499    CS.w[0]--;
500
501    if (M256.w[3] > C256.w[3]
502	|| (M256.w[3] == C256.w[3]
503	    && (M256.w[2] > C256.w[2]
504		|| (M256.w[2] == C256.w[2]
505		    && (M256.w[1] > C256.w[1]
506			|| (M256.w[1] == C256.w[1]
507			    && M256.w[0] > C256.w[0])))))) {
508
509      if (!CS.w[0])
510	CS.w[1]--;
511      CS.w[0]--;
512    }
513  }
514
515  else {
516    __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
517    __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
518    __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
519    M256.w[3] = M256.w[3] + Carry;
520    M256.w[0]++;
521    if (!M256.w[0]) {
522      M256.w[1]++;
523      if (!M256.w[1]) {
524	M256.w[2]++;
525	if (!M256.w[2])
526	  M256.w[3]++;
527      }
528    }
529    if (M256.w[3] < C256.w[3]
530	|| (M256.w[3] == C256.w[3]
531	    && (M256.w[2] < C256.w[2]
532		|| (M256.w[2] == C256.w[2]
533		    && (M256.w[1] < C256.w[1]
534			|| (M256.w[1] == C256.w[1]
535			    && M256.w[0] <= C256.w[0])))))) {
536
537      CS.w[0]++;
538      if (!CS.w[0])
539	CS.w[1]++;
540    }
541  }
542  // RU?
543  if ((rnd_mode) == ROUNDING_UP) {
544    CS.w[0]++;
545    if (!CS.w[0])
546      CS.w[1]++;
547  }
548
549}
550#endif
551#endif
552
553#ifdef SET_STATUS_FLAGS
554__set_status_flags (pfpsf, INEXACT_EXCEPTION);
555#endif
556get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1,
557		 CS);
558#ifdef UNCHANGED_BINARY_STATUS_FLAGS
559(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
560#endif
561BID_RETURN (res);
562
563
564}
565