1/* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
2
3This file is part of GCC.
4
5GCC is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
7Software Foundation; either version 3, or (at your option) any later
8version.
9
10GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13for more details.
14
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22<http://www.gnu.org/licenses/>.  */
23
24/*****************************************************************************
25 *
26 *  BID128 fma   x * y + z
27 *
28 ****************************************************************************/
29
30#include "bid_internal.h"
31
32static void
33rounding_correction (unsigned int rnd_mode,
34        	     unsigned int is_inexact_lt_midpoint,
35        	     unsigned int is_inexact_gt_midpoint,
36        	     unsigned int is_midpoint_lt_even,
37        	     unsigned int is_midpoint_gt_even,
38        	     int unbexp,
39        	     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40  // unbiased true exponent unbexp may be larger than emax
41
42  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
43  // (the exponent field is ignored, as unbexp is used instead)
44  UINT64 sign, exp;
45  UINT64 C_hi, C_lo;
46
47  // general correction from RN to RA, RM, RP, RZ
48  // Note: if the result is negative, then is_inexact_lt_midpoint,
49  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50  // have to be considered as if determined for the absolute value of the
51  // result (so they seem to be reversed)
52
53  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54      is_midpoint_lt_even || is_midpoint_gt_even) {
55    *ptrfpsf |= INEXACT_EXCEPTION;
56  }
57  // apply correction to result calculated with unbounded exponent
58  sign = res.w[1] & MASK_SIGN;
59  exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60  C_hi = res.w[1] & MASK_COEFF;
61  C_lo = res.w[0];
62  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
63      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
64      is_midpoint_gt_even))) ||
65      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
66      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67      is_midpoint_gt_even)))) {
68    // C = C + 1
69    C_lo = C_lo + 1;
70    if (C_lo == 0)
71      C_hi = C_hi + 1;
72    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73      // C = 10^34 => rounding overflow
74      C_hi = 0x0000314dc6448d93ull;
75      C_lo = 0x38c15b0a00000000ull; // 10^33
76      // exp = exp + EXP_P1;
77      unbexp = unbexp + 1;
78      exp = (UINT64) (unbexp + 6176) << 49;
79    }
80  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
81      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
82      (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83    // C = C - 1
84    C_lo = C_lo - 1;
85    if (C_lo == 0xffffffffffffffffull)
86      C_hi--;
87    // check if we crossed into the lower decade
88    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89      // C = 10^33 - 1
90      if (exp > 0) {
91        C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92        C_lo = 0x378d8e63ffffffffull;
93        // exp = exp - EXP_P1;
94        unbexp = unbexp - 1;
95        exp = (UINT64) (unbexp + 6176) << 49;
96      } else { // if exp = 0
97        if (is_midpoint_lt_even || is_midpoint_lt_even ||
98            is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
99          *ptrfpsf |= UNDERFLOW_EXCEPTION;
100      }
101    }
102  } else {
103    ; // the result is already correct
104  }
105  if (unbexp > expmax) { // 6111
106    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
107    exp = 0;
108    if (!sign) { // result is positive
109      if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
110        C_hi = 0x7800000000000000ull;
111        C_lo = 0x0000000000000000ull;
112      } else { // res = +MAXFP = (10^34-1) * 10^emax
113        C_hi = 0x5fffed09bead87c0ull;
114        C_lo = 0x378d8e63ffffffffull;
115      }
116    } else { // result is negative
117      if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
118        C_hi = 0xf800000000000000ull;
119        C_lo = 0x0000000000000000ull;
120      } else { // res = -MAXFP = -(10^34-1) * 10^emax
121        C_hi = 0xdfffed09bead87c0ull;
122        C_lo = 0x378d8e63ffffffffull;
123      }
124    }
125  }
126  // assemble the result
127  res.w[1] = sign | exp | C_hi;
128  res.w[0] = C_lo;
129  *ptrres = res;
130}
131
132static void
133add256 (UINT256 x, UINT256 y, UINT256 * pz) {
134  // *z = x + yl assume the sum fits in 256 bits
135  UINT256 z;
136  z.w[0] = x.w[0] + y.w[0];
137  if (z.w[0] < x.w[0]) {
138    x.w[1]++;
139    if (x.w[1] == 0x0000000000000000ull) {
140      x.w[2]++;
141      if (x.w[2] == 0x0000000000000000ull) {
142        x.w[3]++;
143      }
144    }
145  }
146  z.w[1] = x.w[1] + y.w[1];
147  if (z.w[1] < x.w[1]) {
148    x.w[2]++;
149    if (x.w[2] == 0x0000000000000000ull) {
150      x.w[3]++;
151    }
152  }
153  z.w[2] = x.w[2] + y.w[2];
154  if (z.w[2] < x.w[2]) {
155    x.w[3]++;
156  }
157  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
158  *pz = z;
159}
160
161static void
162sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
163  // *z = x - y; assume x >= y
164  UINT256 z;
165  z.w[0] = x.w[0] - y.w[0];
166  if (z.w[0] > x.w[0]) {
167    x.w[1]--;
168    if (x.w[1] == 0xffffffffffffffffull) {
169      x.w[2]--;
170      if (x.w[2] == 0xffffffffffffffffull) {
171        x.w[3]--;
172      }
173    }
174  }
175  z.w[1] = x.w[1] - y.w[1];
176  if (z.w[1] > x.w[1]) {
177    x.w[2]--;
178    if (x.w[2] == 0xffffffffffffffffull) {
179      x.w[3]--;
180    }
181  }
182  z.w[2] = x.w[2] - y.w[2];
183  if (z.w[2] > x.w[2]) {
184    x.w[3]--;
185  }
186  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
187  *pz = z;
188}
189
190
191static int
192nr_digits256 (UINT256 R256) {
193  int ind;
194  // determine the number of decimal digits in R256
195  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
196    // between 1 and 19 digits
197    for (ind = 1; ind <= 19; ind++) {
198      if (R256.w[0] < ten2k64[ind]) {
199        break;
200      }
201    }
202    // ind digits
203  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
204             (R256.w[1] < ten2k128[0].w[1] ||
205              (R256.w[1] == ten2k128[0].w[1]
206               && R256.w[0] < ten2k128[0].w[0]))) {
207    // 20 digits
208    ind = 20;
209  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
210    // between 21 and 38 digits
211    for (ind = 1; ind <= 18; ind++) {
212      if (R256.w[1] < ten2k128[ind].w[1] ||
213          (R256.w[1] == ten2k128[ind].w[1] &&
214           R256.w[0] < ten2k128[ind].w[0])) {
215        break;
216      }
217    }
218    // ind + 20 digits
219    ind = ind + 20;
220  } else if (R256.w[3] == 0x0 &&
221             (R256.w[2] < ten2k256[0].w[2] ||
222              (R256.w[2] == ten2k256[0].w[2] &&
223               R256.w[1] < ten2k256[0].w[1]) ||
224              (R256.w[2] == ten2k256[0].w[2] &&
225               R256.w[1] == ten2k256[0].w[1] &&
226               R256.w[0] < ten2k256[0].w[0]))) {
227    // 39 digits
228    ind = 39;
229  } else {
230    // between 40 and 68 digits
231    for (ind = 1; ind <= 29; ind++) {
232      if (R256.w[3] < ten2k256[ind].w[3] ||
233          (R256.w[3] == ten2k256[ind].w[3] &&
234           R256.w[2] < ten2k256[ind].w[2]) ||
235          (R256.w[3] == ten2k256[ind].w[3] &&
236           R256.w[2] == ten2k256[ind].w[2] &&
237           R256.w[1] < ten2k256[ind].w[1]) ||
238          (R256.w[3] == ten2k256[ind].w[3] &&
239           R256.w[2] == ten2k256[ind].w[2] &&
240           R256.w[1] == ten2k256[ind].w[1] &&
241           R256.w[0] < ten2k256[ind].w[0])) {
242        break;
243      }
244    }
245    // ind + 39 digits
246    ind = ind + 39;
247  }
248  return (ind);
249}
250
251// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
252// use the rounding information from ptr_is_* to avoid a double rounding error
253static void
254add_and_round (int q3,
255               int q4,
256               int e4,
257               int delta,
258               int p34,
259               UINT64 z_sign,
260               UINT64 p_sign,
261               UINT128 C3,
262               UINT256 C4,
263               int rnd_mode,
264               int *ptr_is_midpoint_lt_even,
265               int *ptr_is_midpoint_gt_even,
266               int *ptr_is_inexact_lt_midpoint,
267               int *ptr_is_inexact_gt_midpoint,
268               _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
269
270  int scale;
271  int x0;
272  int ind;
273  UINT64 R64;
274  UINT128 P128, R128;
275  UINT192 P192, R192;
276  UINT256 R256;
277  int is_midpoint_lt_even = 0;
278  int is_midpoint_gt_even = 0;
279  int is_inexact_lt_midpoint = 0;
280  int is_inexact_gt_midpoint = 0;
281  int is_midpoint_lt_even0 = 0;
282  int is_midpoint_gt_even0 = 0;
283  int is_inexact_lt_midpoint0 = 0;
284  int is_inexact_gt_midpoint0 = 0;
285  int incr_exp = 0;
286  int is_tiny = 0;
287  int lt_half_ulp = 0;
288  int eq_half_ulp = 0;
289  // int gt_half_ulp = 0;
290  UINT128 res = *ptrres;
291
292  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
293  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
294  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
295
296  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
297  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
298  if (scale == 0) {
299    R256.w[3] = 0x0ull;
300    R256.w[2] = 0x0ull;
301    R256.w[1] = C3.w[1];
302    R256.w[0] = C3.w[0];
303  } else if (scale <= 19) { // 10^scale fits in 64 bits
304    P128.w[1] = 0;
305    P128.w[0] = ten2k64[scale];
306    __mul_128x128_to_256 (R256, P128, C3);
307  } else if (scale <= 38) { // 10^scale fits in 128 bits
308    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
309  } else if (scale <= 57) { // 39 <= scale <= 57
310    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
311    // (10^67 has 223 bits; 10^69 has 230 bits);
312    // must split the computation:
313    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
314    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
315    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
316    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
317    // now multiply R128 by 10^38
318    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
319  } else { // 58 <= scale <= 66
320    // 10^scale takes between 193 and 220 bits,
321    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
322    // must split the computation:
323    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
324    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
325    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
326    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
327    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
328    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
329    // now calculate 10*38 * 10^(scale-38) * C3
330    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
331  }
332  // C3 * 10^scale is now in R256
333
334  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
335  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
336  // possible
337  // add/subtract C4 and C3 * 10^scale; the exponent is e4
338  if (p_sign == z_sign) { // R256 = C4 + R256
339    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
340    // but may require rounding
341    add256 (C4, R256, &R256);
342  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
343    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
344    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
345    // but may require rounding
346
347    // compare first R256 = C3 * 10^scale and C4
348    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
349        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
350        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
351        R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
352      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
353      // but may require rounding
354      sub256 (R256, C4, &R256);
355      // flip p_sign too, because the result has the sign of z
356      p_sign = z_sign;
357    } else { // if C4 > C3 * 10^scale
358      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
359      // but may require rounding
360      sub256 (C4, R256, &R256);
361    }
362    // if the result is pure zero, the sign depends on the rounding mode
363    // (x*y and z had opposite signs)
364    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
365        R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
366      if (rnd_mode != ROUNDING_DOWN)
367        p_sign = 0x0000000000000000ull;
368      else
369        p_sign = 0x8000000000000000ull;
370      // the exponent is max (e4, expmin)
371      if (e4 < -6176)
372        e4 = expmin;
373      // assemble result
374      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
375      res.w[0] = 0x0;
376      *ptrres = res;
377      return;
378    }
379  }
380
381  // determine the number of decimal digits in R256
382  ind = nr_digits256 (R256);
383
384  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
385  // round to the destination precision, with unbounded exponent
386
387  if (ind <= p34) {
388    // result rounded to the destination precision with unbounded exponent
389    // is exact
390    if (ind + e4 < p34 + expmin) {
391      is_tiny = 1; // applies to all rounding modes
392    }
393    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
394    res.w[0] = R256.w[0];
395    // Note: res is correct only if expmin <= e4 <= expmax
396  } else { // if (ind > p34)
397    // if more than P digits, round to nearest to P digits
398    // round R256 to p34 digits
399    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
400    if (ind <= 38) {
401      P128.w[1] = R256.w[1];
402      P128.w[0] = R256.w[0];
403      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
404        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
405        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
406    } else if (ind <= 57) {
407      P192.w[2] = R256.w[2];
408      P192.w[1] = R256.w[1];
409      P192.w[0] = R256.w[0];
410      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
411        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
412        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
413      R128.w[1] = R192.w[1];
414      R128.w[0] = R192.w[0];
415    } else { // if (ind <= 68)
416      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
417        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
418        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
419      R128.w[1] = R256.w[1];
420      R128.w[0] = R256.w[0];
421    }
422    // the rounded result has p34 = 34 digits
423    e4 = e4 + x0 + incr_exp;
424    if (rnd_mode == ROUNDING_TO_NEAREST) {
425      if (e4 < expmin) {
426        is_tiny = 1; // for other rounding modes apply correction
427      }
428    } else {
429      // for RM, RP, RZ, RA apply correction in order to determine tininess
430      // but do not save the result; apply the correction to
431      // (-1)^p_sign * significand * 10^0
432      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
433      P128.w[0] = R128.w[0];
434      rounding_correction (rnd_mode,
435        		   is_inexact_lt_midpoint,
436        		   is_inexact_gt_midpoint, is_midpoint_lt_even,
437        		   is_midpoint_gt_even, 0, &P128, ptrfpsf);
438      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
439      // the number of digits in the significand is p34 = 34
440      if (e4 + scale < expmin) {
441        is_tiny = 1;
442      }
443    }
444    ind = p34; // the number of decimal digits in the signifcand of res
445    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
446    res.w[0] = R128.w[0];
447    // Note: res is correct only if expmin <= e4 <= expmax
448    // set the inexact flag after rounding with bounded exponent, if any
449  }
450  // at this point we have the result rounded with unbounded exponent in
451  // res and we know its tininess:
452  // res = (-1)^p_sign * significand * 10^e4,
453  // where q (significand) = ind <= p34
454  // Note: res is correct only if expmin <= e4 <= expmax
455
456  // check for overflow if RN
457  if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
458    res.w[1] = p_sign | 0x7800000000000000ull;
459    res.w[0] = 0x0000000000000000ull;
460    *ptrres = res;
461    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
462    return; // BID_RETURN (res)
463  } // else not overflow or not RN, so continue
464
465  // if (e4 >= expmin) we have the result rounded with bounded exponent
466  if (e4 < expmin) {
467    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
468    // where the result rounded [at most] once is
469    //   (-1)^p_sign * significand_res * 10^e4
470
471    // avoid double rounding error
472    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
473    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
474    is_midpoint_lt_even0 = is_midpoint_lt_even;
475    is_midpoint_gt_even0 = is_midpoint_gt_even;
476    is_inexact_lt_midpoint = 0;
477    is_inexact_gt_midpoint = 0;
478    is_midpoint_lt_even = 0;
479    is_midpoint_gt_even = 0;
480
481    if (x0 > ind) {
482      // nothing is left of res when moving the decimal point left x0 digits
483      is_inexact_lt_midpoint = 1;
484      res.w[1] = p_sign | 0x0000000000000000ull;
485      res.w[0] = 0x0000000000000000ull;
486      e4 = expmin;
487    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
488      // this is <, =, or > 1/2 ulp
489      // compare the ind-digit value in the significand of res with
490      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
491      // less than, equal to, or greater than 1/2 ulp (significand of res)
492      R128.w[1] = res.w[1] & MASK_COEFF;
493      R128.w[0] = res.w[0];
494      if (ind <= 19) {
495        if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
496          lt_half_ulp = 1;
497          is_inexact_lt_midpoint = 1;
498        } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
499          eq_half_ulp = 1;
500          is_midpoint_gt_even = 1;
501        } else { // > 1/2 ulp
502          // gt_half_ulp = 1;
503          is_inexact_gt_midpoint = 1;
504        }
505      } else { // if (ind <= 38) {
506        if (R128.w[1] < midpoint128[ind - 20].w[1] ||
507            (R128.w[1] == midpoint128[ind - 20].w[1] &&
508            R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
509          lt_half_ulp = 1;
510          is_inexact_lt_midpoint = 1;
511        } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
512            R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
513          eq_half_ulp = 1;
514          is_midpoint_gt_even = 1;
515        } else { // > 1/2 ulp
516          // gt_half_ulp = 1;
517          is_inexact_gt_midpoint = 1;
518        }
519      }
520      if (lt_half_ulp || eq_half_ulp) {
521        // res = +0.0 * 10^expmin
522        res.w[1] = 0x0000000000000000ull;
523        res.w[0] = 0x0000000000000000ull;
524      } else { // if (gt_half_ulp)
525        // res = +1 * 10^expmin
526        res.w[1] = 0x0000000000000000ull;
527        res.w[0] = 0x0000000000000001ull;
528      }
529      res.w[1] = p_sign | res.w[1];
530      e4 = expmin;
531    } else { // if (1 <= x0 <= ind - 1 <= 33)
532      // round the ind-digit result to ind - x0 digits
533
534      if (ind <= 18) { // 2 <= ind <= 18
535        round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
536        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
537        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
538        res.w[1] = 0x0;
539        res.w[0] = R64;
540      } else if (ind <= 38) {
541        P128.w[1] = res.w[1] & MASK_COEFF;
542        P128.w[0] = res.w[0];
543        round128_19_38 (ind, x0, P128, &res, &incr_exp,
544        		&is_midpoint_lt_even, &is_midpoint_gt_even,
545        		&is_inexact_lt_midpoint,
546        		&is_inexact_gt_midpoint);
547      }
548      e4 = e4 + x0; // expmin
549      // we want the exponent to be expmin, so if incr_exp = 1 then
550      // multiply the rounded result by 10 - it will still fit in 113 bits
551      if (incr_exp) {
552        // 64 x 128 -> 128
553        P128.w[1] = res.w[1] & MASK_COEFF;
554        P128.w[0] = res.w[0];
555        __mul_64x128_to_128 (res, ten2k64[1], P128);
556      }
557      res.w[1] =
558        p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
559      // avoid a double rounding error
560      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
561          is_midpoint_lt_even) { // double rounding error upward
562        // res = res - 1
563        res.w[0]--;
564        if (res.w[0] == 0xffffffffffffffffull)
565          res.w[1]--;
566        // Note: a double rounding error upward is not possible; for this
567        // the result after the first rounding would have to be 99...95
568        // (35 digits in all), possibly followed by a number of zeros; this
569        // is not possible in Cases (2)-(6) or (15)-(17) which may get here
570        is_midpoint_lt_even = 0;
571        is_inexact_lt_midpoint = 1;
572      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
573          is_midpoint_gt_even) { // double rounding error downward
574        // res = res + 1
575        res.w[0]++;
576        if (res.w[0] == 0)
577          res.w[1]++;
578        is_midpoint_gt_even = 0;
579        is_inexact_gt_midpoint = 1;
580      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
581        	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
582        // if this second rounding was exact the result may still be
583        // inexact because of the first rounding
584        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
585          is_inexact_gt_midpoint = 1;
586        }
587        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
588          is_inexact_lt_midpoint = 1;
589        }
590      } else if (is_midpoint_gt_even &&
591        	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
592        // pulled up to a midpoint
593        is_inexact_lt_midpoint = 1;
594        is_inexact_gt_midpoint = 0;
595        is_midpoint_lt_even = 0;
596        is_midpoint_gt_even = 0;
597      } else if (is_midpoint_lt_even &&
598        	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
599        // pulled down to a midpoint
600        is_inexact_lt_midpoint = 0;
601        is_inexact_gt_midpoint = 1;
602        is_midpoint_lt_even = 0;
603        is_midpoint_gt_even = 0;
604      } else {
605        ;
606      }
607    }
608  }
609  // res contains the correct result
610  // apply correction if not rounding to nearest
611  if (rnd_mode != ROUNDING_TO_NEAREST) {
612    rounding_correction (rnd_mode,
613        		 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
614        		 is_midpoint_lt_even, is_midpoint_gt_even,
615        		 e4, &res, ptrfpsf);
616  }
617  if (is_midpoint_lt_even || is_midpoint_gt_even ||
618      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
619    // set the inexact flag
620    *ptrfpsf |= INEXACT_EXCEPTION;
621    if (is_tiny)
622      *ptrfpsf |= UNDERFLOW_EXCEPTION;
623  }
624
625  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
626  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
627  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
628  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
629  *ptrres = res;
630  return;
631}
632
633
634#if DECIMAL_CALL_BY_REFERENCE
635static void
636bid128_ext_fma (int *ptr_is_midpoint_lt_even,
637        	int *ptr_is_midpoint_gt_even,
638        	int *ptr_is_inexact_lt_midpoint,
639        	int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
640        	UINT128 * px, UINT128 * py,
641        	UINT128 *
642        	pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
643        	_EXC_INFO_PARAM) {
644  UINT128 x = *px, y = *py, z = *pz;
645#if !DECIMAL_GLOBAL_ROUNDING
646  unsigned int rnd_mode = *prnd_mode;
647#endif
648#else
649static UINT128
650bid128_ext_fma (int *ptr_is_midpoint_lt_even,
651        	int *ptr_is_midpoint_gt_even,
652        	int *ptr_is_inexact_lt_midpoint,
653        	int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
654        	UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
655        	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
656#endif
657
658  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
659  UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
660  UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
661  int true_p_exp;
662  UINT128 C1, C2, C3;
663  UINT256 C4;
664  int q1 = 0, q2 = 0, q3 = 0, q4;
665  int e1, e2, e3, e4;
666  int scale, ind, delta, x0;
667  int p34 = P34; // used to modify the limit on the number of digits
668  BID_UI64DOUBLE tmp;
669  int x_nr_bits, y_nr_bits, z_nr_bits;
670  unsigned int save_fpsf;
671  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
672  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
673  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
674  int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
675  int incr_exp = 0;
676  int lsb;
677  int lt_half_ulp = 0;
678  int eq_half_ulp = 0;
679  int gt_half_ulp = 0;
680  int is_tiny = 0;
681  UINT64 R64, tmp64;
682  UINT128 P128, R128;
683  UINT192 P192, R192;
684  UINT256 R256;
685
686  // the following are based on the table of special cases for fma; the NaN
687  // behavior is similar to that of the IA-64 Architecture fma
688
689  // identify cases where at least one operand is NaN
690
691  BID_SWAP128 (x);
692  BID_SWAP128 (y);
693  BID_SWAP128 (z);
694  if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
695    // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
696    // check first for non-canonical NaN payload
697    if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
698        (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
699         (y.w[0] > 0x38c15b09ffffffffull))) {
700      y.w[1] = y.w[1] & 0xffffc00000000000ull;
701      y.w[0] = 0x0ull;
702    }
703    if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
704      // set invalid flag
705      *pfpsf |= INVALID_EXCEPTION;
706      // return quiet (y)
707      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
708      res.w[0] = y.w[0];
709    } else { // y is QNaN
710      // return y
711      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
712      res.w[0] = y.w[0];
713      // if z = SNaN or x = SNaN signal invalid exception
714      if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
715          (x.w[1] & MASK_SNAN) == MASK_SNAN) {
716        // set invalid flag
717        *pfpsf |= INVALID_EXCEPTION;
718      }
719    }
720    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
721    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
722    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
723    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
724    BID_SWAP128 (res);
725    BID_RETURN (res)
726  } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
727    // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
728    // check first for non-canonical NaN payload
729    if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
730        (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
731         (z.w[0] > 0x38c15b09ffffffffull))) {
732      z.w[1] = z.w[1] & 0xffffc00000000000ull;
733      z.w[0] = 0x0ull;
734    }
735    if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
736      // set invalid flag
737      *pfpsf |= INVALID_EXCEPTION;
738      // return quiet (z)
739      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
740      res.w[0] = z.w[0];
741    } else { // z is QNaN
742      // return z
743      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
744      res.w[0] = z.w[0];
745      // if x = SNaN signal invalid exception
746      if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
747        // set invalid flag
748        *pfpsf |= INVALID_EXCEPTION;
749      }
750    }
751    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
752    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
753    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
754    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
755    BID_SWAP128 (res);
756    BID_RETURN (res)
757  } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
758    // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
759    // check first for non-canonical NaN payload
760    if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
761        (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
762         (x.w[0] > 0x38c15b09ffffffffull))) {
763      x.w[1] = x.w[1] & 0xffffc00000000000ull;
764      x.w[0] = 0x0ull;
765    }
766    if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
767      // set invalid flag
768      *pfpsf |= INVALID_EXCEPTION;
769      // return quiet (x)
770      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
771      res.w[0] = x.w[0];
772    } else { // x is QNaN
773      // return x
774      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
775      res.w[0] = x.w[0];
776    }
777    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
778    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
779    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
780    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
781    BID_SWAP128 (res);
782    BID_RETURN (res)
783  }
784  // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
785  // for non-canonical values
786
787  x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
788  C1.w[1] = x.w[1] & MASK_COEFF;
789  C1.w[0] = x.w[0];
790  if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
791    // if x is not infinity check for non-canonical values - treated as zero
792    if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
793      // non-canonical
794      x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
795      C1.w[1] = 0; // significand high
796      C1.w[0] = 0; // significand low
797    } else { // G0_G1 != 11
798      x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
799      if (C1.w[1] > 0x0001ed09bead87c0ull ||
800          (C1.w[1] == 0x0001ed09bead87c0ull &&
801           C1.w[0] > 0x378d8e63ffffffffull)) {
802        // x is non-canonical if coefficient is larger than 10^34 -1
803        C1.w[1] = 0;
804        C1.w[0] = 0;
805      } else { // canonical
806        ;
807      }
808    }
809  }
810  y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
811  C2.w[1] = y.w[1] & MASK_COEFF;
812  C2.w[0] = y.w[0];
813  if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
814    // if y is not infinity check for non-canonical values - treated as zero
815    if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
816      // non-canonical
817      y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
818      C2.w[1] = 0; // significand high
819      C2.w[0] = 0; // significand low
820    } else { // G0_G1 != 11
821      y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
822      if (C2.w[1] > 0x0001ed09bead87c0ull ||
823          (C2.w[1] == 0x0001ed09bead87c0ull &&
824           C2.w[0] > 0x378d8e63ffffffffull)) {
825        // y is non-canonical if coefficient is larger than 10^34 -1
826        C2.w[1] = 0;
827        C2.w[0] = 0;
828      } else { // canonical
829        ;
830      }
831    }
832  }
833  z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
834  C3.w[1] = z.w[1] & MASK_COEFF;
835  C3.w[0] = z.w[0];
836  if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
837    // if z is not infinity check for non-canonical values - treated as zero
838    if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
839      // non-canonical
840      z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
841      C3.w[1] = 0; // significand high
842      C3.w[0] = 0; // significand low
843    } else { // G0_G1 != 11
844      z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
845      if (C3.w[1] > 0x0001ed09bead87c0ull ||
846          (C3.w[1] == 0x0001ed09bead87c0ull &&
847           C3.w[0] > 0x378d8e63ffffffffull)) {
848        // z is non-canonical if coefficient is larger than 10^34 -1
849        C3.w[1] = 0;
850        C3.w[0] = 0;
851      } else { // canonical
852        ;
853      }
854    }
855  }
856
857  p_sign = x_sign ^ y_sign; // sign of the product
858
859  // identify cases where at least one operand is infinity
860
861  if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
862    if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
863      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
864        if (p_sign == z_sign) {
865          res.w[1] = z_sign | MASK_INF;
866          res.w[0] = 0x0;
867        } else {
868          // return QNaN Indefinite
869          res.w[1] = 0x7c00000000000000ull;
870          res.w[0] = 0x0000000000000000ull;
871          // set invalid flag
872          *pfpsf |= INVALID_EXCEPTION;
873        }
874      } else { // z = 0 or z = f
875        res.w[1] = p_sign | MASK_INF;
876        res.w[0] = 0x0;
877      }
878    } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
879      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
880        if (p_sign == z_sign) {
881          res.w[1] = z_sign | MASK_INF;
882          res.w[0] = 0x0;
883        } else {
884          // return QNaN Indefinite
885          res.w[1] = 0x7c00000000000000ull;
886          res.w[0] = 0x0000000000000000ull;
887          // set invalid flag
888          *pfpsf |= INVALID_EXCEPTION;
889        }
890      } else { // z = 0 or z = f
891        res.w[1] = p_sign | MASK_INF;
892        res.w[0] = 0x0;
893      }
894    } else { // y = 0
895      // return QNaN Indefinite
896      res.w[1] = 0x7c00000000000000ull;
897      res.w[0] = 0x0000000000000000ull;
898      // set invalid flag
899      *pfpsf |= INVALID_EXCEPTION;
900    }
901    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
902    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
903    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
904    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
905    BID_SWAP128 (res);
906    BID_RETURN (res)
907  } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
908    if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
909      // x = f, necessarily
910      if ((p_sign != z_sign)
911          || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
912        // return QNaN Indefinite
913        res.w[1] = 0x7c00000000000000ull;
914        res.w[0] = 0x0000000000000000ull;
915        // set invalid flag
916        *pfpsf |= INVALID_EXCEPTION;
917      } else {
918        res.w[1] = z_sign | MASK_INF;
919        res.w[0] = 0x0;
920      }
921    } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
922      // z = 0, f, inf
923      // return QNaN Indefinite
924      res.w[1] = 0x7c00000000000000ull;
925      res.w[0] = 0x0000000000000000ull;
926      // set invalid flag
927      *pfpsf |= INVALID_EXCEPTION;
928    } else {
929      // x = f and z = 0, f, necessarily
930      res.w[1] = p_sign | MASK_INF;
931      res.w[0] = 0x0;
932    }
933    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
934    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
935    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
936    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
937    BID_SWAP128 (res);
938    BID_RETURN (res)
939  } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
940    // x = 0, f and y = 0, f, necessarily
941    res.w[1] = z_sign | MASK_INF;
942    res.w[0] = 0x0;
943    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
944    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
945    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
946    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
947    BID_SWAP128 (res);
948    BID_RETURN (res)
949  }
950
951  true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
952  if (true_p_exp < -6176)
953    p_exp = 0; // cannot be less than EXP_MIN
954  else
955    p_exp = (UINT64) (true_p_exp + 6176) << 49;
956
957  if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
958    // the result is 0
959    if (p_exp < z_exp)
960      res.w[1] = p_exp; // preferred exponent
961    else
962      res.w[1] = z_exp; // preferred exponent
963    if (p_sign == z_sign) {
964      res.w[1] |= z_sign;
965      res.w[0] = 0x0;
966    } else { // x * y and z have opposite signs
967      if (rnd_mode == ROUNDING_DOWN) {
968        // res = -0.0
969        res.w[1] |= MASK_SIGN;
970        res.w[0] = 0x0;
971      } else {
972        // res = +0.0
973        // res.w[1] |= 0x0;
974        res.w[0] = 0x0;
975      }
976    }
977    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
978    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
979    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
980    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
981    BID_SWAP128 (res);
982    BID_RETURN (res)
983  }
984  // from this point on, we may need to know the number of decimal digits
985  // in the significands of x, y, z when x, y, z != 0
986
987  if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
988    // q1 = nr. of decimal digits in x
989    // determine first the nr. of bits in x
990    if (C1.w[1] == 0) {
991      if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
992        // split the 64-bit value in two 32-bit halves to avoid rounding errors
993        if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
994          tmp.d = (double) (C1.w[0] >> 32); // exact conversion
995          x_nr_bits =
996            33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
997        } else { // x < 2^32
998          tmp.d = (double) (C1.w[0]); // exact conversion
999          x_nr_bits =
1000            1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1001        }
1002      } else { // if x < 2^53
1003        tmp.d = (double) C1.w[0]; // exact conversion
1004        x_nr_bits =
1005          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1006      }
1007    } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1008      tmp.d = (double) C1.w[1]; // exact conversion
1009      x_nr_bits =
1010        65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1011    }
1012    q1 = nr_digits[x_nr_bits - 1].digits;
1013    if (q1 == 0) {
1014      q1 = nr_digits[x_nr_bits - 1].digits1;
1015      if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1016          (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1017           C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1018        q1++;
1019    }
1020  }
1021
1022  if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1023    if (C2.w[1] == 0) {
1024      if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1025        // split the 64-bit value in two 32-bit halves to avoid rounding errors
1026        if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1027          tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1028          y_nr_bits =
1029            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1030        } else { // y < 2^32
1031          tmp.d = (double) C2.w[0]; // exact conversion
1032          y_nr_bits =
1033            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1034        }
1035      } else { // if y < 2^53
1036        tmp.d = (double) C2.w[0]; // exact conversion
1037        y_nr_bits =
1038          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1039      }
1040    } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1041      tmp.d = (double) C2.w[1]; // exact conversion
1042      y_nr_bits =
1043        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1044    }
1045
1046    q2 = nr_digits[y_nr_bits].digits;
1047    if (q2 == 0) {
1048      q2 = nr_digits[y_nr_bits].digits1;
1049      if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1050          (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1051           C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1052        q2++;
1053    }
1054  }
1055
1056  if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1057    if (C3.w[1] == 0) {
1058      if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1059        // split the 64-bit value in two 32-bit halves to avoid rounding errors
1060        if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1061          tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1062          z_nr_bits =
1063            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1064        } else { // z < 2^32
1065          tmp.d = (double) C3.w[0]; // exact conversion
1066          z_nr_bits =
1067            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1068        }
1069      } else { // if z < 2^53
1070        tmp.d = (double) C3.w[0]; // exact conversion
1071        z_nr_bits =
1072          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1073      }
1074    } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1075      tmp.d = (double) C3.w[1]; // exact conversion
1076      z_nr_bits =
1077        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1078    }
1079
1080    q3 = nr_digits[z_nr_bits].digits;
1081    if (q3 == 0) {
1082      q3 = nr_digits[z_nr_bits].digits1;
1083      if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1084          (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1085           C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1086        q3++;
1087    }
1088  }
1089
1090  if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1091      (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1092    // x = 0 or y = 0
1093    // z = f, necessarily; for 0 + z return z, with the preferred exponent
1094    // the result is z, but need to get the preferred exponent
1095    if (z_exp <= p_exp) { // the preferred exponent is z_exp
1096      res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1097      res.w[0] = C3.w[0];
1098    } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1099      // return (C3 * 10^scale) * 10^(z_exp - scale)
1100      // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1101      scale = p34 - q3;
1102      ind = (z_exp - p_exp) >> 49;
1103      if (ind < scale)
1104        scale = ind;
1105      if (scale == 0) {
1106        res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1107        res.w[0] = z.w[0];
1108      } else if (q3 <= 19) { // z fits in 64 bits
1109        if (scale <= 19) { // 10^scale fits in 64 bits
1110          // 64 x 64 C3.w[0] * ten2k64[scale]
1111          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1112        } else { // 10^scale fits in 128 bits
1113          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1114          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1115        }
1116      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1117        // 64 x 128 ten2k64[scale] * C3
1118        __mul_128x64_to_128 (res, ten2k64[scale], C3);
1119      }
1120      // subtract scale from the exponent
1121      z_exp = z_exp - ((UINT64) scale << 49);
1122      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1123    }
1124    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1125    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1126    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1127    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1128    BID_SWAP128 (res);
1129    BID_RETURN (res)
1130  } else {
1131    ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1132  }
1133
1134  e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1135  e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1136  e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1137  e4 = e1 + e2; // unbiased exponent of the exact x * y
1138
1139  // calculate C1 * C2 and its number of decimal digits, q4
1140
1141  // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1142  // where 2 <= q1 + q2 <= 68
1143  // calculate C4 = C1 * C2 and determine q
1144  C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1145  if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1146    C4.w[0] = C1.w[0] * C2.w[0];
1147    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1148    if (C4.w[0] < ten2k64[q1 + q2 - 1])
1149      q4 = q1 + q2 - 1; // q4 in [1, 18]
1150    else
1151      q4 = q1 + q2; // q4 in [2, 19]
1152    // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1153  } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1154    // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1155    __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1156    // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1157    if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1158      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1159      q4 = 19; // 19 = q1 + q2 - 1
1160    } else {
1161      // if (C4.w[1] == 0)
1162      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1163      // else
1164      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1165      q4 = 20; // 20 = q1 + q2
1166    }
1167  } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1168    // C4 = C1 * C2 fits in 64 or 128 bits
1169    // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1170    // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1171    if (q1 <= 19) {
1172      __mul_128x64_to_128 (C4, C1.w[0], C2);
1173    } else { // q2 <= 19
1174      __mul_128x64_to_128 (C4, C2.w[0], C1);
1175    }
1176    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1177    if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1178        (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1179         C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1180      // if (C4.w[1] == 0) // q4 = 20, necessarily
1181      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1182      // else
1183      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1184      q4 = q1 + q2 - 1; // q4 in [20, 37]
1185    } else {
1186      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1187      q4 = q1 + q2; // q4 in [21, 38]
1188    }
1189  } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1190    // both C1 and C2 fit in 128 bits (actually in 113 bits)
1191    // may replace this by 128x128_to192
1192    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1193    // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1194    if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1195        		 (C4.w[1] == ten2k128[18].w[1]
1196        		  && C4.w[0] < ten2k128[18].w[0]))) {
1197      // 18 = 38 - 20 = q1+q2-1 - 20
1198      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1199      q4 = 38; // 38 = q1 + q2 - 1
1200    } else {
1201      // if (C4.w[2] == 0)
1202      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1203      // else
1204      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1205      q4 = 39; // 39 = q1 + q2
1206    }
1207  } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1208    // C4 = C1 * C2 fits in 128 or 192 bits
1209    // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1210    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1211    // may fit in 64 bits
1212    if (C1.w[1] == 0) { // C1 fits in 64 bits
1213      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1214      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1215    } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1216      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1217      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1218    } else { // both C1 and C2 require 128 bits
1219      // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1220      __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1221    }
1222    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1223    if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1224        (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1225         (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1226          (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1227           C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1228      // if (C4.w[2] == 0) // q4 = 39, necessarily
1229      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1230      // else
1231      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1232      q4 = q1 + q2 - 1; // q4 in [39, 56]
1233    } else {
1234      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1235      q4 = q1 + q2; // q4 in [40, 57]
1236    }
1237  } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1238    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1239    // may fit in 64 bits
1240    if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1242    } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1243      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1244    } else { // C1 * C2 will fit in 192 bits or in 256 bits
1245      __mul_128x128_to_256 (C4, C1, C2);
1246    }
1247    // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1248    if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1249        		 (C4.w[2] == ten2k256[18].w[2]
1250        		  && (C4.w[1] < ten2k256[18].w[1]
1251        		      || (C4.w[1] == ten2k256[18].w[1]
1252        			  && C4.w[0] < ten2k256[18].w[0]))))) {
1253      // 18 = 57 - 39 = q1+q2-1 - 39
1254      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1255      q4 = 57; // 57 = q1 + q2 - 1
1256    } else {
1257      // if (C4.w[3] == 0)
1258      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1259      // else
1260      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1261      q4 = 58; // 58 = q1 + q2
1262    }
1263  } else { // if 59 <= q1 + q2 <= 68
1264    // C4 = C1 * C2 fits in 192 or 256 bits
1265    // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1266    // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1267    // 64 bits
1268    // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1269    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1270    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1271    if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1272        (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1273         (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1274          (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1275           (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1276            (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1277             C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1278      // if (C4.w[3] == 0) // q4 = 58, necessarily
1279      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1280      // else
1281      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1282      q4 = q1 + q2 - 1; // q4 in [58, 67]
1283    } else {
1284      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1285      q4 = q1 + q2; // q4 in [59, 68]
1286    }
1287  }
1288
1289  if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1290    save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1291    *pfpsf = 0;
1292
1293    if (q4 > p34) {
1294
1295      // truncate C4 to p34 digits into res
1296      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1297      x0 = q4 - p34;
1298      if (q4 <= 38) {
1299        P128.w[1] = C4.w[1];
1300        P128.w[0] = C4.w[0];
1301        round128_19_38 (q4, x0, P128, &res, &incr_exp,
1302        		&is_midpoint_lt_even, &is_midpoint_gt_even,
1303        		&is_inexact_lt_midpoint,
1304        		&is_inexact_gt_midpoint);
1305      } else if (q4 <= 57) { // 35 <= q4 <= 57
1306        P192.w[2] = C4.w[2];
1307        P192.w[1] = C4.w[1];
1308        P192.w[0] = C4.w[0];
1309        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1310        		&is_midpoint_lt_even, &is_midpoint_gt_even,
1311        		&is_inexact_lt_midpoint,
1312        		&is_inexact_gt_midpoint);
1313        res.w[0] = R192.w[0];
1314        res.w[1] = R192.w[1];
1315      } else { // if (q4 <= 68)
1316        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1317        		&is_midpoint_lt_even, &is_midpoint_gt_even,
1318        		&is_inexact_lt_midpoint,
1319        		&is_inexact_gt_midpoint);
1320        res.w[0] = R256.w[0];
1321        res.w[1] = R256.w[1];
1322      }
1323      e4 = e4 + x0;
1324      if (incr_exp) {
1325        e4 = e4 + 1;
1326      }
1327      q4 = p34;
1328      // res is now the coefficient of the result rounded to the destination
1329      // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1330    } else { // if (q4 <= p34)
1331      // C4 * 10^e4 is the result rounded to the destination precision, with
1332      // unbounded exponent (which is exact)
1333
1334      if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1335        // e4 is too large, but can be brought within range by scaling up C4
1336        scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1337        // res = (C4 * 10^scale) * 10^expmax
1338        if (q4 <= 19) { // C4 fits in 64 bits
1339          if (scale <= 19) { // 10^scale fits in 64 bits
1340            // 64 x 64 C4.w[0] * ten2k64[scale]
1341            __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1342          } else { // 10^scale fits in 128 bits
1343            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1344            __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1345          }
1346        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1347          // 64 x 128 ten2k64[scale] * CC43
1348          __mul_128x64_to_128 (res, ten2k64[scale], C4);
1349        }
1350        e4 = e4 - scale; // expmax
1351        q4 = q4 + scale;
1352      } else {
1353        res.w[1] = C4.w[1];
1354        res.w[0] = C4.w[0];
1355      }
1356      // res is the coefficient of the result rounded to the destination
1357      // precision, with unbounded exponent (it has q4 digits); the exponent
1358      // is e4 (exact result)
1359    }
1360
1361    // check for overflow
1362    if (q4 + e4 > p34 + expmax) {
1363      if (rnd_mode == ROUNDING_TO_NEAREST) {
1364        res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1365        res.w[0] = 0x0000000000000000ull;
1366        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1367      } else {
1368        res.w[1] = p_sign | res.w[1];
1369        rounding_correction (rnd_mode,
1370        		     is_inexact_lt_midpoint,
1371        		     is_inexact_gt_midpoint,
1372        		     is_midpoint_lt_even, is_midpoint_gt_even,
1373        		     e4, &res, pfpsf);
1374      }
1375      *pfpsf |= save_fpsf;
1376      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1377      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1378      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1379      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1380      BID_SWAP128 (res);
1381      BID_RETURN (res)
1382    }
1383    // check for underflow
1384    if (q4 + e4 < expmin + P34) {
1385      is_tiny = 1; // the result is tiny
1386      if (e4 < expmin) {
1387        // if e4 < expmin, we must truncate more of res
1388        x0 = expmin - e4; // x0 >= 1
1389        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1390        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1391        is_midpoint_lt_even0 = is_midpoint_lt_even;
1392        is_midpoint_gt_even0 = is_midpoint_gt_even;
1393        is_inexact_lt_midpoint = 0;
1394        is_inexact_gt_midpoint = 0;
1395        is_midpoint_lt_even = 0;
1396        is_midpoint_gt_even = 0;
1397        // the number of decimal digits in res is q4
1398        if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1399          if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1400            round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1401        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
1402        		  &is_inexact_lt_midpoint,
1403        		  &is_inexact_gt_midpoint);
1404            if (incr_exp) {
1405              // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1406              R64 = ten2k64[q4 - x0];
1407            }
1408            // res.w[1] = 0; (from above)
1409            res.w[0] = R64;
1410          } else { // if (q4 <= 34)
1411            // 19 <= q4 <= 38
1412            P128.w[1] = res.w[1];
1413            P128.w[0] = res.w[0];
1414            round128_19_38 (q4, x0, P128, &res, &incr_exp,
1415        		    &is_midpoint_lt_even, &is_midpoint_gt_even,
1416        		    &is_inexact_lt_midpoint,
1417        		    &is_inexact_gt_midpoint);
1418            if (incr_exp) {
1419              // increase coefficient by a factor of 10; this will be <= 10^33
1420              // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1421              if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1422        	// res.w[1] = 0;
1423        	res.w[0] = ten2k64[q4 - x0];
1424              } else { // 20 <= q4 - x0 <= 37
1425        	res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1426        	res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1427              }
1428            }
1429          }
1430          e4 = e4 + x0; // expmin
1431        } else if (x0 == q4) {
1432          // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1433          // determine relationship with 1/2 ulp
1434          if (q4 <= 19) {
1435            if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1436              lt_half_ulp = 1;
1437              is_inexact_lt_midpoint = 1;
1438            } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1439              eq_half_ulp = 1;
1440              is_midpoint_gt_even = 1;
1441            } else { // > 1/2 ulp
1442              // gt_half_ulp = 1;
1443              is_inexact_gt_midpoint = 1;
1444            }
1445          } else { // if (q4 <= 34)
1446            if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1447                (res.w[1] == midpoint128[q4 - 20].w[1] &&
1448                res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1449              lt_half_ulp = 1;
1450              is_inexact_lt_midpoint = 1;
1451            } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1452                res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1453              eq_half_ulp = 1;
1454              is_midpoint_gt_even = 1;
1455            } else { // > 1/2 ulp
1456              // gt_half_ulp = 1;
1457              is_inexact_gt_midpoint = 1;
1458            }
1459          }
1460          if (lt_half_ulp || eq_half_ulp) {
1461            // res = +0.0 * 10^expmin
1462            res.w[1] = 0x0000000000000000ull;
1463            res.w[0] = 0x0000000000000000ull;
1464          } else { // if (gt_half_ulp)
1465            // res = +1 * 10^expmin
1466            res.w[1] = 0x0000000000000000ull;
1467            res.w[0] = 0x0000000000000001ull;
1468          }
1469          e4 = expmin;
1470        } else { // if (x0 > q4)
1471          // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1472          res.w[1] = 0;
1473          res.w[0] = 0;
1474          e4 = expmin;
1475          is_inexact_lt_midpoint = 1;
1476        }
1477        // avoid a double rounding error
1478        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1479            is_midpoint_lt_even) { // double rounding error upward
1480          // res = res - 1
1481          res.w[0]--;
1482          if (res.w[0] == 0xffffffffffffffffull)
1483            res.w[1]--;
1484          // Note: a double rounding error upward is not possible; for this
1485          // the result after the first rounding would have to be 99...95
1486          // (35 digits in all), possibly followed by a number of zeros; this
1487          // not possible for f * f + 0
1488          is_midpoint_lt_even = 0;
1489          is_inexact_lt_midpoint = 1;
1490        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1491            is_midpoint_gt_even) { // double rounding error downward
1492          // res = res + 1
1493          res.w[0]++;
1494          if (res.w[0] == 0)
1495            res.w[1]++;
1496          is_midpoint_gt_even = 0;
1497          is_inexact_gt_midpoint = 1;
1498        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1499        	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1500          // if this second rounding was exact the result may still be
1501          // inexact because of the first rounding
1502          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1503            is_inexact_gt_midpoint = 1;
1504          }
1505          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1506            is_inexact_lt_midpoint = 1;
1507          }
1508        } else if (is_midpoint_gt_even &&
1509        	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1510          // pulled up to a midpoint
1511          is_inexact_lt_midpoint = 1;
1512          is_inexact_gt_midpoint = 0;
1513          is_midpoint_lt_even = 0;
1514          is_midpoint_gt_even = 0;
1515        } else if (is_midpoint_lt_even &&
1516        	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1517          // pulled down to a midpoint
1518          is_inexact_lt_midpoint = 0;
1519          is_inexact_gt_midpoint = 1;
1520          is_midpoint_lt_even = 0;
1521          is_midpoint_gt_even = 0;
1522        } else {
1523          ;
1524        }
1525      } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1526        if (e3 < e4) {
1527          // if (e3 < e4) the preferred exponent is e3
1528          // return (C4 * 10^scale) * 10^(e4 - scale)
1529          // where scale = min (p34-q4, (e4 - e3))
1530          scale = p34 - q4;
1531          ind = e4 - e3;
1532          if (ind < scale)
1533            scale = ind;
1534          if (scale == 0) {
1535            ; // res and e4 are unchanged
1536          } else if (q4 <= 19) { // C4 fits in 64 bits
1537            if (scale <= 19) { // 10^scale fits in 64 bits
1538              // 64 x 64 res.w[0] * ten2k64[scale]
1539              __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1540            } else { // 10^scale fits in 128 bits
1541              // 64 x 128 res.w[0] * ten2k128[scale - 20]
1542              __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1543            }
1544          } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1545            // 64 x 128 ten2k64[scale] * C3
1546            __mul_128x64_to_128 (res, ten2k64[scale], res);
1547          }
1548          // subtract scale from the exponent
1549          e4 = e4 - scale;
1550        }
1551      }
1552
1553      // check for inexact result
1554      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1555          is_midpoint_lt_even || is_midpoint_gt_even) {
1556        // set the inexact flag and the underflow flag
1557        *pfpsf |= INEXACT_EXCEPTION;
1558        *pfpsf |= UNDERFLOW_EXCEPTION;
1559      }
1560      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1561      if (rnd_mode != ROUNDING_TO_NEAREST) {
1562        rounding_correction (rnd_mode,
1563        		     is_inexact_lt_midpoint,
1564        		     is_inexact_gt_midpoint,
1565        		     is_midpoint_lt_even, is_midpoint_gt_even,
1566        		     e4, &res, pfpsf);
1567      }
1568      *pfpsf |= save_fpsf;
1569      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1570      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1571      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1572      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1573      BID_SWAP128 (res);
1574      BID_RETURN (res)
1575    }
1576    // no overflow, and no underflow for rounding to nearest
1577    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1578
1579    if (rnd_mode != ROUNDING_TO_NEAREST) {
1580      rounding_correction (rnd_mode,
1581        		   is_inexact_lt_midpoint,
1582        		   is_inexact_gt_midpoint,
1583        		   is_midpoint_lt_even, is_midpoint_gt_even,
1584        		   e4, &res, pfpsf);
1585      // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1586      if (e4 == expmin) {
1587        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1588            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1589             res.w[0] < 0x38c15b0a00000000ull)) {
1590          is_tiny = 1;
1591        }
1592      }
1593    }
1594
1595    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1596        is_midpoint_lt_even || is_midpoint_gt_even) {
1597      // set the inexact flag
1598      *pfpsf |= INEXACT_EXCEPTION;
1599      if (is_tiny)
1600        *pfpsf |= UNDERFLOW_EXCEPTION;
1601    }
1602
1603    if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1604      // need to ensure that the result has the preferred exponent
1605      p_exp = res.w[1] & MASK_EXP;
1606      if (z_exp < p_exp) { // the preferred exponent is z_exp
1607        // signficand of res in C3
1608        C3.w[1] = res.w[1] & MASK_COEFF;
1609        C3.w[0] = res.w[0];
1610        // the number of decimal digits of x * y is q4 <= 34
1611        // Note: the coefficient fits in 128 bits
1612
1613        // return (C3 * 10^scale) * 10^(p_exp - scale)
1614        // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1615        scale = p34 - q4;
1616        ind = (p_exp - z_exp) >> 49;
1617        if (ind < scale)
1618          scale = ind;
1619        // subtract scale from the exponent
1620        p_exp = p_exp - ((UINT64) scale << 49);
1621        if (scale == 0) {
1622          ; // leave res unchanged
1623        } else if (q4 <= 19) { // x * y fits in 64 bits
1624          if (scale <= 19) { // 10^scale fits in 64 bits
1625            // 64 x 64 C3.w[0] * ten2k64[scale]
1626            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1627          } else { // 10^scale fits in 128 bits
1628            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1629            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1630          }
1631          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1632        } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1633          // 64 x 128 ten2k64[scale] * C3
1634          __mul_128x64_to_128 (res, ten2k64[scale], C3);
1635          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1636        }
1637      } // else leave the result as it is, because p_exp <= z_exp
1638    }
1639    *pfpsf |= save_fpsf;
1640    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1641    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1642    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1643    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1644    BID_SWAP128 (res);
1645    BID_RETURN (res)
1646  } // else we have f * f + f
1647
1648  // continue with x = f, y = f, z = f
1649
1650  delta = q3 + e3 - q4 - e4;
1651delta_ge_zero:
1652  if (delta >= 0) {
1653
1654    if (p34 <= delta - 1 ||	// Case (1')
1655        (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1656      // check for overflow, which can occur only in Case (1')
1657      if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1658        // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1659        // condition for (q3 + e3) > (p34 + expmax)
1660        if (rnd_mode == ROUNDING_TO_NEAREST) {
1661          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1662          res.w[0] = 0x0000000000000000ull;
1663          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1664        } else {
1665          if (p_sign == z_sign) {
1666            is_inexact_lt_midpoint = 1;
1667          } else {
1668            is_inexact_gt_midpoint = 1;
1669          }
1670          // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1671          scale = p34 - q3;
1672          if (scale == 0) {
1673            res.w[1] = z_sign | C3.w[1];
1674            res.w[0] = C3.w[0];
1675          } else {
1676            if (q3 <= 19) { // C3 fits in 64 bits
1677              if (scale <= 19) { // 10^scale fits in 64 bits
1678        	// 64 x 64 C3.w[0] * ten2k64[scale]
1679        	__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1680              } else { // 10^scale fits in 128 bits
1681        	// 64 x 128 C3.w[0] * ten2k128[scale - 20]
1682        	__mul_128x64_to_128 (res, C3.w[0],
1683        			     ten2k128[scale - 20]);
1684              }
1685            } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1686              // 64 x 128 ten2k64[scale] * C3
1687              __mul_128x64_to_128 (res, ten2k64[scale], C3);
1688            }
1689            // the coefficient in res has q3 + scale = p34 digits
1690          }
1691          e3 = e3 - scale;
1692          res.w[1] = z_sign | res.w[1];
1693          rounding_correction (rnd_mode,
1694        		       is_inexact_lt_midpoint,
1695        		       is_inexact_gt_midpoint,
1696        		       is_midpoint_lt_even, is_midpoint_gt_even,
1697        		       e3, &res, pfpsf);
1698        }
1699        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1700        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1701        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1702        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1703        BID_SWAP128 (res);
1704        BID_RETURN (res)
1705      }
1706      // res = z
1707      if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1708        // return (C3 * 10^scale) * 10^(z_exp - scale)
1709        // where scale = min (p34-q3, z_exp-EMIN)
1710        scale = p34 - q3;
1711        ind = e3 + 6176;
1712        if (ind < scale)
1713          scale = ind;
1714        if (scale == 0) {
1715          res.w[1] = C3.w[1];
1716          res.w[0] = C3.w[0];
1717        } else if (q3 <= 19) { // z fits in 64 bits
1718          if (scale <= 19) { // 10^scale fits in 64 bits
1719            // 64 x 64 C3.w[0] * ten2k64[scale]
1720            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1721          } else { // 10^scale fits in 128 bits
1722            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1723            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1724          }
1725        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1726          // 64 x 128 ten2k64[scale] * C3
1727          __mul_128x64_to_128 (res, ten2k64[scale], C3);
1728        }
1729        // the coefficient in res has q3 + scale digits
1730        // subtract scale from the exponent
1731        z_exp = z_exp - ((UINT64) scale << 49);
1732        e3 = e3 - scale;
1733        res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1734        if (scale + q3 < p34)
1735          *pfpsf |= UNDERFLOW_EXCEPTION;
1736      } else {
1737        scale = 0;
1738        res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1739        res.w[0] = C3.w[0];
1740      }
1741
1742      // use the following to avoid double rounding errors when operating on
1743      // mixed formats in rounding to nearest, and for correcting the result
1744      // if not rounding to nearest
1745      if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1746        // there is a gap of exactly one digit between the scaled C3 and C4
1747        // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1748        if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1749            (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1750            (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1751        		  C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1752          // C3 * 10^ scale != 10^(q3-1)
1753          // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1754          // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1755          is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1756        } else { // if C3 * 10^scale = 10^(q3+scale-1)
1757          // ok from above e3 = (z_exp >> 49) - 6176;
1758          // the result is always inexact
1759          if (q4 == 1) {
1760            R64 = C4.w[0];
1761          } else {
1762            // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1763            // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1764            if (q4 <= 18) { // 2 <= q4 <= 18
1765              round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1766        		    &is_midpoint_lt_even, &is_midpoint_gt_even,
1767        		    &is_inexact_lt_midpoint,
1768        		    &is_inexact_gt_midpoint);
1769            } else if (q4 <= 38) {
1770              P128.w[1] = C4.w[1];
1771              P128.w[0] = C4.w[0];
1772              round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1773        		      &is_midpoint_lt_even,
1774        		      &is_midpoint_gt_even,
1775        		      &is_inexact_lt_midpoint,
1776        		      &is_inexact_gt_midpoint);
1777              R64 = R128.w[0]; // one decimal digit
1778            } else if (q4 <= 57) {
1779              P192.w[2] = C4.w[2];
1780              P192.w[1] = C4.w[1];
1781              P192.w[0] = C4.w[0];
1782              round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1783        		      &is_midpoint_lt_even,
1784        		      &is_midpoint_gt_even,
1785        		      &is_inexact_lt_midpoint,
1786        		      &is_inexact_gt_midpoint);
1787              R64 = R192.w[0]; // one decimal digit
1788            } else { // if (q4 <= 68)
1789              round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1790        		      &is_midpoint_lt_even,
1791        		      &is_midpoint_gt_even,
1792        		      &is_inexact_lt_midpoint,
1793        		      &is_inexact_gt_midpoint);
1794              R64 = R256.w[0]; // one decimal digit
1795            }
1796            if (incr_exp) {
1797              R64 = 10;
1798            }
1799          }
1800          if (q4 == 1 && C4.w[0] == 5) {
1801            is_inexact_lt_midpoint = 0;
1802            is_inexact_gt_midpoint = 0;
1803            is_midpoint_lt_even = 1;
1804            is_midpoint_gt_even = 0;
1805          } else if ((e3 == expmin) ||
1806        	     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1807            // result does not change
1808            is_inexact_lt_midpoint = 0;
1809            is_inexact_gt_midpoint = 1;
1810            is_midpoint_lt_even = 0;
1811            is_midpoint_gt_even = 0;
1812          } else {
1813            is_inexact_lt_midpoint = 1;
1814            is_inexact_gt_midpoint = 0;
1815            is_midpoint_lt_even = 0;
1816            is_midpoint_gt_even = 0;
1817            // result decremented is 10^(q3+scale) - 1
1818            if ((q3 + scale) <= 19) {
1819              res.w[1] = 0;
1820              res.w[0] = ten2k64[q3 + scale];
1821            } else { // if ((q3 + scale + 1) <= 35)
1822              res.w[1] = ten2k128[q3 + scale - 20].w[1];
1823              res.w[0] = ten2k128[q3 + scale - 20].w[0];
1824            }
1825            res.w[0] = res.w[0] - 1; // borrow never occurs
1826            z_exp = z_exp - EXP_P1;
1827            e3 = e3 - 1;
1828            res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1829          }
1830          if (e3 == expmin) {
1831            if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1832              ; // result not tiny (in round-to-nearest mode)
1833            } else {
1834              *pfpsf |= UNDERFLOW_EXCEPTION;
1835            }
1836          }
1837        } // end 10^(q3+scale-1)
1838        // set the inexact flag
1839        *pfpsf |= INEXACT_EXCEPTION;
1840      } else {
1841        if (p_sign == z_sign) {
1842          // if (z_sign), set as if for absolute value
1843          is_inexact_lt_midpoint = 1;
1844        } else { // if (p_sign != z_sign)
1845          // if (z_sign), set as if for absolute value
1846          is_inexact_gt_midpoint = 1;
1847        }
1848        *pfpsf |= INEXACT_EXCEPTION;
1849      }
1850      // the result is always inexact => set the inexact flag
1851      // Determine tininess:
1852      //    if (exp > expmin)
1853      //      the result is not tiny
1854      //    else // if exp = emin
1855      //      if (q3 + scale < p34)
1856      //        the result is tiny
1857      //      else // if (q3 + scale = p34)
1858      //        if (C3 * 10^scale > 10^33)
1859      //          the result is not tiny
1860      //        else // if C3 * 10^scale = 10^33
1861      //          if (xy * z > 0)
1862      //            the result is not tiny
1863      //          else // if (xy * z < 0)
1864      //            if (z > 0)
1865      //              if rnd_mode != RP
1866      //                the result is tiny
1867      //              else // if RP
1868      //                the result is not tiny
1869      //            else // if (z < 0)
1870      //              if rnd_mode != RM
1871      //                the result is tiny
1872      //              else // if RM
1873      //                the result is not tiny
1874      //              endif
1875      //            endif
1876      //          endif
1877      //        endif
1878      //      endif
1879      //    endif
1880      if ((e3 == expmin && (q3 + scale) < p34) ||
1881          (e3 == expmin && (q3 + scale) == p34 &&
1882          (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&	// 10^33_high
1883          res.w[0] == 0x38c15b0a00000000ull &&	// 10^33_low
1884          z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1885          (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1886        *pfpsf |= UNDERFLOW_EXCEPTION;
1887      }
1888      if (rnd_mode != ROUNDING_TO_NEAREST) {
1889        rounding_correction (rnd_mode,
1890        		     is_inexact_lt_midpoint,
1891        		     is_inexact_gt_midpoint,
1892        		     is_midpoint_lt_even, is_midpoint_gt_even,
1893        		     e3, &res, pfpsf);
1894      }
1895      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1896      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1897      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1898      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1899      BID_SWAP128 (res);
1900      BID_RETURN (res)
1901
1902    } else if (p34 == delta) { // Case (1''B)
1903
1904      // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1905      // and C3 can be scaled up to p34 digits if needed
1906
1907      // scale C3 to p34 digits if needed
1908      scale = p34 - q3; // 0 <= scale <= p34 - 1
1909      if (scale == 0) {
1910        res.w[1] = C3.w[1];
1911        res.w[0] = C3.w[0];
1912      } else if (q3 <= 19) { // z fits in 64 bits
1913        if (scale <= 19) { // 10^scale fits in 64 bits
1914          // 64 x 64 C3.w[0] * ten2k64[scale]
1915          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1916        } else { // 10^scale fits in 128 bits
1917          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1918          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1919        }
1920      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1921        // 64 x 128 ten2k64[scale] * C3
1922        __mul_128x64_to_128 (res, ten2k64[scale], C3);
1923      }
1924      // subtract scale from the exponent
1925      z_exp = z_exp - ((UINT64) scale << 49);
1926      e3 = e3 - scale;
1927      // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1928
1929      // determine whether x * y is less than, equal to, or greater than
1930      // 1/2 ulp (z)
1931      if (q4 <= 19) {
1932        if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1933          lt_half_ulp = 1;
1934        } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1935          eq_half_ulp = 1;
1936        } else { // > 1/2 ulp
1937          gt_half_ulp = 1;
1938        }
1939      } else if (q4 <= 38) {
1940        if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1941            (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1942            C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1943          lt_half_ulp = 1;
1944        } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1945            C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1946          eq_half_ulp = 1;
1947        } else { // > 1/2 ulp
1948          gt_half_ulp = 1;
1949        }
1950      } else if (q4 <= 58) {
1951        if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1952            (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953            C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1954            (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1955            C4.w[1] == midpoint192[q4 - 39].w[1] &&
1956            C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1957          lt_half_ulp = 1;
1958        } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1959            C4.w[1] == midpoint192[q4 - 39].w[1] &&
1960            C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1961          eq_half_ulp = 1;
1962        } else { // > 1/2 ulp
1963          gt_half_ulp = 1;
1964        }
1965      } else {
1966        if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1967            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968            C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1969            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1970            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1971            C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1972            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1974            C4.w[1] == midpoint256[q4 - 59].w[1] &&
1975            C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1976          lt_half_ulp = 1;
1977        } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979            C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980            C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1981          eq_half_ulp = 1;
1982        } else { // > 1/2 ulp
1983          gt_half_ulp = 1;
1984        }
1985      }
1986
1987      if (p_sign == z_sign) {
1988        if (lt_half_ulp) {
1989          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1990          // use the following to avoid double rounding errors when operating on
1991          // mixed formats in rounding to nearest
1992          is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1993        } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1994          // add 1 ulp to the significand
1995          res.w[0]++;
1996          if (res.w[0] == 0x0ull)
1997            res.w[1]++;
1998          // check for rounding overflow, when coeff == 10^34
1999          if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2000              res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2001            e3 = e3 + 1;
2002            // coeff = 10^33
2003            z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2004            res.w[1] = 0x0000314dc6448d93ull;
2005            res.w[0] = 0x38c15b0a00000000ull;
2006          }
2007          // end add 1 ulp
2008          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2009          if (eq_half_ulp) {
2010            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2011          } else {
2012            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2013          }
2014        } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2015          // leave unchanged
2016          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2017          is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2018        }
2019        // the result is always inexact, and never tiny
2020        // set the inexact flag
2021        *pfpsf |= INEXACT_EXCEPTION;
2022        // check for overflow
2023        if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2024          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2025          res.w[0] = 0x0000000000000000ull;
2026          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2027          *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2028          *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2029          *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2030          *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2031          BID_SWAP128 (res);
2032          BID_RETURN (res)
2033        }
2034        if (rnd_mode != ROUNDING_TO_NEAREST) {
2035          rounding_correction (rnd_mode,
2036        		       is_inexact_lt_midpoint,
2037        		       is_inexact_gt_midpoint,
2038        		       is_midpoint_lt_even, is_midpoint_gt_even,
2039        		       e3, &res, pfpsf);
2040          z_exp = res.w[1] & MASK_EXP;
2041        }
2042      } else { // if (p_sign != z_sign)
2043        // consider two cases, because C3 * 10^scale = 10^33 is a special case
2044        if (res.w[1] != 0x0000314dc6448d93ull ||
2045            res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2046          if (lt_half_ulp) {
2047            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2048            // use the following to avoid double rounding errors when operating
2049            // on mixed formats in rounding to nearest
2050            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2051          } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2052            // subtract 1 ulp from the significand
2053            res.w[0]--;
2054            if (res.w[0] == 0xffffffffffffffffull)
2055              res.w[1]--;
2056            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2057            if (eq_half_ulp) {
2058              is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2059            } else {
2060              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2061            }
2062          } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2063            // leave unchanged
2064            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2065            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2066          }
2067          // the result is always inexact, and never tiny
2068          // check for overflow for RN
2069          if (e3 > expmax) {
2070            if (rnd_mode == ROUNDING_TO_NEAREST) {
2071              res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2072              res.w[0] = 0x0000000000000000ull;
2073              *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2074            } else {
2075              rounding_correction (rnd_mode,
2076        			   is_inexact_lt_midpoint,
2077        			   is_inexact_gt_midpoint,
2078        			   is_midpoint_lt_even,
2079        			   is_midpoint_gt_even, e3, &res,
2080        			   pfpsf);
2081            }
2082            *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2083            *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2084            *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2085            *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2086            BID_SWAP128 (res);
2087            BID_RETURN (res)
2088          }
2089          // set the inexact flag
2090          *pfpsf |= INEXACT_EXCEPTION;
2091          if (rnd_mode != ROUNDING_TO_NEAREST) {
2092            rounding_correction (rnd_mode,
2093        			 is_inexact_lt_midpoint,
2094        			 is_inexact_gt_midpoint,
2095        			 is_midpoint_lt_even,
2096        			 is_midpoint_gt_even, e3, &res, pfpsf);
2097          }
2098          z_exp = res.w[1] & MASK_EXP;
2099        } else { // if C3 * 10^scale = 10^33
2100          e3 = (z_exp >> 49) - 6176;
2101          if (e3 > expmin) {
2102            // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2103            // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2104            if (q4 == 1) {
2105              // if q4 = 1 the result is exact
2106              // result coefficient = 10^34 - C4
2107              res.w[1] = 0x0001ed09bead87c0ull;
2108              res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2109              z_exp = z_exp - EXP_P1;
2110              e3 = e3 - 1;
2111              res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2112            } else {
2113              // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2114              // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2115              if (q4 <= 18) { // 2 <= q4 <= 18
2116        	round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2117        		      &is_midpoint_lt_even,
2118        		      &is_midpoint_gt_even,
2119        		      &is_inexact_lt_midpoint,
2120        		      &is_inexact_gt_midpoint);
2121              } else if (q4 <= 38) {
2122        	P128.w[1] = C4.w[1];
2123        	P128.w[0] = C4.w[0];
2124        	round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2125        			&is_midpoint_lt_even,
2126        			&is_midpoint_gt_even,
2127        			&is_inexact_lt_midpoint,
2128        			&is_inexact_gt_midpoint);
2129        	R64 = R128.w[0]; // one decimal digit
2130              } else if (q4 <= 57) {
2131        	P192.w[2] = C4.w[2];
2132        	P192.w[1] = C4.w[1];
2133        	P192.w[0] = C4.w[0];
2134        	round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2135        			&is_midpoint_lt_even,
2136        			&is_midpoint_gt_even,
2137        			&is_inexact_lt_midpoint,
2138        			&is_inexact_gt_midpoint);
2139        	R64 = R192.w[0]; // one decimal digit
2140              } else { // if (q4 <= 68)
2141        	round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2142        			&is_midpoint_lt_even,
2143        			&is_midpoint_gt_even,
2144        			&is_inexact_lt_midpoint,
2145        			&is_inexact_gt_midpoint);
2146        	R64 = R256.w[0]; // one decimal digit
2147              }
2148              if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2149        	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2150        	// the result is exact: 10^34 - R64
2151        	// incr_exp = 0 with certainty
2152        	z_exp = z_exp - EXP_P1;
2153        	e3 = e3 - 1;
2154        	res.w[1] =
2155        	  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2156        	res.w[0] = 0x378d8e6400000000ull - R64;
2157              } else {
2158        	// We want R64 to be the top digit of C4, but we actually
2159        	// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2160        	// because the top digit is (C4 * 10^(-q4+1))RZ
2161        	// however, if incr_exp = 1 then R64 = 10 with certainty
2162        	if (incr_exp) {
2163        	  R64 = 10;
2164        	}
2165        	// the result is inexact as C4 has more than 1 significant digit
2166        	// and C3 * 10^scale = 10^33
2167        	// example of case that is treated here:
2168        	// 100...0 * 10^e3 - 0.41 * 10^e3 =
2169        	// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2170        	// note that (e3 > expmin}
2171        	// in order to round, subtract R64 from 10^34 and then compare
2172        	// C4 - R64 * 10^(q4-1) with 1/2 ulp
2173        	// calculate 10^34 - R64
2174        	res.w[1] = 0x0001ed09bead87c0ull;
2175        	res.w[0] = 0x378d8e6400000000ull - R64;
2176        	z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2177        	// calculate C4 - R64 * 10^(q4-1); this is a rare case and
2178        	// R64 is small, 1 <= R64 <= 9
2179        	e3 = e3 - 1;
2180        	if (is_inexact_lt_midpoint) {
2181        	  is_inexact_lt_midpoint = 0;
2182        	  is_inexact_gt_midpoint = 1;
2183        	} else if (is_inexact_gt_midpoint) {
2184        	  is_inexact_gt_midpoint = 0;
2185        	  is_inexact_lt_midpoint = 1;
2186        	} else if (is_midpoint_lt_even) {
2187        	  is_midpoint_lt_even = 0;
2188        	  is_midpoint_gt_even = 1;
2189        	} else if (is_midpoint_gt_even) {
2190        	  is_midpoint_gt_even = 0;
2191        	  is_midpoint_lt_even = 1;
2192        	} else {
2193        	  ;
2194        	}
2195        	// the result is always inexact, and never tiny
2196        	// check for overflow for RN
2197        	if (e3 > expmax) {
2198        	  if (rnd_mode == ROUNDING_TO_NEAREST) {
2199        	    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2200        	    res.w[0] = 0x0000000000000000ull;
2201        	    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2202        	  } else {
2203        	    rounding_correction (rnd_mode,
2204        				 is_inexact_lt_midpoint,
2205        				 is_inexact_gt_midpoint,
2206        				 is_midpoint_lt_even,
2207        				 is_midpoint_gt_even, e3, &res,
2208        				 pfpsf);
2209        	  }
2210        	  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2211        	  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2212        	  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2213        	  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2214        	  BID_SWAP128 (res);
2215        	  BID_RETURN (res)
2216        	}
2217        	// set the inexact flag
2218        	*pfpsf |= INEXACT_EXCEPTION;
2219        	res.w[1] =
2220        	  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2221        	if (rnd_mode != ROUNDING_TO_NEAREST) {
2222        	  rounding_correction (rnd_mode,
2223        			       is_inexact_lt_midpoint,
2224        			       is_inexact_gt_midpoint,
2225        			       is_midpoint_lt_even,
2226        			       is_midpoint_gt_even, e3, &res,
2227        			       pfpsf);
2228        	}
2229        	z_exp = res.w[1] & MASK_EXP;
2230              } // end result is inexact
2231            } // end q4 > 1
2232          } else { // if (e3 = emin)
2233            // if e3 = expmin the result is also tiny (the condition for
2234            // tininess is C4 > 050...0 [q4 digits] which is met because
2235            // the msd of C4 is not zero)
2236            // the result is tiny and inexact in all rounding modes;
2237            // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2238            // gt_half_ulp to calculate)
2239            // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2240
2241            // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2242            if (gt_half_ulp) { // res = 10^33 - 1
2243              res.w[1] = 0x0000314dc6448d93ull;
2244              res.w[0] = 0x38c15b09ffffffffull;
2245            } else {
2246              res.w[1] = 0x0000314dc6448d93ull;
2247              res.w[0] = 0x38c15b0a00000000ull;
2248            }
2249            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2250            *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2251
2252            if (eq_half_ulp) {
2253              is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2254            } else if (lt_half_ulp) {
2255              is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2256            } else { // if (gt_half_ulp)
2257              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2258            }
2259
2260            if (rnd_mode != ROUNDING_TO_NEAREST) {
2261              rounding_correction (rnd_mode,
2262                  is_inexact_lt_midpoint,
2263                  is_inexact_gt_midpoint,
2264                  is_midpoint_lt_even,
2265                  is_midpoint_gt_even, e3, &res,
2266                  pfpsf);
2267              z_exp = res.w[1] & MASK_EXP;
2268            }
2269          } // end e3 = emin
2270          // set the inexact flag (if the result was not exact)
2271          if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2272              is_midpoint_lt_even || is_midpoint_gt_even)
2273            *pfpsf |= INEXACT_EXCEPTION;
2274        } // end 10^33
2275      } // end if (p_sign != z_sign)
2276      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2277      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2278      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2279      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2280      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2281      BID_SWAP128 (res);
2282      BID_RETURN (res)
2283
2284    } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2285        (q3 <= delta && delta + q4 <= p34) || // Case (3)
2286        (delta < q3 && p34 < delta + q4) || // Case (4)
2287        (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2288        (delta + q4 < q3)) && // Case (6)
2289        !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2290
2291      // the result has the sign of z
2292
2293      if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2294          (delta < q3 && p34 < delta + q4)) { // Case (4)
2295        // round first the sum x * y + z with unbounded exponent
2296        // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2297        // 1 <= scale <= 33
2298        // calculate res = C3 * 10^scale
2299        scale = p34 - q3;
2300        x0 = delta + q4 - p34;
2301      } else if (delta + q4 < q3) { // Case (6)
2302        // make Case (6) look like Case (3) or Case (5) with scale = 0
2303        // by scaling up C4 by 10^(q3 - delta - q4)
2304        scale = q3 - delta - q4; // 1 <= scale <= 33
2305        if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2306          if (scale <= 19) { // 10^scale fits in 64 bits
2307            // 64 x 64 C4.w[0] * ten2k64[scale]
2308            __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2309          } else { // 10^scale fits in 128 bits
2310            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2311            __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2312          }
2313        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2314          // 64 x 128 ten2k64[scale] * C4
2315          __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2316        }
2317        C4.w[0] = P128.w[0];
2318        C4.w[1] = P128.w[1];
2319        // e4 does not need adjustment, as it is not used from this point on
2320        scale = 0;
2321        x0 = 0;
2322        // now Case (6) looks like Case (3) or Case (5) with scale = 0
2323      } else { // if Case (3) or Case (5)
2324        // Note: Case (3) is similar to Case (2), but scale differs and the
2325        // result is exact, unless it is tiny (so x0 = 0 when calculating the
2326        // result with unbounded exponent)
2327
2328        // calculate first the sum x * y + z with unbounded exponent (exact)
2329        // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2330        // 1 <= scale <= 33
2331        // calculate res = C3 * 10^scale
2332        scale = delta + q4 - q3;
2333        x0 = 0;
2334        // Note: the comments which follow refer [mainly] to Case (2)]
2335      }
2336
2337    case2_repeat:
2338      if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2339        // or in Case (4)
2340        res.w[1] = C3.w[1];
2341        res.w[0] = C3.w[0];
2342      } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2343        if (scale <= 19) { // 10^scale fits in 64 bits
2344          // 64 x 64 C3.w[0] * ten2k64[scale]
2345          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2346        } else { // 10^scale fits in 128 bits
2347          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2348          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2349        }
2350      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2351        // 64 x 128 ten2k64[scale] * C3
2352        __mul_128x64_to_128 (res, ten2k64[scale], C3);
2353      }
2354      // e3 is already calculated
2355      e3 = e3 - scale;
2356      // now res = C3 * 10^scale and e3 = e3 - scale
2357      // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2358      // because the result was too small
2359
2360      // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2361      // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2362      // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2363      // the rounding fits in 128 bits!)
2364      // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2365      // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2366      if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2367        // for Case (3) or Case (6)
2368        R128.w[1] = C4.w[1];
2369        R128.w[0] = C4.w[0];
2370      } else if (q4 <= 18) {
2371        // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2372        round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2373            &is_midpoint_lt_even, &is_midpoint_gt_even,
2374            &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2375        if (incr_exp) {
2376          // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2377          R64 = ten2k64[q4 - x0];
2378        }
2379        R128.w[1] = 0;
2380        R128.w[0] = R64;
2381      } else if (q4 <= 38) {
2382        // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2383        P128.w[1] = C4.w[1];
2384        P128.w[0] = C4.w[0];
2385        round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2386            &is_midpoint_lt_even, &is_midpoint_gt_even,
2387            &is_inexact_lt_midpoint,
2388            &is_inexact_gt_midpoint);
2389        if (incr_exp) {
2390          // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2391          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2392            R128.w[0] = ten2k64[q4 - x0];
2393            // R128.w[1] stays 0
2394          } else { // 20 <= q4 - x0 <= 37
2395            R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2396            R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2397          }
2398        }
2399      } else if (q4 <= 57) {
2400        // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2401        P192.w[2] = C4.w[2];
2402        P192.w[1] = C4.w[1];
2403        P192.w[0] = C4.w[0];
2404        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2405            &is_midpoint_lt_even, &is_midpoint_gt_even,
2406            &is_inexact_lt_midpoint,
2407            &is_inexact_gt_midpoint);
2408        // R192.w[2] is always 0
2409        if (incr_exp) {
2410          // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2411          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2412            R192.w[0] = ten2k64[q4 - x0];
2413            // R192.w[1] stays 0
2414            // R192.w[2] stays 0
2415          } else { // 20 <= q4 - x0 <= 33
2416            R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2417            R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2418            // R192.w[2] stays 0
2419          }
2420        }
2421        R128.w[1] = R192.w[1];
2422        R128.w[0] = R192.w[0];
2423      } else {
2424        // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2425        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2426            &is_midpoint_lt_even, &is_midpoint_gt_even,
2427            &is_inexact_lt_midpoint,
2428            &is_inexact_gt_midpoint);
2429        // R256.w[3] and R256.w[2] are always 0
2430        if (incr_exp) {
2431          // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2432          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2433            R256.w[0] = ten2k64[q4 - x0];
2434            // R256.w[1] stays 0
2435            // R256.w[2] stays 0
2436            // R256.w[3] stays 0
2437          } else { // 20 <= q4 - x0 <= 33
2438            R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2439            R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2440            // R256.w[2] stays 0
2441            // R256.w[3] stays 0
2442          }
2443        }
2444        R128.w[1] = R256.w[1];
2445        R128.w[0] = R256.w[0];
2446      }
2447      // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2448      // rounded to nearest, which were copied into R128
2449      if (z_sign == p_sign) {
2450        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2451        // the sum can result in [up to] p34 or p34 + 1 digits
2452        res.w[0] = res.w[0] + R128.w[0];
2453        res.w[1] = res.w[1] + R128.w[1];
2454        if (res.w[0] < R128.w[0])
2455          res.w[1]++; // carry
2456        // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2457        if (res.w[1] > 0x0001ed09bead87c0ull ||
2458            (res.w[1] == 0x0001ed09bead87c0ull &&
2459             res.w[0] > 0x378d8e63ffffffffull)) {
2460          // avoid double rounding error
2461          is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2462          is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2463          is_midpoint_lt_even0 = is_midpoint_lt_even;
2464          is_midpoint_gt_even0 = is_midpoint_gt_even;
2465          is_inexact_lt_midpoint = 0;
2466          is_inexact_gt_midpoint = 0;
2467          is_midpoint_lt_even = 0;
2468          is_midpoint_gt_even = 0;
2469          P128.w[1] = res.w[1];
2470          P128.w[0] = res.w[0];
2471          round128_19_38 (35, 1, P128, &res, &incr_exp,
2472              &is_midpoint_lt_even, &is_midpoint_gt_even,
2473              &is_inexact_lt_midpoint,
2474              &is_inexact_gt_midpoint);
2475          // incr_exp is 0 with certainty in this case
2476          // avoid a double rounding error
2477          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2478              is_midpoint_lt_even) { // double rounding error upward
2479            // res = res - 1
2480            res.w[0]--;
2481            if (res.w[0] == 0xffffffffffffffffull)
2482              res.w[1]--;
2483            // Note: a double rounding error upward is not possible; for this
2484            // the result after the first rounding would have to be 99...95
2485            // (35 digits in all), possibly followed by a number of zeros; this
2486            // not possible in Cases (2)-(6) or (15)-(17) which may get here
2487            is_midpoint_lt_even = 0;
2488            is_inexact_lt_midpoint = 1;
2489          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2490              is_midpoint_gt_even) { // double rounding error downward
2491            // res = res + 1
2492            res.w[0]++;
2493            if (res.w[0] == 0)
2494              res.w[1]++;
2495            is_midpoint_gt_even = 0;
2496            is_inexact_gt_midpoint = 1;
2497          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2498        	     !is_inexact_lt_midpoint
2499        	     && !is_inexact_gt_midpoint) {
2500            // if this second rounding was exact the result may still be
2501            // inexact because of the first rounding
2502            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2503              is_inexact_gt_midpoint = 1;
2504            }
2505            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2506              is_inexact_lt_midpoint = 1;
2507            }
2508          } else if (is_midpoint_gt_even &&
2509        	     (is_inexact_gt_midpoint0
2510        	      || is_midpoint_lt_even0)) {
2511            // pulled up to a midpoint
2512            is_inexact_lt_midpoint = 1;
2513            is_inexact_gt_midpoint = 0;
2514            is_midpoint_lt_even = 0;
2515            is_midpoint_gt_even = 0;
2516          } else if (is_midpoint_lt_even &&
2517        	     (is_inexact_lt_midpoint0
2518        	      || is_midpoint_gt_even0)) {
2519            // pulled down to a midpoint
2520            is_inexact_lt_midpoint = 0;
2521            is_inexact_gt_midpoint = 1;
2522            is_midpoint_lt_even = 0;
2523            is_midpoint_gt_even = 0;
2524          } else {
2525            ;
2526          }
2527          // adjust exponent
2528          e3 = e3 + 1;
2529          if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2530              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2531            if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2532        	is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2533              is_inexact_lt_midpoint = 1;
2534            }
2535          }
2536        } else {
2537          // this is the result rounded with unbounded exponent, unless a
2538          // correction is needed
2539          res.w[1] = res.w[1] & MASK_COEFF;
2540          if (lsb == 1) {
2541            if (is_midpoint_gt_even) {
2542              // res = res + 1
2543              is_midpoint_gt_even = 0;
2544              is_midpoint_lt_even = 1;
2545              res.w[0]++;
2546              if (res.w[0] == 0x0)
2547        	res.w[1]++;
2548              // check for rounding overflow
2549              if (res.w[1] == 0x0001ed09bead87c0ull &&
2550        	  res.w[0] == 0x378d8e6400000000ull) {
2551        	// res = 10^34 => rounding overflow
2552        	res.w[1] = 0x0000314dc6448d93ull;
2553        	res.w[0] = 0x38c15b0a00000000ull; // 10^33
2554        	e3++;
2555              }
2556            } else if (is_midpoint_lt_even) {
2557              // res = res - 1
2558              is_midpoint_lt_even = 0;
2559              is_midpoint_gt_even = 1;
2560              res.w[0]--;
2561              if (res.w[0] == 0xffffffffffffffffull)
2562        	res.w[1]--;
2563              // if the result is pure zero, the sign depends on the rounding
2564              // mode (x*y and z had opposite signs)
2565              if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2566        	if (rnd_mode != ROUNDING_DOWN)
2567        	  z_sign = 0x0000000000000000ull;
2568        	else
2569        	  z_sign = 0x8000000000000000ull;
2570        	// the exponent is max (e3, expmin)
2571        	res.w[1] = 0x0;
2572        	res.w[0] = 0x0;
2573        	*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2574        	*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2575        	*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2576        	*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2577        	BID_SWAP128 (res);
2578        	BID_RETURN (res)
2579              }
2580            } else {
2581              ;
2582            }
2583          }
2584        }
2585      } else { // if (z_sign != p_sign)
2586        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2587        // used to swap rounding indicators if p_sign != z_sign
2588        // the sum can result in [up to] p34 or p34 - 1 digits
2589        tmp64 = res.w[0];
2590        res.w[0] = res.w[0] - R128.w[0];
2591        res.w[1] = res.w[1] - R128.w[1];
2592        if (res.w[0] > tmp64)
2593          res.w[1]--; // borrow
2594        // if res < 10^33 and exp > expmin need to decrease x0 and
2595        // increase scale by 1
2596        if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2597        		     (res.w[1] == 0x0000314dc6448d93ull &&
2598        		      res.w[0] < 0x38c15b0a00000000ull)) ||
2599        		    (is_inexact_lt_midpoint
2600        		     && res.w[1] == 0x0000314dc6448d93ull
2601        		     && res.w[0] == 0x38c15b0a00000000ull))
2602            && x0 >= 1) {
2603          x0 = x0 - 1;
2604          // first restore e3, otherwise it will be too small
2605          e3 = e3 + scale;
2606          scale = scale + 1;
2607          is_inexact_lt_midpoint = 0;
2608          is_inexact_gt_midpoint = 0;
2609          is_midpoint_lt_even = 0;
2610          is_midpoint_gt_even = 0;
2611          incr_exp = 0;
2612          goto case2_repeat;
2613        }
2614        // else this is the result rounded with unbounded exponent;
2615        // because the result has opposite sign to that of C4 which was
2616        // rounded, need to change the rounding indicators
2617        if (is_inexact_lt_midpoint) {
2618          is_inexact_lt_midpoint = 0;
2619          is_inexact_gt_midpoint = 1;
2620        } else if (is_inexact_gt_midpoint) {
2621          is_inexact_gt_midpoint = 0;
2622          is_inexact_lt_midpoint = 1;
2623        } else if (lsb == 0) {
2624          if (is_midpoint_lt_even) {
2625            is_midpoint_lt_even = 0;
2626            is_midpoint_gt_even = 1;
2627          } else if (is_midpoint_gt_even) {
2628            is_midpoint_gt_even = 0;
2629            is_midpoint_lt_even = 1;
2630          } else {
2631            ;
2632          }
2633        } else if (lsb == 1) {
2634          if (is_midpoint_lt_even) {
2635            // res = res + 1
2636            res.w[0]++;
2637            if (res.w[0] == 0x0)
2638              res.w[1]++;
2639            // check for rounding overflow
2640            if (res.w[1] == 0x0001ed09bead87c0ull &&
2641        	res.w[0] == 0x378d8e6400000000ull) {
2642              // res = 10^34 => rounding overflow
2643              res.w[1] = 0x0000314dc6448d93ull;
2644              res.w[0] = 0x38c15b0a00000000ull; // 10^33
2645              e3++;
2646            }
2647          } else if (is_midpoint_gt_even) {
2648            // res = res - 1
2649            res.w[0]--;
2650            if (res.w[0] == 0xffffffffffffffffull)
2651              res.w[1]--;
2652            // if the result is pure zero, the sign depends on the rounding
2653            // mode (x*y and z had opposite signs)
2654            if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2655              if (rnd_mode != ROUNDING_DOWN)
2656        	z_sign = 0x0000000000000000ull;
2657              else
2658        	z_sign = 0x8000000000000000ull;
2659              // the exponent is max (e3, expmin)
2660              res.w[1] = 0x0;
2661              res.w[0] = 0x0;
2662              *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2663              *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2664              *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2665              *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2666              BID_SWAP128 (res);
2667              BID_RETURN (res)
2668            }
2669          } else {
2670            ;
2671          }
2672        } else {
2673          ;
2674        }
2675      }
2676      // check for underflow
2677      if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2678        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2679            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2680             res.w[0] < 0x38c15b0a00000000ull)) {
2681          is_tiny = 1;
2682        }
2683      } else if (e3 < expmin) {
2684        // the result is tiny, so we must truncate more of res
2685        is_tiny = 1;
2686        x0 = expmin - e3;
2687        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2688        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2689        is_midpoint_lt_even0 = is_midpoint_lt_even;
2690        is_midpoint_gt_even0 = is_midpoint_gt_even;
2691        is_inexact_lt_midpoint = 0;
2692        is_inexact_gt_midpoint = 0;
2693        is_midpoint_lt_even = 0;
2694        is_midpoint_gt_even = 0;
2695        // determine the number of decimal digits in res
2696        if (res.w[1] == 0x0) {
2697          // between 1 and 19 digits
2698          for (ind = 1; ind <= 19; ind++) {
2699            if (res.w[0] < ten2k64[ind]) {
2700              break;
2701            }
2702          }
2703          // ind digits
2704        } else if (res.w[1] < ten2k128[0].w[1] ||
2705        	   (res.w[1] == ten2k128[0].w[1]
2706        	    && res.w[0] < ten2k128[0].w[0])) {
2707          // 20 digits
2708          ind = 20;
2709        } else { // between 21 and 38 digits
2710          for (ind = 1; ind <= 18; ind++) {
2711            if (res.w[1] < ten2k128[ind].w[1] ||
2712        	(res.w[1] == ten2k128[ind].w[1] &&
2713        	 res.w[0] < ten2k128[ind].w[0])) {
2714              break;
2715            }
2716          }
2717          // ind + 20 digits
2718          ind = ind + 20;
2719        }
2720
2721        // at this point ind >= x0; because delta >= 2 on this path, the case
2722        // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2723        // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2724        // the signs of x * y and z are opposite, and through cancellation
2725        // the most significant decimal digit in res has the weight
2726        // 10^(emin-1); however, it is clear that in this case the most
2727        // significant digit is 9, so the result before rounding is
2728        // 0.9... * 10^emin
2729        // Otherwise, ind > x0 because there are non-zero decimal digits in the
2730        // result with weight of at least 10^emin, and correction for underflow
2731        //  can be carried out using the round*_*_2_* () routines
2732        if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2733          res.w[1] = 0x0;
2734          res.w[0] = 0x1;
2735          is_inexact_gt_midpoint = 1;
2736        } else if (ind <= 18) { // check that 2 <= ind
2737          // 2 <= ind <= 18, 1 <= x0 <= 17
2738          round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2739        		&is_midpoint_lt_even, &is_midpoint_gt_even,
2740        		&is_inexact_lt_midpoint,
2741        		&is_inexact_gt_midpoint);
2742          if (incr_exp) {
2743            // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2744            R64 = ten2k64[ind - x0];
2745          }
2746          res.w[1] = 0;
2747          res.w[0] = R64;
2748        } else if (ind <= 38) {
2749          // 19 <= ind <= 38
2750          P128.w[1] = res.w[1];
2751          P128.w[0] = res.w[0];
2752          round128_19_38 (ind, x0, P128, &res, &incr_exp,
2753        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
2754        		  &is_inexact_lt_midpoint,
2755        		  &is_inexact_gt_midpoint);
2756          if (incr_exp) {
2757            // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2758            if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2759              res.w[0] = ten2k64[ind - x0];
2760              // res.w[1] stays 0
2761            } else { // 20 <= ind - x0 <= 37
2762              res.w[0] = ten2k128[ind - x0 - 20].w[0];
2763              res.w[1] = ten2k128[ind - x0 - 20].w[1];
2764            }
2765          }
2766        }
2767        // avoid a double rounding error
2768        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2769            is_midpoint_lt_even) { // double rounding error upward
2770          // res = res - 1
2771          res.w[0]--;
2772          if (res.w[0] == 0xffffffffffffffffull)
2773            res.w[1]--;
2774          // Note: a double rounding error upward is not possible; for this
2775          // the result after the first rounding would have to be 99...95
2776          // (35 digits in all), possibly followed by a number of zeros; this
2777          // not possible in Cases (2)-(6) which may get here
2778          is_midpoint_lt_even = 0;
2779          is_inexact_lt_midpoint = 1;
2780        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2781            is_midpoint_gt_even) { // double rounding error downward
2782          // res = res + 1
2783          res.w[0]++;
2784          if (res.w[0] == 0)
2785            res.w[1]++;
2786          is_midpoint_gt_even = 0;
2787          is_inexact_gt_midpoint = 1;
2788        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2789        	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2790          // if this second rounding was exact the result may still be
2791          // inexact because of the first rounding
2792          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2793            is_inexact_gt_midpoint = 1;
2794          }
2795          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2796            is_inexact_lt_midpoint = 1;
2797          }
2798        } else if (is_midpoint_gt_even &&
2799        	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2800          // pulled up to a midpoint
2801          is_inexact_lt_midpoint = 1;
2802          is_inexact_gt_midpoint = 0;
2803          is_midpoint_lt_even = 0;
2804          is_midpoint_gt_even = 0;
2805        } else if (is_midpoint_lt_even &&
2806        	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2807          // pulled down to a midpoint
2808          is_inexact_lt_midpoint = 0;
2809          is_inexact_gt_midpoint = 1;
2810          is_midpoint_lt_even = 0;
2811          is_midpoint_gt_even = 0;
2812        } else {
2813          ;
2814        }
2815        // adjust exponent
2816        e3 = e3 + x0;
2817        if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2818            !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2819          if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2820              is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2821            is_inexact_lt_midpoint = 1;
2822          }
2823        }
2824      } else {
2825        ; // not underflow
2826      }
2827      // check for inexact result
2828      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2829          is_midpoint_lt_even || is_midpoint_gt_even) {
2830        // set the inexact flag
2831        *pfpsf |= INEXACT_EXCEPTION;
2832        if (is_tiny)
2833          *pfpsf |= UNDERFLOW_EXCEPTION;
2834      }
2835      // now check for significand = 10^34 (may have resulted from going
2836      // back to case2_repeat)
2837      if (res.w[1] == 0x0001ed09bead87c0ull &&
2838          res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
2839        res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2840        res.w[0] = 0x38c15b0a00000000ull;
2841        e3 = e3 + 1;
2842      }
2843      res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2844      // check for overflow
2845      if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2846        res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2847        res.w[0] = 0x0000000000000000ull;
2848        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2849      }
2850      if (rnd_mode != ROUNDING_TO_NEAREST) {
2851        rounding_correction (rnd_mode,
2852        		     is_inexact_lt_midpoint,
2853        		     is_inexact_gt_midpoint,
2854        		     is_midpoint_lt_even, is_midpoint_gt_even,
2855        		     e3, &res, pfpsf);
2856      }
2857      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2858      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2859      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2860      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2861      BID_SWAP128 (res);
2862      BID_RETURN (res)
2863
2864    } else {
2865
2866      // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2867      // the signs of x*y and z are opposite; in these cases massive
2868      // cancellation can occur, so it is better to scale either C3 or C4 and
2869      // to perform the subtraction before rounding; rounding is performed
2870      // next, depending on the number of decimal digits in the result and on
2871      // the exponent value
2872      // Note: overlow is not possible in this case
2873      // this is similar to Cases (15), (16), and (17)
2874
2875      if (delta + q4 < q3) { // from Case (6)
2876        // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2877        // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2878        // and call add_and_round; delta stays positive
2879        // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2880        P128.w[1] = C3.w[1];
2881        P128.w[0] = C3.w[0];
2882        C3.w[1] = C4.w[1];
2883        C3.w[0] = C4.w[0];
2884        C4.w[1] = P128.w[1];
2885        C4.w[0] = P128.w[0];
2886        ind = q3;
2887        q3 = q4;
2888        q4 = ind;
2889        ind = e3;
2890        e3 = e4;
2891        e4 = ind;
2892        tmp_sign = z_sign;
2893        z_sign = p_sign;
2894        p_sign = tmp_sign;
2895      } else { // from Cases (2), (3), (4), (5)
2896        // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2897        // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2898        // (16), and (17) if we just change the sign of delta
2899        delta = -delta;
2900      }
2901      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2902        	     rnd_mode, &is_midpoint_lt_even,
2903        	     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2904        	     &is_inexact_gt_midpoint, pfpsf, &res);
2905      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2906      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2907      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2908      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2909      BID_SWAP128 (res);
2910      BID_RETURN (res)
2911
2912    }
2913
2914  } else { // if delta < 0
2915
2916    delta = -delta;
2917
2918    if (p34 < q4 && q4 <= delta) { // Case (7)
2919
2920      // truncate C4 to p34 digits into res
2921      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2922      x0 = q4 - p34;
2923      if (q4 <= 38) {
2924        P128.w[1] = C4.w[1];
2925        P128.w[0] = C4.w[0];
2926        round128_19_38 (q4, x0, P128, &res, &incr_exp,
2927        		&is_midpoint_lt_even, &is_midpoint_gt_even,
2928        		&is_inexact_lt_midpoint,
2929        		&is_inexact_gt_midpoint);
2930      } else if (q4 <= 57) { // 35 <= q4 <= 57
2931        P192.w[2] = C4.w[2];
2932        P192.w[1] = C4.w[1];
2933        P192.w[0] = C4.w[0];
2934        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2935        		&is_midpoint_lt_even, &is_midpoint_gt_even,
2936        		&is_inexact_lt_midpoint,
2937        		&is_inexact_gt_midpoint);
2938        res.w[0] = R192.w[0];
2939        res.w[1] = R192.w[1];
2940      } else { // if (q4 <= 68)
2941        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2942        		&is_midpoint_lt_even, &is_midpoint_gt_even,
2943        		&is_inexact_lt_midpoint,
2944        		&is_inexact_gt_midpoint);
2945        res.w[0] = R256.w[0];
2946        res.w[1] = R256.w[1];
2947      }
2948      e4 = e4 + x0;
2949      if (incr_exp) {
2950        e4 = e4 + 1;
2951      }
2952      if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2953          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2954        // if C4 rounded to p34 digits is exact then the result is inexact,
2955        // in a way that depends on the signs of x * y and z
2956        if (p_sign == z_sign) {
2957          is_inexact_lt_midpoint = 1;
2958        } else { // if (p_sign != z_sign)
2959          if (res.w[1] != 0x0000314dc6448d93ull ||
2960              res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2961            is_inexact_gt_midpoint = 1;
2962          } else { // res = 10^33 and exact is a special case
2963            // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2964            // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2965            // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2966            // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2967            if (delta > p34 + 1) { // C3 < 1/2
2968              // res = 10^33, unchanged
2969              is_inexact_gt_midpoint = 1;
2970            } else { // if (delta == p34 + 1)
2971              if (q3 <= 19) {
2972        	if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2973        	  // res = 10^33, unchanged
2974        	  is_inexact_gt_midpoint = 1;
2975        	} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2976        	  // res = 10^33, unchanged
2977        	  is_midpoint_lt_even = 1;
2978        	} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2979        	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2980        	  res.w[0] = 0x378d8e63ffffffffull;
2981        	  e4 = e4 - 1;
2982        	  is_inexact_lt_midpoint = 1;
2983        	}
2984              } else { // if (20 <= q3 <=34)
2985        	if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2986                    (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2987                    C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2988        	  // res = 10^33, unchanged
2989        	  is_inexact_gt_midpoint = 1;
2990        	} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2991                    C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2992        	  // res = 10^33, unchanged
2993        	  is_midpoint_lt_even = 1;
2994        	} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2995        	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2996        	  res.w[0] = 0x378d8e63ffffffffull;
2997        	  e4 = e4 - 1;
2998        	  is_inexact_lt_midpoint = 1;
2999        	}
3000              }
3001            }
3002          }
3003        }
3004      } else if (is_midpoint_lt_even) {
3005        if (z_sign != p_sign) {
3006          // needs correction: res = res - 1
3007          res.w[0] = res.w[0] - 1;
3008          if (res.w[0] == 0xffffffffffffffffull)
3009            res.w[1]--;
3010          // if it is (10^33-1)*10^e4 then the corect result is
3011          // (10^34-1)*10(e4-1)
3012          if (res.w[1] == 0x0000314dc6448d93ull &&
3013              res.w[0] == 0x38c15b09ffffffffull) {
3014            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3015            res.w[0] = 0x378d8e63ffffffffull;
3016            e4 = e4 - 1;
3017          }
3018          is_midpoint_lt_even = 0;
3019          is_inexact_lt_midpoint = 1;
3020        } else { // if (z_sign == p_sign)
3021          is_midpoint_lt_even = 0;
3022          is_inexact_gt_midpoint = 1;
3023        }
3024      } else if (is_midpoint_gt_even) {
3025        if (z_sign == p_sign) {
3026          // needs correction: res = res + 1 (cannot cross in the next binade)
3027          res.w[0] = res.w[0] + 1;
3028          if (res.w[0] == 0x0000000000000000ull)
3029            res.w[1]++;
3030          is_midpoint_gt_even = 0;
3031          is_inexact_gt_midpoint = 1;
3032        } else { // if (z_sign != p_sign)
3033          is_midpoint_gt_even = 0;
3034          is_inexact_lt_midpoint = 1;
3035        }
3036      } else {
3037        ; // the rounded result is already correct
3038      }
3039      // check for overflow
3040      if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3041        res.w[1] = p_sign | 0x7800000000000000ull;
3042        res.w[0] = 0x0000000000000000ull;
3043        *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3044      } else { // no overflow or not RN
3045        p_exp = ((UINT64) (e4 + 6176) << 49);
3046        res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3047      }
3048      if (rnd_mode != ROUNDING_TO_NEAREST) {
3049        rounding_correction (rnd_mode,
3050        		     is_inexact_lt_midpoint,
3051        		     is_inexact_gt_midpoint,
3052        		     is_midpoint_lt_even, is_midpoint_gt_even,
3053        		     e4, &res, pfpsf);
3054      }
3055      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3056          is_midpoint_lt_even || is_midpoint_gt_even) {
3057        // set the inexact flag
3058        *pfpsf |= INEXACT_EXCEPTION;
3059      }
3060      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3061      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3062      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3063      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3064      BID_SWAP128 (res);
3065      BID_RETURN (res)
3066
3067    } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3068        (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3069        (q4 <= delta && delta + q3 <= p34) || // Case (10)
3070        (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3071        (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3072        (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3073
3074      // Case (8) is similar to Case (1), with C3 and C4 swapped
3075      // Case (9) is similar to Case (2), with C3 and C4 swapped
3076      // Case (10) is similar to Case (3), with C3 and C4 swapped
3077      // Case (13) is similar to Case (4), with C3 and C4 swapped
3078      // Case (14) is similar to Case (5), with C3 and C4 swapped
3079      // Case (18) is similar to Case (6), with C3 and C4 swapped
3080
3081      // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3082      // and go back to delta_ge_zero
3083      // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3084      P128.w[1] = C3.w[1];
3085      P128.w[0] = C3.w[0];
3086      C3.w[1] = C4.w[1];
3087      C3.w[0] = C4.w[0];
3088      C4.w[1] = P128.w[1];
3089      C4.w[0] = P128.w[0];
3090      ind = q3;
3091      q3 = q4;
3092      q4 = ind;
3093      ind = e3;
3094      e3 = e4;
3095      e4 = ind;
3096      tmp_sign = z_sign;
3097      z_sign = p_sign;
3098      p_sign = tmp_sign;
3099      tmp.ui64 = z_exp;
3100      z_exp = p_exp;
3101      p_exp = tmp.ui64;
3102      goto delta_ge_zero;
3103
3104    } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3105               (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3106
3107      // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3108      // 1 <= x0 <= q3 - 1 <= p34 - 1
3109      x0 = e4 - e3; // or x0 = delta + q3 - q4
3110      if (q3 <= 18) { // 2 <= q3 <= 18
3111        round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3112        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
3113        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3114        // C3.w[1] = 0;
3115        C3.w[0] = R64;
3116      } else if (q3 <= 38) {
3117        round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3118        		&is_midpoint_lt_even, &is_midpoint_gt_even,
3119        		&is_inexact_lt_midpoint,
3120        		&is_inexact_gt_midpoint);
3121        C3.w[1] = R128.w[1];
3122        C3.w[0] = R128.w[0];
3123      }
3124      // the rounded result has q3 - x0 digits
3125      // we want the exponent to be e4, so if incr_exp = 1 then
3126      // multiply the rounded result by 10 - it will still fit in 113 bits
3127      if (incr_exp) {
3128        // 64 x 128 -> 128
3129        P128.w[1] = C3.w[1];
3130        P128.w[0] = C3.w[0];
3131        __mul_64x128_to_128 (C3, ten2k64[1], P128);
3132      }
3133      e3 = e3 + x0; // this is e4
3134      // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3135      // the result will have the sign of x * y; the exponent is e4
3136      R256.w[3] = 0;
3137      R256.w[2] = 0;
3138      R256.w[1] = C3.w[1];
3139      R256.w[0] = C3.w[0];
3140      if (p_sign == z_sign) { // R256 = C4 + R256
3141        add256 (C4, R256, &R256);
3142      } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3143        sub256 (C4, R256, &R256); // the result cannot be pure zero
3144        // because the result has opposite sign to that of R256 which was
3145        // rounded, need to change the rounding indicators
3146        lsb = C4.w[0] & 0x01;
3147        if (is_inexact_lt_midpoint) {
3148          is_inexact_lt_midpoint = 0;
3149          is_inexact_gt_midpoint = 1;
3150        } else if (is_inexact_gt_midpoint) {
3151          is_inexact_gt_midpoint = 0;
3152          is_inexact_lt_midpoint = 1;
3153        } else if (lsb == 0) {
3154          if (is_midpoint_lt_even) {
3155            is_midpoint_lt_even = 0;
3156            is_midpoint_gt_even = 1;
3157          } else if (is_midpoint_gt_even) {
3158            is_midpoint_gt_even = 0;
3159            is_midpoint_lt_even = 1;
3160          } else {
3161            ;
3162          }
3163        } else if (lsb == 1) {
3164          if (is_midpoint_lt_even) {
3165            // res = res + 1
3166            R256.w[0]++;
3167            if (R256.w[0] == 0x0) {
3168              R256.w[1]++;
3169              if (R256.w[1] == 0x0) {
3170        	R256.w[2]++;
3171        	if (R256.w[2] == 0x0) {
3172        	  R256.w[3]++;
3173        	}
3174              }
3175            }
3176            // no check for rounding overflow - R256 was a difference
3177          } else if (is_midpoint_gt_even) {
3178            // res = res - 1
3179            R256.w[0]--;
3180            if (R256.w[0] == 0xffffffffffffffffull) {
3181              R256.w[1]--;
3182              if (R256.w[1] == 0xffffffffffffffffull) {
3183        	R256.w[2]--;
3184        	if (R256.w[2] == 0xffffffffffffffffull) {
3185        	  R256.w[3]--;
3186        	}
3187              }
3188            }
3189          } else {
3190            ;
3191          }
3192        } else {
3193          ;
3194        }
3195      }
3196      // determine the number of decimal digits in R256
3197      ind = nr_digits256 (R256); // ind >= p34
3198      // if R256 is sum, then ind > p34; if R256 is a difference, then
3199      // ind >= p34; this means that we can calculate the result rounded to
3200      // the destination precision, with unbounded exponent, starting from R256
3201      // and using the indicators from the rounding of C3 to avoid a double
3202      // rounding error
3203
3204      if (ind < p34) {
3205        ;
3206      } else if (ind == p34) {
3207        // the result rounded to the destination precision with
3208        // unbounded exponent
3209        // is (-1)^p_sign * R256 * 10^e4
3210        res.w[1] = R256.w[1];
3211        res.w[0] = R256.w[0];
3212      } else { // if (ind > p34)
3213        // if more than P digits, round to nearest to P digits
3214        // round R256 to p34 digits
3215        x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3216        // save C3 rounding indicators to help avoid double rounding error
3217        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3218        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3219        is_midpoint_lt_even0 = is_midpoint_lt_even;
3220        is_midpoint_gt_even0 = is_midpoint_gt_even;
3221        // initialize rounding indicators
3222        is_inexact_lt_midpoint = 0;
3223        is_inexact_gt_midpoint = 0;
3224        is_midpoint_lt_even = 0;
3225        is_midpoint_gt_even = 0;
3226        // round to p34 digits; the result fits in 113 bits
3227        if (ind <= 38) {
3228          P128.w[1] = R256.w[1];
3229          P128.w[0] = R256.w[0];
3230          round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3231        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3232        		  &is_inexact_lt_midpoint,
3233        		  &is_inexact_gt_midpoint);
3234        } else if (ind <= 57) {
3235          P192.w[2] = R256.w[2];
3236          P192.w[1] = R256.w[1];
3237          P192.w[0] = R256.w[0];
3238          round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3239        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3240        		  &is_inexact_lt_midpoint,
3241        		  &is_inexact_gt_midpoint);
3242          R128.w[1] = R192.w[1];
3243          R128.w[0] = R192.w[0];
3244        } else { // if (ind <= 68)
3245          round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3246        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3247        		  &is_inexact_lt_midpoint,
3248        		  &is_inexact_gt_midpoint);
3249          R128.w[1] = R256.w[1];
3250          R128.w[0] = R256.w[0];
3251        }
3252        // the rounded result has p34 = 34 digits
3253        e4 = e4 + x0 + incr_exp;
3254
3255        res.w[1] = R128.w[1];
3256        res.w[0] = R128.w[0];
3257
3258        // avoid a double rounding error
3259        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3260            is_midpoint_lt_even) { // double rounding error upward
3261          // res = res - 1
3262          res.w[0]--;
3263          if (res.w[0] == 0xffffffffffffffffull)
3264            res.w[1]--;
3265          is_midpoint_lt_even = 0;
3266          is_inexact_lt_midpoint = 1;
3267          // Note: a double rounding error upward is not possible; for this
3268          // the result after the first rounding would have to be 99...95
3269          // (35 digits in all), possibly followed by a number of zeros; this
3270          // not possible in Cases (2)-(6) or (15)-(17) which may get here
3271          // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3272          if (res.w[1] == 0x0000314dc6448d93ull &&
3273            res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3274            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3275            res.w[0] = 0x378d8e63ffffffffull;
3276            e4--;
3277          }
3278        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3279            is_midpoint_gt_even) { // double rounding error downward
3280          // res = res + 1
3281          res.w[0]++;
3282          if (res.w[0] == 0)
3283            res.w[1]++;
3284          is_midpoint_gt_even = 0;
3285          is_inexact_gt_midpoint = 1;
3286        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3287        	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3288          // if this second rounding was exact the result may still be
3289          // inexact because of the first rounding
3290          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3291            is_inexact_gt_midpoint = 1;
3292          }
3293          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3294            is_inexact_lt_midpoint = 1;
3295          }
3296        } else if (is_midpoint_gt_even &&
3297        	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3298          // pulled up to a midpoint
3299          is_inexact_lt_midpoint = 1;
3300          is_inexact_gt_midpoint = 0;
3301          is_midpoint_lt_even = 0;
3302          is_midpoint_gt_even = 0;
3303        } else if (is_midpoint_lt_even &&
3304        	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3305          // pulled down to a midpoint
3306          is_inexact_lt_midpoint = 0;
3307          is_inexact_gt_midpoint = 1;
3308          is_midpoint_lt_even = 0;
3309          is_midpoint_gt_even = 0;
3310        } else {
3311          ;
3312        }
3313      }
3314
3315      // determine tininess
3316      if (rnd_mode == ROUNDING_TO_NEAREST) {
3317        if (e4 < expmin) {
3318          is_tiny = 1; // for other rounding modes apply correction
3319        }
3320      } else {
3321        // for RM, RP, RZ, RA apply correction in order to determine tininess
3322        // but do not save the result; apply the correction to
3323        // (-1)^p_sign * res * 10^0
3324        P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3325        P128.w[0] = res.w[0];
3326        rounding_correction (rnd_mode,
3327        		     is_inexact_lt_midpoint,
3328        		     is_inexact_gt_midpoint,
3329        		     is_midpoint_lt_even, is_midpoint_gt_even,
3330        		     0, &P128, pfpsf);
3331        scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3332        // the number of digits in the significand is p34 = 34
3333        if (e4 + scale < expmin) {
3334          is_tiny = 1;
3335        }
3336      }
3337
3338      // the result rounded to the destination precision with unbounded exponent
3339      // is (-1)^p_sign * res * 10^e4
3340      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3341      // res.w[0] unchanged;
3342      // Note: res is correct only if expmin <= e4 <= expmax
3343      ind = p34; // the number of decimal digits in the signifcand of res
3344
3345      // at this point we have the result rounded with unbounded exponent in
3346      // res and we know its tininess:
3347      // res = (-1)^p_sign * significand * 10^e4,
3348      // where q (significand) = ind = p34
3349      // Note: res is correct only if expmin <= e4 <= expmax
3350
3351      // check for overflow if RN
3352      if (rnd_mode == ROUNDING_TO_NEAREST
3353          && (ind + e4) > (p34 + expmax)) {
3354        res.w[1] = p_sign | 0x7800000000000000ull;
3355        res.w[0] = 0x0000000000000000ull;
3356        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3357        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3358        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3359        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3360        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3361        BID_SWAP128 (res);
3362        BID_RETURN (res)
3363      } // else not overflow or not RN, so continue
3364
3365      // from this point on this is similar to the last part of the computation
3366      // for Cases (15), (16), (17)
3367
3368      // if (e4 >= expmin) we have the result rounded with bounded exponent
3369      if (e4 < expmin) {
3370        x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3371        // where the result rounded [at most] once is
3372        //   (-1)^p_sign * significand_res * 10^e4
3373
3374        // avoid double rounding error
3375        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3376        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3377        is_midpoint_lt_even0 = is_midpoint_lt_even;
3378        is_midpoint_gt_even0 = is_midpoint_gt_even;
3379        is_inexact_lt_midpoint = 0;
3380        is_inexact_gt_midpoint = 0;
3381        is_midpoint_lt_even = 0;
3382        is_midpoint_gt_even = 0;
3383
3384        if (x0 > ind) {
3385          // nothing is left of res when moving the decimal point left x0 digits
3386          is_inexact_lt_midpoint = 1;
3387          res.w[1] = p_sign | 0x0000000000000000ull;
3388          res.w[0] = 0x0000000000000000ull;
3389          e4 = expmin;
3390        } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3391          // this is <, =, or > 1/2 ulp
3392          // compare the ind-digit value in the significand of res with
3393          // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3394          // less than, equal to, or greater than 1/2 ulp (significand of res)
3395          R128.w[1] = res.w[1] & MASK_COEFF;
3396          R128.w[0] = res.w[0];
3397          if (ind <= 19) {
3398            if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3399              lt_half_ulp = 1;
3400              is_inexact_lt_midpoint = 1;
3401            } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3402              eq_half_ulp = 1;
3403              is_midpoint_gt_even = 1;
3404            } else { // > 1/2 ulp
3405              gt_half_ulp = 1;
3406              is_inexact_gt_midpoint = 1;
3407            }
3408          } else { // if (ind <= 38)
3409            if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3410                (R128.w[1] == midpoint128[ind - 20].w[1] &&
3411                R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3412              lt_half_ulp = 1;
3413              is_inexact_lt_midpoint = 1;
3414            } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3415                R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3416              eq_half_ulp = 1;
3417              is_midpoint_gt_even = 1;
3418            } else { // > 1/2 ulp
3419              gt_half_ulp = 1;
3420              is_inexact_gt_midpoint = 1;
3421            }
3422          }
3423          if (lt_half_ulp || eq_half_ulp) {
3424            // res = +0.0 * 10^expmin
3425            res.w[1] = 0x0000000000000000ull;
3426            res.w[0] = 0x0000000000000000ull;
3427          } else { // if (gt_half_ulp)
3428            // res = +1 * 10^expmin
3429            res.w[1] = 0x0000000000000000ull;
3430            res.w[0] = 0x0000000000000001ull;
3431          }
3432          res.w[1] = p_sign | res.w[1];
3433          e4 = expmin;
3434        } else { // if (1 <= x0 <= ind - 1 <= 33)
3435          // round the ind-digit result to ind - x0 digits
3436
3437          if (ind <= 18) { // 2 <= ind <= 18
3438            round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3439        		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3440        		  &is_inexact_lt_midpoint,
3441        		  &is_inexact_gt_midpoint);
3442            res.w[1] = 0x0;
3443            res.w[0] = R64;
3444          } else if (ind <= 38) {
3445            P128.w[1] = res.w[1] & MASK_COEFF;
3446            P128.w[0] = res.w[0];
3447            round128_19_38 (ind, x0, P128, &res, &incr_exp,
3448        		    &is_midpoint_lt_even, &is_midpoint_gt_even,
3449        		    &is_inexact_lt_midpoint,
3450        		    &is_inexact_gt_midpoint);
3451          }
3452          e4 = e4 + x0; // expmin
3453          // we want the exponent to be expmin, so if incr_exp = 1 then
3454          // multiply the rounded result by 10 - it will still fit in 113 bits
3455          if (incr_exp) {
3456            // 64 x 128 -> 128
3457            P128.w[1] = res.w[1] & MASK_COEFF;
3458            P128.w[0] = res.w[0];
3459            __mul_64x128_to_128 (res, ten2k64[1], P128);
3460          }
3461          res.w[1] =
3462            p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3463        					     w[1] & MASK_COEFF);
3464          // avoid a double rounding error
3465          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3466                is_midpoint_lt_even) { // double rounding error upward
3467            // res = res - 1
3468            res.w[0]--;
3469            if (res.w[0] == 0xffffffffffffffffull)
3470              res.w[1]--;
3471            // Note: a double rounding error upward is not possible; for this
3472            // the result after the first rounding would have to be 99...95
3473            // (35 digits in all), possibly followed by a number of zeros; this
3474            // not possible in this underflow case
3475            is_midpoint_lt_even = 0;
3476            is_inexact_lt_midpoint = 1;
3477          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3478                is_midpoint_gt_even) { // double rounding error downward
3479            // res = res + 1
3480            res.w[0]++;
3481            if (res.w[0] == 0)
3482              res.w[1]++;
3483            is_midpoint_gt_even = 0;
3484            is_inexact_gt_midpoint = 1;
3485          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3486        	     !is_inexact_lt_midpoint
3487        	     && !is_inexact_gt_midpoint) {
3488            // if this second rounding was exact the result may still be
3489            // inexact because of the first rounding
3490            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3491              is_inexact_gt_midpoint = 1;
3492            }
3493            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3494              is_inexact_lt_midpoint = 1;
3495            }
3496          } else if (is_midpoint_gt_even &&
3497        	     (is_inexact_gt_midpoint0
3498        	      || is_midpoint_lt_even0)) {
3499            // pulled up to a midpoint
3500            is_inexact_lt_midpoint = 1;
3501            is_inexact_gt_midpoint = 0;
3502            is_midpoint_lt_even = 0;
3503            is_midpoint_gt_even = 0;
3504          } else if (is_midpoint_lt_even &&
3505        	     (is_inexact_lt_midpoint0
3506        	      || is_midpoint_gt_even0)) {
3507            // pulled down to a midpoint
3508            is_inexact_lt_midpoint = 0;
3509            is_inexact_gt_midpoint = 1;
3510            is_midpoint_lt_even = 0;
3511            is_midpoint_gt_even = 0;
3512          } else {
3513            ;
3514          }
3515        }
3516      }
3517      // res contains the correct result
3518      // apply correction if not rounding to nearest
3519      if (rnd_mode != ROUNDING_TO_NEAREST) {
3520        rounding_correction (rnd_mode,
3521        		     is_inexact_lt_midpoint,
3522        		     is_inexact_gt_midpoint,
3523        		     is_midpoint_lt_even, is_midpoint_gt_even,
3524        		     e4, &res, pfpsf);
3525      }
3526      if (is_midpoint_lt_even || is_midpoint_gt_even ||
3527          is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3528        // set the inexact flag
3529        *pfpsf |= INEXACT_EXCEPTION;
3530        if (is_tiny)
3531          *pfpsf |= UNDERFLOW_EXCEPTION;
3532      }
3533      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3534      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3535      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3536      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3537      BID_SWAP128 (res);
3538      BID_RETURN (res)
3539
3540    } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3541        (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3542        (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3543
3544      // calculate first the result rounded to the destination precision, with
3545      // unbounded exponent
3546
3547      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3548              rnd_mode, &is_midpoint_lt_even,
3549              &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3550              &is_inexact_gt_midpoint, pfpsf, &res);
3551      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3552      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3553      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3554      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3555      BID_SWAP128 (res);
3556      BID_RETURN (res)
3557
3558    } else {
3559      ;
3560    }
3561
3562  } // end if delta < 0
3563
3564  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3565  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3566  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3567  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3568  BID_SWAP128 (res);
3569  BID_RETURN (res)
3570
3571}
3572
3573
3574#if DECIMAL_CALL_BY_REFERENCE
3575void
3576bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3577            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3578            _EXC_INFO_PARAM) {
3579  UINT128 x = *px, y = *py, z = *pz;
3580#if !DECIMAL_GLOBAL_ROUNDING
3581  unsigned int rnd_mode = *prnd_mode;
3582#endif
3583#else
3584UINT128
3585bid128_fma (UINT128 x, UINT128 y, UINT128 z
3586            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3587            _EXC_INFO_PARAM) {
3588#endif
3589  int is_midpoint_lt_even, is_midpoint_gt_even,
3590    is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3591  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3592
3593#if DECIMAL_CALL_BY_REFERENCE
3594  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3595        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3596        	  &res, &x, &y, &z
3597        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3598        	  _EXC_INFO_ARG);
3599#else
3600  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3601        		&is_inexact_lt_midpoint,
3602        		&is_inexact_gt_midpoint, x, y,
3603        		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3604        		_EXC_INFO_ARG);
3605#endif
3606  BID_RETURN (res);
3607}
3608
3609
3610#if DECIMAL_CALL_BY_REFERENCE
3611void
3612bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3613               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3614               _EXC_INFO_PARAM) {
3615  UINT64 x = *px, y = *py, z = *pz;
3616#if !DECIMAL_GLOBAL_ROUNDING
3617  unsigned int rnd_mode = *prnd_mode;
3618#endif
3619#else
3620UINT128
3621bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3622               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3623               _EXC_INFO_PARAM) {
3624#endif
3625  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3626    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3627  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3628  UINT128 x1, y1, z1;
3629
3630#if DECIMAL_CALL_BY_REFERENCE
3631  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3633  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3634  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3635        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3636        	  &res, &x1, &y1, &z1
3637        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3638        	  _EXC_INFO_ARG);
3639#else
3640  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3642  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3643  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3644        		&is_inexact_lt_midpoint,
3645        		&is_inexact_gt_midpoint, x1, y1,
3646        		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3647        		_EXC_INFO_ARG);
3648#endif
3649  BID_RETURN (res);
3650}
3651
3652
3653#if DECIMAL_CALL_BY_REFERENCE
3654void
3655bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3656               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3657               _EXC_INFO_PARAM) {
3658  UINT64 x = *px, y = *py;
3659  UINT128 z = *pz;
3660#if !DECIMAL_GLOBAL_ROUNDING
3661  unsigned int rnd_mode = *prnd_mode;
3662#endif
3663#else
3664UINT128
3665bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3666               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3667               _EXC_INFO_PARAM) {
3668#endif
3669  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3670    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3671  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3672  UINT128 x1, y1;
3673
3674#if DECIMAL_CALL_BY_REFERENCE
3675  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3676  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3677  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3678        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3679        	  &res, &x1, &y1, &z
3680        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3681        	  _EXC_INFO_ARG);
3682#else
3683  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3684  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3685  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3686        		&is_inexact_lt_midpoint,
3687        		&is_inexact_gt_midpoint, x1, y1,
3688        		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3689        		_EXC_INFO_ARG);
3690#endif
3691  BID_RETURN (res);
3692}
3693
3694
3695#if DECIMAL_CALL_BY_REFERENCE
3696void
3697bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3698               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3699               _EXC_INFO_PARAM) {
3700  UINT64 x = *px, z = *pz;
3701#if !DECIMAL_GLOBAL_ROUNDING
3702  unsigned int rnd_mode = *prnd_mode;
3703#endif
3704#else
3705UINT128
3706bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3707               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3708               _EXC_INFO_PARAM) {
3709#endif
3710  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3711    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3712  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3713  UINT128 x1, z1;
3714
3715#if DECIMAL_CALL_BY_REFERENCE
3716  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3717  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3718  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3719        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3720        	  &res, &x1, py, &z1
3721        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3722        	  _EXC_INFO_ARG);
3723#else
3724  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3725  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3726  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3727        		&is_inexact_lt_midpoint,
3728        		&is_inexact_gt_midpoint, x1, y,
3729        		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3730        		_EXC_INFO_ARG);
3731#endif
3732  BID_RETURN (res);
3733}
3734
3735
3736#if DECIMAL_CALL_BY_REFERENCE
3737void
3738bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3739               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3740               _EXC_INFO_PARAM) {
3741  UINT64 x = *px;
3742#if !DECIMAL_GLOBAL_ROUNDING
3743  unsigned int rnd_mode = *prnd_mode;
3744#endif
3745#else
3746UINT128
3747bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3748               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3749               _EXC_INFO_PARAM) {
3750#endif
3751  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3752    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3753  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3754  UINT128 x1;
3755
3756#if DECIMAL_CALL_BY_REFERENCE
3757  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3758  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3759        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3760        	  &res, &x1, py, pz
3761        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3762        	  _EXC_INFO_ARG);
3763#else
3764  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3765  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3766        		&is_inexact_lt_midpoint,
3767        		&is_inexact_gt_midpoint, x1, y,
3768        		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3769        		_EXC_INFO_ARG);
3770#endif
3771  BID_RETURN (res);
3772}
3773
3774
3775#if DECIMAL_CALL_BY_REFERENCE
3776void
3777bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3778               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3779               _EXC_INFO_PARAM) {
3780  UINT64 y = *py, z = *pz;
3781#if !DECIMAL_GLOBAL_ROUNDING
3782  unsigned int rnd_mode = *prnd_mode;
3783#endif
3784#else
3785UINT128
3786bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3787               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3788               _EXC_INFO_PARAM) {
3789#endif
3790  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3791    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3792  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3793  UINT128 y1, z1;
3794
3795#if DECIMAL_CALL_BY_REFERENCE
3796  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3797  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3798  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3799        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3800        	  &res, px, &y1, &z1
3801        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3802        	  _EXC_INFO_ARG);
3803#else
3804  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3805  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3806  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3807        		&is_inexact_lt_midpoint,
3808        		&is_inexact_gt_midpoint, x, y1,
3809        		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3810        		_EXC_INFO_ARG);
3811#endif
3812  BID_RETURN (res);
3813}
3814
3815
3816#if DECIMAL_CALL_BY_REFERENCE
3817void
3818bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3819               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3820               _EXC_INFO_PARAM) {
3821  UINT64 y = *py;
3822#if !DECIMAL_GLOBAL_ROUNDING
3823  unsigned int rnd_mode = *prnd_mode;
3824#endif
3825#else
3826UINT128
3827bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3828               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3829               _EXC_INFO_PARAM) {
3830#endif
3831  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3832    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3833  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3834  UINT128 y1;
3835
3836#if DECIMAL_CALL_BY_REFERENCE
3837  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3838  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3839        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3840        	  &res, px, &y1, pz
3841        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3842        	  _EXC_INFO_ARG);
3843#else
3844  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3845  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3846        		&is_inexact_lt_midpoint,
3847        		&is_inexact_gt_midpoint, x, y1,
3848        		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3849        		_EXC_INFO_ARG);
3850#endif
3851  BID_RETURN (res);
3852}
3853
3854
3855#if DECIMAL_CALL_BY_REFERENCE
3856void
3857bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3858               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3859               _EXC_INFO_PARAM) {
3860  UINT64 z = *pz;
3861#if !DECIMAL_GLOBAL_ROUNDING
3862  unsigned int rnd_mode = *prnd_mode;
3863#endif
3864#else
3865UINT128
3866bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3867               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3868               _EXC_INFO_PARAM) {
3869#endif
3870  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3871    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3872  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3873  UINT128 z1;
3874
3875#if DECIMAL_CALL_BY_REFERENCE
3876  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3877  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3878        	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3879        	  &res, px, py, &z1
3880        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3881        	  _EXC_INFO_ARG);
3882#else
3883  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3884  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3885        		&is_inexact_lt_midpoint,
3886        		&is_inexact_gt_midpoint, x, y,
3887        		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3888        		_EXC_INFO_ARG);
3889#endif
3890  BID_RETURN (res);
3891}
3892
3893// Note: bid128qqq_fma is represented by bid128_fma
3894
3895// Note: bid64ddd_fma is represented by bid64_fma
3896
3897#if DECIMAL_CALL_BY_REFERENCE
3898void
3899bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3900              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3901              _EXC_INFO_PARAM) {
3902  UINT64 x = *px, y = *py;
3903#if !DECIMAL_GLOBAL_ROUNDING
3904  unsigned int rnd_mode = *prnd_mode;
3905#endif
3906#else
3907UINT64
3908bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3909              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3910              _EXC_INFO_PARAM) {
3911#endif
3912  UINT64 res1 = 0xbaddbaddbaddbaddull;
3913  UINT128 x1, y1;
3914
3915#if DECIMAL_CALL_BY_REFERENCE
3916  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3917  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3918  bid64qqq_fma (&res1, &x1, &y1, pz
3919        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3920        	_EXC_INFO_ARG);
3921#else
3922  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3924  res1 = bid64qqq_fma (x1, y1, z
3925        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3926        	       _EXC_INFO_ARG);
3927#endif
3928  BID_RETURN (res1);
3929}
3930
3931
3932#if DECIMAL_CALL_BY_REFERENCE
3933void
3934bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3935              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3936              _EXC_INFO_PARAM) {
3937  UINT64 x = *px, z = *pz;
3938#if !DECIMAL_GLOBAL_ROUNDING
3939  unsigned int rnd_mode = *prnd_mode;
3940#endif
3941#else
3942UINT64
3943bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3944              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3945              _EXC_INFO_PARAM) {
3946#endif
3947  UINT64 res1 = 0xbaddbaddbaddbaddull;
3948  UINT128 x1, z1;
3949
3950#if DECIMAL_CALL_BY_REFERENCE
3951  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3952  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3953  bid64qqq_fma (&res1, &x1, py, &z1
3954        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3955        	_EXC_INFO_ARG);
3956#else
3957  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3959  res1 = bid64qqq_fma (x1, y, z1
3960        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3961        	       _EXC_INFO_ARG);
3962#endif
3963  BID_RETURN (res1);
3964}
3965
3966
3967#if DECIMAL_CALL_BY_REFERENCE
3968void
3969bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3970              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3971              _EXC_INFO_PARAM) {
3972  UINT64 x = *px;
3973#if !DECIMAL_GLOBAL_ROUNDING
3974  unsigned int rnd_mode = *prnd_mode;
3975#endif
3976#else
3977UINT64
3978bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3979              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3980              _EXC_INFO_PARAM) {
3981#endif
3982  UINT64 res1 = 0xbaddbaddbaddbaddull;
3983  UINT128 x1;
3984
3985#if DECIMAL_CALL_BY_REFERENCE
3986  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3987  bid64qqq_fma (&res1, &x1, py, pz
3988        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3989        	_EXC_INFO_ARG);
3990#else
3991  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992  res1 = bid64qqq_fma (x1, y, z
3993        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994        	       _EXC_INFO_ARG);
3995#endif
3996  BID_RETURN (res1);
3997}
3998
3999
4000#if DECIMAL_CALL_BY_REFERENCE
4001void
4002bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4003              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4004              _EXC_INFO_PARAM) {
4005  UINT64 y = *py, z = *pz;
4006#if !DECIMAL_GLOBAL_ROUNDING
4007  unsigned int rnd_mode = *prnd_mode;
4008#endif
4009#else
4010UINT64
4011bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4012              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4013              _EXC_INFO_PARAM) {
4014#endif
4015  UINT64 res1 = 0xbaddbaddbaddbaddull;
4016  UINT128 y1, z1;
4017
4018#if DECIMAL_CALL_BY_REFERENCE
4019  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4020  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4021  bid64qqq_fma (&res1, px, &y1, &z1
4022        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4023        	_EXC_INFO_ARG);
4024#else
4025  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4027  res1 = bid64qqq_fma (x, y1, z1
4028        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4029        	       _EXC_INFO_ARG);
4030#endif
4031  BID_RETURN (res1);
4032}
4033
4034
4035#if DECIMAL_CALL_BY_REFERENCE
4036void
4037bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4038              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4039              _EXC_INFO_PARAM) {
4040  UINT64 y = *py;
4041#if !DECIMAL_GLOBAL_ROUNDING
4042  unsigned int rnd_mode = *prnd_mode;
4043#endif
4044#else
4045UINT64
4046bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4047              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4048              _EXC_INFO_PARAM) {
4049#endif
4050  UINT64 res1 = 0xbaddbaddbaddbaddull;
4051  UINT128 y1;
4052
4053#if DECIMAL_CALL_BY_REFERENCE
4054  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4055  bid64qqq_fma (&res1, px, &y1, pz
4056        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4057        	_EXC_INFO_ARG);
4058#else
4059  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060  res1 = bid64qqq_fma (x, y1, z
4061        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062        	       _EXC_INFO_ARG);
4063#endif
4064  BID_RETURN (res1);
4065}
4066
4067
4068#if DECIMAL_CALL_BY_REFERENCE
4069void
4070bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4071              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4072              _EXC_INFO_PARAM) {
4073  UINT64 z = *pz;
4074#if !DECIMAL_GLOBAL_ROUNDING
4075  unsigned int rnd_mode = *prnd_mode;
4076#endif
4077#else
4078UINT64
4079bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4080              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4081              _EXC_INFO_PARAM) {
4082#endif
4083  UINT64 res1 = 0xbaddbaddbaddbaddull;
4084  UINT128 z1;
4085
4086#if DECIMAL_CALL_BY_REFERENCE
4087  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4088  bid64qqq_fma (&res1, px, py, &z1
4089        	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4090        	_EXC_INFO_ARG);
4091#else
4092  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093  res1 = bid64qqq_fma (x, y, z1
4094        	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095        	       _EXC_INFO_ARG);
4096#endif
4097  BID_RETURN (res1);
4098}
4099
4100
4101#if DECIMAL_CALL_BY_REFERENCE
4102void
4103bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4104              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4105              _EXC_INFO_PARAM) {
4106  UINT128 x = *px, y = *py, z = *pz;
4107#if !DECIMAL_GLOBAL_ROUNDING
4108  unsigned int rnd_mode = *prnd_mode;
4109#endif
4110#else
4111UINT64
4112bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4113              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4114              _EXC_INFO_PARAM) {
4115#endif
4116  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4117    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4118  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4119    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4120  int incr_exp;
4121  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4122  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4123  UINT64 res1 = 0xbaddbaddbaddbaddull;
4124  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4125  UINT64 sign;
4126  UINT64 exp;
4127  int unbexp;
4128  UINT128 C;
4129  BID_UI64DOUBLE tmp;
4130  int nr_bits;
4131  int q, x0;
4132  int scale;
4133  int lt_half_ulp = 0, eq_half_ulp = 0;
4134
4135  // Note: for rounding modes other than RN or RA, the result can be obtained
4136  // by rounding first to BID128 and then to BID64
4137
4138  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4139  *pfpsf = 0;
4140
4141#if DECIMAL_CALL_BY_REFERENCE
4142  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4143        	  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4144        	  &res, &x, &y, &z
4145        	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4146        	  _EXC_INFO_ARG);
4147#else
4148  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4149        		&is_inexact_lt_midpoint0,
4150        		&is_inexact_gt_midpoint0, x, y,
4151        		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4152        		_EXC_INFO_ARG);
4153#endif
4154
4155  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4156      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4157      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4158      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4159#if DECIMAL_CALL_BY_REFERENCE
4160    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161#else
4162    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4163#endif
4164    // determine the unbiased exponent of the result
4165    unbexp = ((res1 >> 53) & 0x3ff) - 398;
4166
4167    // if subnormal, res1  must have exp = -398
4168    // if tiny and inexact set underflow and inexact status flags
4169    if (!((res1 & MASK_NAN) == MASK_NAN) &&	// res1 not NaN
4170        (unbexp == -398)
4171        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4172        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4173            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4174      // set the inexact flag and the underflow flag
4175      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4176    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4177               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4178      // set the inexact flag and the underflow flag
4179      *pfpsf |= INEXACT_EXCEPTION;
4180    }
4181
4182    *pfpsf |= save_fpsf;
4183    BID_RETURN (res1);
4184  } // else continue, and use rounding to nearest to round to 16 digits
4185
4186  // at this point the result is rounded to nearest (even or away) to 34 digits
4187  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4188  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4189  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4190  unbexp = (exp >> 49) - 6176;
4191  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4192  C.w[0] = res.w[LOW_128W];
4193
4194  if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||	// result is zero
4195      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4196      // clear under/overflow
4197#if DECIMAL_CALL_BY_REFERENCE
4198    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199#else
4200    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4201#endif
4202    *pfpsf |= save_fpsf;
4203    BID_RETURN (res1);
4204  } // else continue
4205
4206  // -398 - 34 <= unbexp <= 369 + 15
4207  if (rnd_mode == ROUNDING_TIES_AWAY) {
4208    // apply correction, if needed, to make the result rounded to nearest-even
4209    if (is_midpoint_gt_even) {
4210      // res = res - 1
4211      res1--; // res1 is now even
4212    } // else the result is already correctly rounded to nearest-even
4213  }
4214  // at this point the result is finite, non-zero canonical normal or subnormal,
4215  // and in most cases overflow or underflow will not occur
4216
4217  // determine the number of digits q in the result
4218  // q = nr. of decimal digits in x
4219  // determine first the nr. of bits in x
4220  if (C.w[1] == 0) {
4221    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4222      // split the 64-bit value in two 32-bit halves to avoid rounding errors
4223      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4224        tmp.d = (double) (C.w[0] >> 32); // exact conversion
4225        nr_bits =
4226          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4227      } else { // x < 2^32
4228        tmp.d = (double) (C.w[0]); // exact conversion
4229        nr_bits =
4230          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4231      }
4232    } else { // if x < 2^53
4233      tmp.d = (double) C.w[0]; // exact conversion
4234      nr_bits =
4235        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4236    }
4237  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4238    tmp.d = (double) C.w[1]; // exact conversion
4239    nr_bits =
4240      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4241  }
4242  q = nr_digits[nr_bits - 1].digits;
4243  if (q == 0) {
4244    q = nr_digits[nr_bits - 1].digits1;
4245    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4246        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4247         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4248      q++;
4249  }
4250  // if q > 16, round to nearest even to 16 digits (but for underflow it may
4251  // have to be truncated even more)
4252  if (q > 16) {
4253    x0 = q - 16;
4254    if (q <= 18) {
4255      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4256        	    &is_midpoint_lt_even, &is_midpoint_gt_even,
4257        	    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4258    } else { // 19 <= q <= 34
4259      round128_19_38 (q, x0, C, &res128, &incr_exp,
4260        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
4261        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4262      res1 = res128.w[0]; // the result fits in 64 bits
4263    }
4264    unbexp = unbexp + x0;
4265    if (incr_exp)
4266      unbexp++;
4267    q = 16; // need to set in case denormalization is necessary
4268  } else {
4269    // the result does not require a second rounding (and it must have
4270    // been exact in the first rounding, since q <= 16)
4271    res1 = C.w[0];
4272  }
4273
4274  // avoid a double rounding error
4275  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4276      is_midpoint_lt_even) { // double rounding error upward
4277    // res = res - 1
4278    res1--; // res1 becomes odd
4279    is_midpoint_lt_even = 0;
4280    is_inexact_lt_midpoint = 1;
4281    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4282      res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4283      unbexp--;
4284    }
4285  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4286      is_midpoint_gt_even) { // double rounding error downward
4287    // res = res + 1
4288    res1++; // res1 becomes odd (so it cannot be 10^16)
4289    is_midpoint_gt_even = 0;
4290    is_inexact_gt_midpoint = 1;
4291  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4292             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4293    // if this second rounding was exact the result may still be
4294    // inexact because of the first rounding
4295    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4296      is_inexact_gt_midpoint = 1;
4297    }
4298    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4299      is_inexact_lt_midpoint = 1;
4300    }
4301  } else if (is_midpoint_gt_even &&
4302             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4303    // pulled up to a midpoint
4304    is_inexact_lt_midpoint = 1;
4305    is_inexact_gt_midpoint = 0;
4306    is_midpoint_lt_even = 0;
4307    is_midpoint_gt_even = 0;
4308  } else if (is_midpoint_lt_even &&
4309             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4310    // pulled down to a midpoint
4311    is_inexact_lt_midpoint = 0;
4312    is_inexact_gt_midpoint = 1;
4313    is_midpoint_lt_even = 0;
4314    is_midpoint_gt_even = 0;
4315  } else {
4316    ;
4317  }
4318  // this is the result rounded correctly to nearest even, with unbounded exp.
4319
4320  // check for overflow
4321  if (q + unbexp > P16 + expmax16) {
4322    res1 = sign | 0x7800000000000000ull;
4323    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4324    *pfpsf |= save_fpsf;
4325    BID_RETURN (res1)
4326  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4327    // not overflow; the result must be exact, and we can multiply res1 by
4328    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4329    scale = unbexp - expmax16;
4330    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4331    unbexp = expmax16; // unbexp - scale
4332  } else {
4333    ; // continue
4334  }
4335
4336  // check for underflow
4337  if (q + unbexp < P16 + expmin16) {
4338    if (unbexp < expmin16) {
4339      // we must truncate more of res
4340      x0 = expmin16 - unbexp; // x0 >= 1
4341      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4342      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4343      is_midpoint_lt_even0 = is_midpoint_lt_even;
4344      is_midpoint_gt_even0 = is_midpoint_gt_even;
4345      is_inexact_lt_midpoint = 0;
4346      is_inexact_gt_midpoint = 0;
4347      is_midpoint_lt_even = 0;
4348      is_midpoint_gt_even = 0;
4349      // the number of decimal digits in res1 is q
4350      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4351        // 2 <= q <= 16, 1 <= x0 <= 15
4352        round64_2_18 (q, x0, res1, &res1, &incr_exp,
4353        	      &is_midpoint_lt_even, &is_midpoint_gt_even,
4354        	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4355        if (incr_exp) {
4356          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4357          res1 = ten2k64[q - x0];
4358        }
4359        unbexp = unbexp + x0; // expmin16
4360      } else if (x0 == q) {
4361        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4362        // determine relationship with 1/2 ulp
4363        // q <= 16
4364        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4365          lt_half_ulp = 1;
4366          is_inexact_lt_midpoint = 1;
4367        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4368          eq_half_ulp = 1;
4369          is_midpoint_gt_even = 1;
4370        } else { // > 1/2 ulp
4371          // gt_half_ulp = 1;
4372          is_inexact_gt_midpoint = 1;
4373        }
4374        if (lt_half_ulp || eq_half_ulp) {
4375          // res = +0.0 * 10^expmin16
4376          res1 = 0x0000000000000000ull;
4377        } else { // if (gt_half_ulp)
4378          // res = +1 * 10^expmin16
4379          res1 = 0x0000000000000001ull;
4380        }
4381        unbexp = expmin16;
4382      } else { // if (x0 > q)
4383        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4384        res1 = 0x0000000000000000ull;
4385        unbexp = expmin16;
4386        is_inexact_lt_midpoint = 1;
4387      }
4388      // avoid a double rounding error
4389      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4390          is_midpoint_lt_even) { // double rounding error upward
4391        // res = res - 1
4392        res1--; // res1 becomes odd
4393        is_midpoint_lt_even = 0;
4394        is_inexact_lt_midpoint = 1;
4395      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4396          is_midpoint_gt_even) { // double rounding error downward
4397        // res = res + 1
4398        res1++; // res1 becomes odd
4399        is_midpoint_gt_even = 0;
4400        is_inexact_gt_midpoint = 1;
4401      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4402        	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4403        // if this rounding was exact the result may still be
4404        // inexact because of the previous roundings
4405        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4406          is_inexact_gt_midpoint = 1;
4407        }
4408        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4409          is_inexact_lt_midpoint = 1;
4410        }
4411      } else if (is_midpoint_gt_even &&
4412        	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4413        // pulled up to a midpoint
4414        is_inexact_lt_midpoint = 1;
4415        is_inexact_gt_midpoint = 0;
4416        is_midpoint_lt_even = 0;
4417        is_midpoint_gt_even = 0;
4418      } else if (is_midpoint_lt_even &&
4419        	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4420        // pulled down to a midpoint
4421        is_inexact_lt_midpoint = 0;
4422        is_inexact_gt_midpoint = 1;
4423        is_midpoint_lt_even = 0;
4424        is_midpoint_gt_even = 0;
4425      } else {
4426        ;
4427      }
4428    }
4429    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4430    // and the result is tiny and exact
4431
4432    // check for inexact result
4433    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4434        is_midpoint_lt_even || is_midpoint_gt_even ||
4435        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4436        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4437      // set the inexact flag and the underflow flag
4438      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4439    }
4440  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4441             is_midpoint_lt_even || is_midpoint_gt_even) {
4442    *pfpsf |= INEXACT_EXCEPTION;
4443  }
4444  // this is the result rounded correctly to nearest, with bounded exponent
4445
4446  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4447    // res = res + 1
4448    res1++; // res1 is now odd
4449  } // else the result is already correct
4450
4451  // assemble the result
4452  if (res1 < 0x0020000000000000ull) { // res < 2^53
4453    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4454  } else { // res1 >= 2^53
4455    res1 = sign | MASK_STEERING_BITS |
4456      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4457  }
4458  *pfpsf |= save_fpsf;
4459  BID_RETURN (res1);
4460}
4461