1/* Sum -- efficiently sum a list of floating-point numbers
2
3Copyright 2014-2023 Free Software Foundation, Inc.
4Contributed by the AriC and Caramba projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* Note: In the prototypes, one uses
27 *
28 *   const mpfr_ptr *x      i.e.:  __mpfr_struct *const *x
29 *
30 * instead of
31 *
32 *   const mpfr_srcptr *x   i.e.:  const __mpfr_struct *const *x
33 *
34 * because here one has a double indirection and the type matching rules
35 * from the C standard in such a case are stricter and they would yield
36 * annoying errors for the user in practice. See:
37 *
38 *   Why can't I pass a char ** to a function which expects a const char **?
39 *
40 * in the comp.lang.c FAQ:
41 *
42 *   https://c-faq.com/ansi/constmismatch.html
43 */
44
45/* See the doc/sum.txt file for the algorithm and a part of its proof
46(this will later go into algorithms.tex).
47
48TODO [VL, after a discussion with James Demmel]: Compared to
49  James Demmel and Yozo Hida, Fast and accurate floating-point summation
50  with application to computational geometry, Numerical Algorithms,
51  volume 37, number 1-4, pages 101--112, 2004.
52sorting is not necessary here. It is not done because in the most common
53cases (where big cancellations are rare), it would take time and be
54useless. However, the lack of sorting increases the worst case complexity.
55For instance, consider many inputs that cancel one another (two by two).
56One would need n/2 iterations, where each iteration reads the exponent
57of each input, therefore n*n/2 read operations. Using a worst-case sort
58in O(n log n) could give a O(n log n) worst-case complexity. As we don't
59want to slow down the most common cases, this could be done at the 3rd
60iteration. But are there practical applications which would be used as
61tests?
62
63Note: see the following paper and its references:
64  http://www.acsel-lab.com/arithmetic/arith21/papers/p54.pdf
65  (J. Demmel and H. D. Nguyen, Fast Reproducible Floating-Point Summation)
66VL: This is very different:
67          In MPFR             In the paper & references
68    arbitrary precision            fixed precision
69     correct rounding        just reproducible rounding
70    integer operations        floating-point operations
71        sequential             parallel (& sequential)
72*/
73
74#ifdef MPFR_COV_CHECK
75int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 };
76#endif
77
78/* Update minexp (V) after detecting a potential integer overflow in
79   extreme cases (only a 32-bit ABI may be concerned in practice).
80   Instead of an assertion failure below, we could
81   1. check that the ulp of each regular input has an exponent >= MPFR_EXP_MIN
82      (with an assertion failure if this is not the case);
83   2. set minexp to MPFR_EXP_MIN and shift the accumulator accordingly
84      (the sum will then be exact).
85   However, such cases, which involve huge precisions, will probably
86   never occur in practice (at least with a 64-bit ABI) and are not
87   easily testable due to these huge precisions. Moreover, switching
88   to a 64-bit ABI would be a better solution for such computations.
89   So, let's leave this unimplemented. */
90#define SAFE_SUB(V,E,SH)                        \
91  do                                            \
92    {                                           \
93      mpfr_prec_t sh = (SH);                    \
94      MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh);  \
95      V = (E) - sh;                             \
96    }                                           \
97  while (0)
98
99/* Function sum_raw
100 * ================
101 *
102 * Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
103 * the exponents of the computed result and of the error bound respectively,
104 * while e - err is less than some given bound (due to cancellation), shift
105 * the accumulator and reiterate.
106 *
107 * Inputs:
108 *   wp: pointer to the accumulator (least significant limb first).
109 *   ws: size of the accumulator (in limbs).
110 *   wq: precision of the accumulator (ws * GMP_NUMB_BITS).
111 *   x: array of the input numbers.
112 *   n: size of this array (number of inputs, regular or not).
113 *   minexp: exponent of the least significant bit of the first block.
114 *   maxexp: exponent of the first block (exponent of its MSB + 1).
115 *   tp: pointer to a temporary area (pre-allocated).
116 *   ts: size of this temporary area.
117 *   logn: ceil(log2(rn)), where rn is the number of regular inputs.
118 *   prec: lower bound for e - err (as described above).
119 *   ep: pointer to mpfr_exp_t (see below), or a null pointer.
120 *   minexpp: pointer to mpfr_exp_t (see below), or a null pointer.
121 *   maxexpp: pointer to mpfr_exp_t (see below), or a null pointer.
122 *
123 * Preconditions:
124 *   prec >= 1
125 *   wq >= logn + prec + 2
126 *
127 * This function returns 0 if the accumulator is 0 (which implies that
128 * the exact sum for this sum_raw invocation is 0), otherwise the number
129 * of cancelled bits (>= 1), defined as the number of identical bits on
130 * the most significant part of the accumulator. In the latter case, it
131 * also returns the following data in variables passed by reference, if
132 * the pointers are not NULL:
133 * - in ep: the exponent e of the computed result;
134 * - in minexpp: the last value of minexp;
135 * - in maxexpp: the new value of maxexp (for the next iteration after
136 *   the first invocation of sum_raw in the main code).
137 *
138 * Notes:
139 * - minexp is also the exponent of the least significant bit of the
140 *   accumulator;
141 * - the temporary area must be large enough to hold a shifted input
142 *   block, and the value of ts is used only when the full assertions
143 *   are checked (i.e. with the --enable-assert configure option), to
144 *   check that a buffer overflow doesn't occur;
145 * - contrary to the returned value of minexp (the value in the last
146 *   iteration), the returned value of maxexp is the one for the next
147 *   iteration (= maxexp2 of the last iteration).
148 */
149static mpfr_prec_t
150sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, const mpfr_ptr *x,
151         unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
152         mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
153         mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
154{
155  MPFR_LOG_FUNC
156    (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
157     ("", 0));
158
159  /* The C code below requires prec >= 0 due to the use of unsigned
160     integer arithmetic on it. Actually the computation makes sense
161     only with prec >= 1 (otherwise one can't even know the sign of
162     the result), hence the following assertion. */
163  MPFR_ASSERTD (prec >= 1);
164
165  /* Consistency check. */
166  MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);
167
168  /* The following precondition together with prec >= 1 will imply:
169     minexp - shiftq < maxexp2, as required by the algorithm. */
170  MPFR_ASSERTD (wq >= logn + prec + 2);
171
172  while (1)
173    {
174      mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
175      unsigned long i;
176
177      MPFR_LOG_MSG (("sum_raw loop: "
178                     "maxexp=%" MPFR_EXP_FSPEC "d "
179                     "minexp=%" MPFR_EXP_FSPEC "d\n",
180                     (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));
181
182      MPFR_ASSERTD (maxexp > minexp);
183
184      for (i = 0; i < n; i++)
185        if (! MPFR_IS_SINGULAR (x[i]))  /* Step 1 (see sum_raw in sum.txt) */
186          {
187            mp_limb_t *dp, *vp;
188            mp_size_t ds, vs, vds;
189            mpfr_exp_t xe, vd;
190            mpfr_prec_t xq;
191            int tr;
192
193            xe = MPFR_GET_EXP (x[i]);
194            xq = MPFR_GET_PREC (x[i]);
195
196            vp = MPFR_MANT (x[i]);
197            vs = MPFR_PREC2LIMBS (xq);
198            vd = xe - vs * GMP_NUMB_BITS - minexp;
199            /* vd is the exponent of the least significant represented bit of
200               x[i] (including the trailing bits, whose value is 0) minus the
201               exponent of the least significant bit of the accumulator. To
202               make the code simpler, we won't try to filter out the trailing
203               bits of x[i]. */
204
205            /* Steps 2, 3, 4 (see sum_raw in sum.txt) */
206
207            if (vd < 0)
208              {
209                /* This covers the following cases:
210                 *     [-+- accumulator ---]
211                 *   [---|----- x[i] ------|--]
212                 *       |   [----- x[i] --|--]
213                 *       |                 |[----- x[i] -----]
214                 *       |                 |    [----- x[i] -----]
215                 *     maxexp           minexp
216                 */
217
218                /* Step 2 for subcase vd < 0 */
219
220                if (xe <= minexp)
221                  {
222                    /* x[i] is entirely after the LSB of the accumulator,
223                       so that it will be ignored at this iteration. */
224                    if (xe > maxexp2)
225                      {
226                        maxexp2 = xe;
227                        /* And since the exponent of x[i] is valid... */
228                        MPFR_ASSERTD (maxexp2 >= MPFR_EMIN_MIN);
229                      }
230                    continue;
231                  }
232
233                /* Step 3 for subcase vd < 0 */
234
235                /* If some significant bits of x[i] are after the LSB of the
236                   accumulator, then maxexp2 will necessarily be minexp. */
237                if (MPFR_LIKELY (xe - xq < minexp))
238                  maxexp2 = minexp;
239
240                /* Step 4 for subcase vd < 0 */
241
242                /* We need to ignore the least |vd| significant bits of x[i].
243                   First, let's ignore the least vds = |vd| / GMP_NUMB_BITS
244                   limbs. */
245                vd = - vd;
246                vds = vd / GMP_NUMB_BITS;
247                vs -= vds;
248                MPFR_ASSERTD (vs > 0);  /* see xe <= minexp test above */
249                vp += vds;
250                vd -= vds * GMP_NUMB_BITS;
251                MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
252
253                if (xe > maxexp)
254                  {
255                    vs -= (xe - maxexp) / GMP_NUMB_BITS;
256                    MPFR_ASSERTD (vs > 0);
257                    tr = (xe - maxexp) % GMP_NUMB_BITS;
258                  }
259                else
260                  tr = 0;
261
262                if (vd != 0)
263                  {
264                    MPFR_ASSERTD (vs <= ts);
265                    mpn_rshift (tp, vp, vs, vd);
266                    vp = tp;
267                    tr += vd;
268                    if (tr >= GMP_NUMB_BITS)
269                      {
270                        vs--;
271                        tr -= GMP_NUMB_BITS;
272                      }
273                    MPFR_ASSERTD (vs >= 1);
274                    MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
275                    if (tr != 0)
276                      {
277                        tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
278                        tr = 0;
279                      }
280                    /* Truncation has now been taken into account. */
281                    MPFR_ASSERTD (tr == 0);
282                  }
283
284                dp = wp;
285                ds = ws;
286              }
287            else  /* vd >= 0 */
288              {
289                /* This covers the following cases:
290                 *               [-+- accumulator ---]
291                 *   [- x[i] -]    |                 |
292                 *             [---|-- x[i] ------]  |
293                 *          [------|-- x[i] ---------]
294                 *                 |   [- x[i] -]    |
295                 *               maxexp           minexp
296                 */
297
298                /* Steps 2 and 3 for subcase vd >= 0 */
299
300                MPFR_ASSERTD (xe - xq >= minexp);  /* see definition of vd */
301
302                /* Step 4 for subcase vd >= 0 */
303
304                /* We need to ignore the least vd significant bits
305                   of the accumulator. First, let's ignore the least
306                   vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
307                vds = vd / GMP_NUMB_BITS;
308                ds = ws - vds;
309                if (ds <= 0)
310                  continue;
311                dp = wp + vds;
312                vd -= vds * GMP_NUMB_BITS;
313                MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
314
315                /* The low part of x[i] (to be determined) will have to be
316                   shifted vd bits to the left if vd != 0. */
317
318                if (xe > maxexp)
319                  {
320                    vs -= (xe - maxexp) / GMP_NUMB_BITS;
321                    if (vs <= 0)
322                      continue;
323                    tr = (xe - maxexp) % GMP_NUMB_BITS;
324                  }
325                else
326                  tr = 0;
327
328                MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);
329
330                /* We need to consider the least significant vs limbs of x[i]
331                   except the most significant tr bits. */
332
333                if (vd != 0)
334                  {
335                    mp_limb_t carry;
336
337                    MPFR_ASSERTD (vs <= ts);
338                    carry = mpn_lshift (tp, vp, vs, vd);
339                    tr -= vd;
340                    if (tr < 0)
341                      {
342                        tr += GMP_NUMB_BITS;
343                        MPFR_ASSERTD (vs + 1 <= ts);
344                        tp[vs++] = carry;
345                      }
346                    MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
347                    vp = tp;
348                  }
349              }  /* vd >= 0 */
350
351            MPFR_ASSERTD (vs > 0 && vs <= ds);
352
353            /* We can't truncate the most significant limb of the input
354               (in case it hasn't been shifted to the temporary area).
355               So, let's ignore it now. It will be taken into account
356               via carry propagation after the addition. */
357            if (tr != 0)
358              vs--;
359
360            /* Step 5 (see sum_raw in sum.txt) */
361
362            if (MPFR_IS_POS (x[i]))
363              {
364                mp_limb_t carry;
365
366                carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
367                MPFR_ASSERTD (carry <= 1);
368                if (tr != 0)
369                  carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
370                if (ds > vs)
371                  mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
372              }
373            else
374              {
375                mp_limb_t borrow;
376
377                borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
378                MPFR_ASSERTD (borrow <= 1);
379                if (tr != 0)
380                  borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
381                if (ds > vs)
382                  mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
383              }
384          }
385
386      {
387        mpfr_prec_t cancel;  /* number of cancelled bits */
388        mp_size_t wi;        /* index in the accumulator */
389        mp_limb_t a, b;
390        int cnt;
391
392        cancel = 0;
393        wi = ws - 1;
394        MPFR_ASSERTD (wi >= 0);
395        a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;
396
397        while (wi >= 0)
398          if ((b = wp[wi]) == a)
399            {
400              cancel += GMP_NUMB_BITS;
401              wi--;
402            }
403          else
404            {
405              b ^= a;
406              MPFR_ASSERTD (b != 0);
407              count_leading_zeros (cnt, b);
408              cancel += cnt;
409              break;
410            }
411
412        if (wi >= 0 || a != MPFR_LIMB_ZERO)  /* accumulator != 0 */
413          {
414            mpfr_exp_t e;        /* exponent of the computed result */
415            mpfr_exp_t err;      /* exponent of the error bound */
416
417            MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
418                           a != MPFR_LIMB_ZERO ? "<" : ">", cancel));
419
420            MPFR_ASSERTD (cancel > 0);
421            e = minexp + wq - cancel;
422            MPFR_ASSERTD (e >= minexp);
423            err = maxexp2 + logn;  /* OK even if maxexp2 == MPFR_EXP_MIN */
424
425            /* The absolute value of the truncated sum is in the binade
426               [2^(e-1),2^e] (closed on both ends due to two's complement).
427               The error is strictly less than 2^err (and is 0 if
428               maxexp2 == MPFR_EXP_MIN). */
429
430            /* This basically tests whether err <= e - prec without
431               potential integer overflow (since prec >= 0)...
432               Note that the maxexp2 == MPFR_EXP_MIN test is there just for
433               the potential corner case e - prec < MPFR_EXP_MIN + logn.
434               Such corner cases, involving specific huge-precision numbers,
435               are probably not supported in many places in MPFR, but this
436               test doesn't hurt... */
437            if (maxexp2 == MPFR_EXP_MIN ||
438                (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec))
439              {
440                MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
441                               MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
442                               (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
443                /* To avoid tests or copies, we consider the only two cases
444                   that will occur in sum_aux. */
445                MPFR_ASSERTD ((ep != NULL &&
446                               minexpp != NULL &&
447                               maxexpp != NULL) ||
448                              (ep == NULL &&
449                               minexpp == NULL &&
450                               maxexpp == NULL));
451                if (ep != NULL)
452                  {
453                    *ep = e;
454                    *minexpp = minexp;
455                    *maxexpp = maxexp2;
456                  }
457                MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
458                               "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
459                               (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
460                               maxexp2 == MPFR_EXP_MIN ?
461                               " (MPFR_EXP_MIN)" : ""));
462                return cancel;
463              }
464            else
465              {
466                mpfr_exp_t diffexp;
467                mpfr_prec_t shiftq;
468                mpfr_size_t shifts;
469                int shiftc;
470
471                MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
472                               "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
473                               (mpfr_eexp_t) e, (mpfr_eexp_t) err,
474                               (mpfr_eexp_t) maxexp2,
475                               maxexp2 == MPFR_EXP_MIN ?
476                               " (MPFR_EXP_MIN)" : ""));
477
478                diffexp = err - e;
479                if (diffexp < 0)
480                  diffexp = 0;
481                /* diffexp = max(0, err - e) */
482
483                MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
484                                (mpfr_eexp_t) diffexp));
485
486                MPFR_ASSERTD (diffexp < cancel - 2);
487                shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
488                /* equivalent to: minexp + wq - 2 - max(e,err) */
489                MPFR_ASSERTD (shiftq > 0);
490                shifts = shiftq / GMP_NUMB_BITS;
491                shiftc = shiftq % GMP_NUMB_BITS;
492                MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
493                               shiftq, (mpfr_prec_t) shifts, shiftc));
494                if (MPFR_LIKELY (shiftc != 0))
495                  mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
496                else
497                  mpn_copyd (wp + shifts, wp, ws - shifts);
498                MPN_ZERO (wp, shifts);
499                /* Compute minexp = minexp - shiftq safely. */
500                SAFE_SUB (minexp, minexp, shiftq);
501                MPFR_ASSERTD (minexp < maxexp2);
502              }
503          }
504        else if (maxexp2 == MPFR_EXP_MIN)
505          {
506            MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
507            return 0;
508          }
509        else
510          {
511            MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
512            /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */
513            SAFE_SUB (minexp, maxexp2, wq - (logn + 1));
514            /* Note: the logn + 1 corresponds to cq in the main code. */
515          }
516      }
517
518      maxexp = maxexp2;
519    }
520}
521
522/**********************************************************************/
523
524/* Generic case: all the inputs are finite numbers,
525   with at least 3 regular numbers. */
526static int
527sum_aux (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd,
528         mpfr_exp_t maxexp, unsigned long rn)
529{
530  mp_limb_t *sump;
531  mp_limb_t *tp;  /* pointer to a temporary area */
532  mp_limb_t *wp;  /* pointer to the accumulator */
533  mp_size_t ts;   /* size of the temporary area, in limbs */
534  mp_size_t ws;   /* size of the accumulator, in limbs */
535  mp_size_t zs;   /* size of the TMD accumulator, in limbs */
536  mpfr_prec_t wq; /* size of the accumulator, in bits */
537  int logn;       /* ceil(log2(rn)) */
538  int cq;
539  mpfr_prec_t sq;
540  int inex;
541  MPFR_TMP_DECL (marker);
542
543  MPFR_LOG_FUNC
544    (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu",
545      n, rnd, (mpfr_eexp_t) maxexp, rn),
546     ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
547
548  MPFR_ASSERTD (rn >= 3 && rn <= n);
549
550  /* In practice, no integer overflow on the exponent. */
551  MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >=
552                           sizeof (unsigned long) * CHAR_BIT);
553
554  /* Set up some variables and the accumulator. */
555
556  sump = MPFR_MANT (sum);
557
558  /* rn is the number of regular inputs (the singular ones will be
559     ignored). Compute logn = ceil(log2(rn)). */
560  logn = MPFR_INT_CEIL_LOG2 (rn);
561  MPFR_ASSERTD (logn >= 2);
562
563  MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n",
564                 logn, (mpfr_eexp_t) maxexp));
565
566  sq = MPFR_GET_PREC (sum);
567  cq = logn + 1;
568
569  /* First determine the size of the accumulator.
570   * cq + sq + logn + 2 >= logn + sq + 5, which will be used later.
571   * The assertion wq - cq - sq >= 4 is another way to check that.
572   */
573  ws = MPFR_PREC2LIMBS (cq + sq + logn + 2);
574  wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
575  MPFR_ASSERTD (wq - cq - sq >= 4);
576
577  /* TODO: timings, comparing with a larger zs. */
578  zs = MPFR_PREC2LIMBS (wq - sq);
579
580  MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq));
581
582  /* An input block will have up to wq - cq bits, and its shifted value
583     (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */
584  ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1);
585
586  MPFR_TMP_MARK (marker);
587
588  /* Note: If the TMD does not occur, which should be the case for most
589     sums, allocating zs limbs is not necessary. However, we choose to
590     do this now (thus in all cases) because zs is very small, so that
591     the difference on the memory footprint will not be noticeable.
592     More precisely, zs is at most 2 in practice with the current code;
593     we may want to increase it in order to avoid performance issues in
594     some unlikely corner cases, but even in this case, it will remain
595     small.
596     One will have:
597       [------ ts ------][------ ws ------][- zs -]
598     The following would probably be better:
599       [------ ts ------]  [------ ws ------]
600                   [- zs -]
601     i.e. where the TMD accumulator (partially or completely) takes
602     some unneeded part of the temporary area in order to improve
603     data locality. But
604       * in low precision, data locality is regarded as ensured even
605         with the actual choice;
606       * in high precision, data locality for TMD resolution may not
607         be that important.
608  */
609  tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs);
610  wp = tp + ts;
611
612  MPN_ZERO (wp, ws);  /* zero the accumulator */
613
614  {
615    mpfr_exp_t minexp;   /* exponent of the LSB of the block for sum_raw */
616    mpfr_prec_t cancel;  /* number of cancelled bits */
617    mpfr_exp_t e;        /* temporary exponent of the result */
618    mpfr_exp_t u;        /* temporary exponent of the ulp (quantum) */
619    mp_limb_t lbit;      /* last bit (useful if even rounding) */
620    mp_limb_t rbit;      /* rounding bit (corrected in halfway case) */
621    int corr;            /* correction term (from -1 to 2) */
622    int sd, sh;          /* shift counts */
623    mp_size_t sn;        /* size of the output number */
624    int tmd;             /* 0: the TMD does not occur
625                            1: the TMD occurs on a machine number
626                            2: the TMD occurs on a midpoint */
627    int neg;             /* 0 if positive sum, 1 if negative */
628    int sgn;             /* +1 if positive sum, -1 if negative */
629
630    MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0));
631
632    /* Compute minexp = maxexp - (wq - cq) safely. */
633    SAFE_SUB (minexp, maxexp, wq - cq);
634    MPFR_ASSERTD (wq >= logn + sq + 5);
635    cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
636                      logn, sq + 3, &e, &minexp, &maxexp);
637
638    if (MPFR_UNLIKELY (cancel == 0))
639      {
640        /* The exact sum is zero. Since not all inputs are 0, the sum
641         * is +0 except in MPFR_RNDD, as specified according to the
642         * IEEE 754 rules for the addition of two numbers.
643         */
644        MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ?
645                             MPFR_SIGN_POS : MPFR_SIGN_NEG));
646        MPFR_SET_ZERO (sum);
647        MPFR_TMP_FREE (marker);
648        MPFR_RET (0);
649      }
650
651    /* The absolute value of the truncated sum is in the binade
652       [2^(e-1),2^e] (closed on both ends due to two's complement).
653       The error is strictly less than 2^(maxexp + logn) (and is 0
654       if maxexp == MPFR_EXP_MIN). */
655
656    u = e - sq;  /* e being the exponent, u is the ulp of the target */
657
658    /* neg = 1 if negative, 0 if positive. */
659    neg = wp[ws-1] >> (GMP_NUMB_BITS - 1);
660    MPFR_ASSERTD (neg == 0 || neg == 1);
661
662    sgn = neg ? -1 : 1;
663    MPFR_ASSERTN (sgn == (neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS));
664
665    MPFR_LOG_MSG (("neg=%d sgn=%d cancel=%Pd"
666                   " e=%" MPFR_EXP_FSPEC "d"
667                   " u=%" MPFR_EXP_FSPEC "d"
668                   " maxexp=%" MPFR_EXP_FSPEC "d%s\n",
669                   neg, sgn, cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u,
670                   (mpfr_eexp_t) maxexp,
671                   maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : ""));
672
673    if (rnd == MPFR_RNDF)
674      {
675        /* Rounding the approximate value to nearest (ties don't matter) is
676           sufficient. We need to get the rounding bit; the code is similar
677           to a part from the generic code (here, corr = rbit). */
678        if (MPFR_LIKELY (u > minexp))
679          {
680            mpfr_prec_t tq;
681            mp_size_t wi;
682            int td;
683
684            tq = u - minexp;
685            MPFR_ASSERTD (tq > 0); /* number of trailing bits */
686            MPFR_LOG_MSG (("tq=%Pd\n", tq));
687
688            wi = tq / GMP_NUMB_BITS;
689            td = tq % GMP_NUMB_BITS;
690            corr = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
691              (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
692          }
693        else
694          corr = 0;
695        inex = 0;  /* not meaningful, but needs to have a value */
696      }
697    else  /* rnd != MPFR_RNDF */
698      {
699        if (MPFR_LIKELY (u > minexp))
700          {
701            mpfr_prec_t tq;
702            mp_size_t wi;
703            int td;
704
705            tq = u - minexp;
706            MPFR_ASSERTD (tq > 0); /* number of trailing bits */
707            MPFR_LOG_MSG (("tq=%Pd\n", tq));
708
709            wi = tq / GMP_NUMB_BITS;
710
711            /* Determine the rounding bit, which is represented. */
712            td = tq % GMP_NUMB_BITS;
713            lbit = (wp[wi] >> td) & MPFR_LIMB_ONE;
714            rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
715              (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
716            MPFR_ASSERTD (rbit == 0 || rbit == 1);
717            MPFR_LOG_MSG (("rbit=%d\n", (int) rbit));
718
719            if (maxexp == MPFR_EXP_MIN)
720              {
721                /* The sum in the accumulator is exact. Determine inex:
722                   inex = 0 if the final sum is exact, else 1, i.e.
723                   inex = rounding bit || sticky bit. In round to nearest,
724                   also determine the rounding direction: obtained from
725                   the rounding bit possibly except in halfway cases.
726                   Halfway cases are rounded toward -inf iff the last bit
727                   of the truncated significand in two's complement is 0
728                   (in precision > 1, because the parity after rounding is
729                   the same in two's complement and sign + magnitude; in
730                   precision 1, one checks that the rule works for both
731                   positive (lbit == 1) and negative (lbit == 0) numbers,
732                   rounding halfway cases away from zero). */
733                if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0)))
734                  {
735                    /* We need to determine the sticky bit, either to set inex
736                       (if the rounding bit is 0) or to possibly "correct" rbit
737                       (round to nearest, halfway case rounded downward) from
738                       which the rounding direction will be determined. */
739                    MPFR_LOG_MSG (("Determine the sticky bit...\n", 0));
740
741                    inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0
742                      : td == 0 ?
743                      (MPFR_ASSERTD (wi >= 1),
744                       (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0)
745                      : 0;
746
747                    if (!inex)
748                      {
749                        while (!inex && wi > 0)
750                          inex = wp[--wi] != 0;
751                        if (!inex && rbit != 0)
752                          {
753                            /* sticky bit = 0, rounding bit = 1,
754                               i.e. halfway case, which will be
755                               rounded downward (see earlier if). */
756                            MPFR_ASSERTD (rnd == MPFR_RNDN);
757                            inex = 1;
758                            rbit = 0;  /* even rounding downward */
759                            MPFR_LOG_MSG (("Halfway case rounded downward;"
760                                           " set inex=1 rbit=0\n", 0));
761                          }
762                      }
763                  }
764                else
765                  inex = 1;
766                tmd = 0;  /* We can round correctly -> no TMD. */
767              }
768            else  /* maxexp > MPFR_EXP_MIN */
769              {
770                mpfr_exp_t d;
771                mp_limb_t limb, mask;
772                int nbits;
773
774                /* Since maxexp was set to either the exponent of a x[i] or
775                   to minexp... */
776                MPFR_ASSERTD (maxexp >= MPFR_EMIN_MIN || maxexp == minexp);
777
778                inex = 1;  /* We do not know whether the sum is exact. */
779
780                MPFR_ASSERTD (u <= MPFR_EMAX_MAX && u <= minexp + wq);
781                d = u - (maxexp + logn);  /* representable */
782                MPFR_ASSERTD (d >= 3);  /* due to prec = sq + 3 in sum_raw */
783
784                /* Let's see whether the TMD occurs by looking at the d bits
785                   following the ulp bit, or the d-1 bits after the rounding
786                   bit. */
787
788                /* First chunk after the rounding bit... It starts at:
789                   (wi,td-2) if td >= 2,
790                   (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */
791                if (td == 0)
792                  {
793                    MPFR_ASSERTD (wi >= 1);
794                    limb = wp[--wi];
795                    mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1);
796                    nbits = GMP_NUMB_BITS;
797                  }
798                else if (td == 1)
799                  {
800                    limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO;
801                    mask = MPFR_LIMB_MAX;
802                    nbits = GMP_NUMB_BITS + 1;
803                  }
804                else  /* td >= 2 */
805                  {
806                    MPFR_ASSERTD (td >= 2);
807                    limb = wp[wi];
808                    mask = MPFR_LIMB_MASK (td - 1);
809                    nbits = td;
810                  }
811
812                /* nbits: number of bits of the first chunk + 1
813                   (the +1 is for the rounding bit). */
814
815                if (nbits > d)
816                  {
817                    /* Some low significant bits must be ignored. */
818                    limb >>= nbits - d;
819                    mask >>= nbits - d;
820                    d = 0;
821                  }
822                else
823                  {
824                    d -= nbits;
825                    MPFR_ASSERTD (d >= 0);
826                  }
827
828                limb &= mask;
829                tmd =
830                  limb == MPFR_LIMB_ZERO ?
831                    (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) :
832                  limb == mask ?
833                    (limb = MPFR_LIMB_MAX,
834                     rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0;
835
836                while (tmd != 0 && d != 0)
837                  {
838                    mp_limb_t limb2;
839
840                    MPFR_ASSERTD (d > 0);
841                    if (wi == 0)
842                      {
843                        /* The non-represented bits are 0's. */
844                        if (limb != MPFR_LIMB_ZERO)
845                          tmd = 0;
846                        break;
847                      }
848                    MPFR_ASSERTD (wi > 0);
849                    limb2 = wp[--wi];
850                    if (d < GMP_NUMB_BITS)
851                      {
852                        int c = GMP_NUMB_BITS - d;
853                        MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS);
854                        if ((limb2 >> c) != (limb >> c))
855                          tmd = 0;
856                        break;
857                      }
858                    if (limb2 != limb)
859                      tmd = 0;
860                    d -= GMP_NUMB_BITS;
861                  }
862              }
863          }
864        else  /* u <= minexp */
865          {
866            /* The exact value of the accumulator will be copied.
867             * The TMD occurs if and only if there are bits still
868             * not taken into account, and if it occurs, this is
869             * necessarily on a machine number (-> tmd = 1).
870             */
871            lbit = u == minexp ? wp[0] & MPFR_LIMB_ONE : 0;
872            rbit = 0;
873            inex = tmd = maxexp != MPFR_EXP_MIN;
874          }
875
876        MPFR_ASSERTD (rbit == 0 || rbit == 1);
877
878        MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d neg=%d\n",
879                       tmd, (int) lbit, (int) rbit, inex, neg));
880
881        /* Here, if the final sum is known to be exact, inex = 0, otherwise
882         * inex = 1. We have a truncated significand, a trailing term t such
883         * that 0 <= t < 1 ulp, and an error on the trailing term bounded by
884         * t' in absolute value. Thus the error e on the truncated significand
885         * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases
886         * denoted by a corr value between -1 and 2 depending on e, neg, rbit,
887         * and the rounding mode:
888         *   -1: equivalent to nextbelow;
889         *    0: the truncated significand is not corrected;
890         *    1: add 1 ulp;
891         *    2: add 1 ulp, then nextabove.
892         * The nextbelow and nextabove are used here since there may be a
893         * change of the binade.
894         */
895
896        if (tmd == 0)  /* no TMD */
897          {
898            switch (rnd)
899              {
900              case MPFR_RNDD:
901                corr = 0;
902                break;
903              case MPFR_RNDU:
904                corr = inex;
905                break;
906              case MPFR_RNDZ:
907                corr = inex && neg;
908                break;
909              case MPFR_RNDA:
910                corr = inex && !neg;
911                break;
912              default:
913                MPFR_ASSERTN (rnd == MPFR_RNDN);
914                /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are
915                   rounded downward, rbit has been changed to 0 so that corr
916                   is set correctly. */
917                corr = rbit;
918              }
919            MPFR_ASSERTD (corr == 0 || corr == 1);
920            if (inex &&
921                corr == 0)  /* two's complement significand decreased */
922              inex = -1;
923          }
924        else  /* tmd */
925          {
926            mpfr_exp_t minexp2;
927            mpfr_prec_t cancel2;
928            mpfr_exp_t err;  /* exponent of the error bound */
929            mp_size_t zz;    /* nb of limbs to zero in the TMD accumulator */
930            mp_limb_t *zp;   /* pointer to the TMD accumulator */
931            mpfr_prec_t zq;  /* size of the TMD accumulator, in bits */
932            int sst;         /* sign of the secondary term */
933
934            /* TMD case. Here we use a new variable minexp2, with the same
935               meaning as minexp, as we want to keep the minexp value for
936               the copy to the destination. */
937
938            MPFR_ASSERTD (maxexp > MPFR_EXP_MIN);
939            MPFR_ASSERTD (tmd == 1 || tmd == 2);
940
941            /* TMD accumulator */
942            zp = wp + ws;
943            zq = (mpfr_prec_t) zs * GMP_NUMB_BITS;
944
945            err = maxexp + logn;
946
947            MPFR_LOG_MSG (("TMD with"
948                           " maxexp=%" MPFR_EXP_FSPEC "d"
949                           " err=%" MPFR_EXP_FSPEC "d"
950                           " zs=%Pd"
951                           " zq=%Pd\n",
952                           (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err,
953                           (mpfr_prec_t) zs, zq));
954
955            /* The d-1 bits from u-2 to u-d (= err) are identical. */
956
957            if (err >= minexp)
958              {
959                mpfr_prec_t tq;
960                mp_size_t wi;
961                int td;
962
963                /* Let's keep the last 2 over the d-1 identical bits and the
964                   following bits, i.e. the bits from err+1 to minexp. */
965                tq = err - minexp + 2;  /* tq = number of such bits */
966                MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq));
967                MPFR_ASSERTD (tq >= 2);
968
969                wi = tq / GMP_NUMB_BITS;
970                td = tq % GMP_NUMB_BITS;
971
972                /* Note: The "else" (td == 0) branch below can be executed
973                   only if tq >= GMP_NUMB_BITS, which is possible only when
974                   logn is large enough. Indeed, if tq > logn + some constant,
975                   this means that the TMD did not occur.
976                   TODO: Find an upper bound on tq, and add a corresponding
977                   MPFR_ASSERTD assertion / hint. On some platforms, this
978                   branch might be dead code, and such information would
979                   allow the compiler to remove it.
980                   It seems that this branch is never tested (r12754). */
981
982                if (td != 0)
983                  {
984                    wi++;  /* number of words with represented bits */
985                    td = GMP_NUMB_BITS - td;
986                    zz = zs - wi;
987                    MPFR_ASSERTD (zz >= 0 && zz < zs);
988                    mpn_lshift (zp + zz, wp, wi, td);
989                  }
990                else
991                  {
992                    MPFR_ASSERTD (wi > 0);
993                    zz = zs - wi;
994                    MPFR_ASSERTD (zz >= 0 && zz < zs);
995                    if (zz > 0)
996                      MPN_COPY (zp + zz, wp, wi);
997                  }
998
999                /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td)
1000                   safely. */
1001                SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td);
1002                MPFR_ASSERTD (minexp2 == err + 2 - zq);
1003              }
1004            else  /* err < minexp */
1005              {
1006                /* At least one of the identical bits is not represented,
1007                   meaning that it is 0 and all these bits are 0's. Thus
1008                   the accumulator will be 0. The new minexp is determined
1009                   from maxexp, with cq bits reserved to avoid an overflow
1010                   (as in the early steps). */
1011                MPFR_LOG_MSG (("[TMD] err < minexp\n", 0));
1012                zz = zs;
1013
1014                /* Compute minexp2 = maxexp - (zq - cq) safely. */
1015                SAFE_SUB (minexp2, maxexp, zq - cq);
1016                MPFR_ASSERTD (minexp2 == err + 1 - zq);
1017              }
1018
1019            MPN_ZERO (zp, zz);
1020
1021            /* We need to determine the sign sst of the secondary term.
1022               In sum_raw, since the truncated sum corresponding to this
1023               secondary term will be in [2^(e-1),2^e] and the error
1024               strictly less than 2^err, we can stop the iterations when
1025               e - err >= 1 (this bound is the 11th argument of sum_raw). */
1026            cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts,
1027                               logn, 1, NULL, NULL, NULL);
1028
1029            if (cancel2 != 0)
1030              sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1;
1031            else if (tmd == 1)
1032              sst = 0;
1033            else
1034              {
1035                /* For halfway cases, let's virtually eliminate them
1036                   by setting a sst equivalent to a non-halfway case,
1037                   which depends on the last bit of the pre-rounded
1038                   result. */
1039                MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2);
1040                sst = lbit != 0 ? 1 : -1;
1041              }
1042
1043            MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n",
1044                           tmd, (int) rbit, sst));
1045
1046            /* Do not consider the corrected sst for MPFR_COV_SET */
1047            MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit]
1048                          [cancel2 == 0 ? 1 : sst+1][neg][sq > MPFR_PREC_MIN]);
1049
1050            inex =
1051              MPFR_IS_LIKE_RNDD (rnd, sgn) ? (sst ? -1 : 0) :
1052              MPFR_IS_LIKE_RNDU (rnd, sgn) ? (sst ?  1 : 0) :
1053              (MPFR_ASSERTD (rnd == MPFR_RNDN),
1054               tmd == 1 ? - sst : sst);
1055
1056            if (tmd == 2 && sst == (rbit != 0 ? -1 : 1))
1057              corr = 1 - (int) rbit;
1058            else if (MPFR_IS_LIKE_RNDD (rnd, sgn) && sst == -1)
1059              corr = (int) rbit - 1;
1060            else if (MPFR_IS_LIKE_RNDU (rnd, sgn) && sst == +1)
1061              corr = (int) rbit + 1;
1062            else
1063              corr = (int) rbit;
1064          }  /* tmd */
1065      }  /* rnd != MPFR_RNDF */
1066
1067    MPFR_LOG_MSG (("neg=%d corr=%d inex=%d\n", neg, corr, inex));
1068
1069    /* Sign handling (-> absolute value and sign), together with
1070       rounding. The most common cases are corr = 0 and corr = 1
1071       as this is necessarily the case when the TMD did not occur. */
1072
1073    MPFR_ASSERTD (corr >= -1 && corr <= 2);
1074
1075    MPFR_SIGN (sum) = sgn;
1076
1077    /* Let's copy/shift the bits [max(u,minexp),e) to the
1078       most significant part of the destination, and zero
1079       the least significant part (there can be one only if
1080       u < minexp). The trailing bits of the destination may
1081       contain garbage at this point. */
1082
1083    sn = MPFR_PREC2LIMBS (sq);
1084    sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq;
1085    sh = cancel % GMP_NUMB_BITS;
1086
1087    MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS);
1088
1089    if (MPFR_LIKELY (u > minexp))
1090      {
1091        mp_size_t wi;
1092
1093        /* Recompute the initial value of wi. */
1094        wi = (u - minexp) / GMP_NUMB_BITS;
1095        if (MPFR_LIKELY (sh != 0))
1096          {
1097            mp_size_t fi;
1098
1099            fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1);
1100            MPFR_ASSERTD (fi == wi || fi == wi + 1);
1101            mpn_lshift (sump, wp + fi, sn, sh);
1102            if (fi != wi)
1103              sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh);
1104          }
1105        else
1106          {
1107            MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS
1108                          == cancel);
1109            MPN_COPY (sump, wp + wi, sn);
1110          }
1111      }
1112    else  /* u <= minexp */
1113      {
1114        mp_size_t en;
1115
1116        en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS;
1117        if (MPFR_LIKELY (sh != 0))
1118          mpn_lshift (sump + sn - en, wp, en, sh);
1119        else if (MPFR_UNLIKELY (en > 0))
1120          MPN_COPY (sump + sn - en, wp, en);
1121        if (sn > en)
1122          MPN_ZERO (sump, sn - en);
1123      }
1124
1125    /* Let's take the complement if the result is negative, and at
1126       the same time, do the rounding and zero the trailing bits.
1127       As this is valid only for precisions >= 2, there is special
1128       code for precision 1 first. */
1129
1130    if (MPFR_UNLIKELY (sq == 1))  /* precision 1 */
1131      {
1132        sump[0] = MPFR_LIMB_HIGHBIT;
1133        e += neg ? 1 - corr : corr;
1134      }
1135    else if (neg)  /* negative result with sq > 1 */
1136      {
1137        MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0);
1138
1139        /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */
1140
1141        /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1
1142           (or mpn_sub_1) operations, as they could yield two loops in
1143           some particular cases involving a long sequence of 0's in
1144           the low significant bits (except the least significant bit,
1145           which doesn't matter). */
1146
1147        if (corr <= 1)
1148          {
1149            mp_limb_t corr2;
1150
1151            /* Here we can just do the correction operation on the
1152               least significant limb, then do either a mpn_com or
1153               a mpn_neg on the remaining limbs, depending on the
1154               carry (BTW, mpn_neg is just a mpn_com with an initial
1155               carry propagation: after some point, mpn_neg does a
1156               complement). */
1157
1158            corr2 = (mp_limb_t) (1 - corr) << sd;
1159            /* Note: If corr = -1, this can overflow to corr2 = 0.
1160               This case is taken into account below. */
1161
1162            sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2;
1163
1164            if (sump[0] < corr2 || (corr2 == 0 && corr < 0))
1165              {
1166                if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1))
1167                  {
1168                    /* Note: The | is important when sump[sn-1] is not 0
1169                       (this can occur with sn = 1 and corr = -1). TODO:
1170                       Add something to make sure that this is tested. */
1171                    sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1172                    e++;
1173                  }
1174              }
1175            else if (sn > 1)
1176              mpn_com (sump + 1, sump + 1, sn - 1);
1177          }
1178        else  /* corr == 2 */
1179          {
1180            mp_limb_t corr2, c;
1181            mp_size_t i = 1;
1182
1183            /* We want to compute com(x) - 1, but GMP doesn't have an
1184               operation for that. The fact is that a sequence of low
1185               significant bits 1 is invariant. Starting at the first
1186               low significant bit 0, we can do the complement with
1187               mpn_com. */
1188
1189            corr2 = MPFR_LIMB_ONE << sd;
1190            c = ~ (sump[0] | MPFR_LIMB_MASK (sd));
1191            sump[0] = c - corr2;
1192
1193            if (c == 0)
1194              {
1195                while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX)
1196                  i++;
1197                sump[i] = (~ sump[i]) - 1;
1198                i++;
1199              }
1200
1201            if (i < sn)
1202              mpn_com (sump + i, sump + i, sn - i);
1203            else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1204              {
1205                /* Happens on 01111...111, whose complement is
1206                   10000...000, and com(x) - 1 is 01111...111. */
1207                sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1208                e--;
1209              }
1210          }
1211      }
1212    else  /* positive result with sq > 1 */
1213      {
1214        MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1215        sump[0] &= ~ MPFR_LIMB_MASK (sd);
1216
1217        if (corr > 0)
1218          {
1219            mp_limb_t corr2, carry_out;
1220
1221            corr2 = (mp_limb_t) corr << sd;
1222            /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows
1223               to corr2 = 0. This case is taken into account below. */
1224
1225            carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) :
1226              (MPFR_ASSERTD (sn > 1),
1227               mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE));
1228
1229            MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out);
1230
1231            if (MPFR_UNLIKELY (carry_out))
1232              {
1233                /* Note: The | is important when sump[sn-1] is not 0
1234                   (this can occur with sn = 1 and corr = 2). TODO:
1235                   Add something to make sure that this is tested. */
1236                sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1237                e++;
1238              }
1239          }
1240
1241        if (corr < 0)
1242          {
1243            mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd);
1244
1245            if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1246              {
1247                sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1248                e--;
1249              }
1250          }
1251      }
1252
1253    MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1254    MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e));
1255    /* e may be outside the current exponent range, but this will be checked
1256       with mpfr_check_range below. */
1257    MPFR_EXP (sum) = e;
1258  }  /* main block */
1259
1260  MPFR_TMP_FREE (marker);
1261  return mpfr_check_range (sum, inex, rnd);
1262}
1263
1264/**********************************************************************/
1265
1266int
1267mpfr_sum (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd)
1268{
1269  MPFR_LOG_FUNC
1270    (("n=%lu rnd=%d", n, rnd),
1271     ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
1272
1273  if (MPFR_UNLIKELY (n <= 2))
1274    {
1275      if (n == 0)
1276        {
1277          MPFR_SET_ZERO (sum);
1278          MPFR_SET_POS (sum);
1279          MPFR_RET (0);
1280        }
1281      else if (n == 1)
1282        return mpfr_set (sum, x[0], rnd);
1283      else
1284        return mpfr_add (sum, x[0], x[1], rnd);
1285    }
1286  else
1287    {
1288      mpfr_exp_t maxexp = MPFR_EXP_MIN;  /* max(Empty) */
1289      unsigned long i;
1290      unsigned long rn = 0;  /* will be the number of regular inputs */
1291      /* sign of infinities and zeros (0: currently unknown) */
1292      int sign_inf = 0, sign_zero = 0;
1293
1294      MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n));
1295
1296      for (i = 0; i < n; i++)
1297        {
1298          if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i])))
1299            {
1300              if (MPFR_IS_NAN (x[i]))
1301                {
1302                  /* The current value x[i] is NaN. Then the sum is NaN. */
1303                nan:
1304                  MPFR_SET_NAN (sum);
1305                  MPFR_RET_NAN;
1306                }
1307              else if (MPFR_IS_INF (x[i]))
1308                {
1309                  /* The current value x[i] is an infinity.
1310                     There are two cases:
1311                     1. This is the first infinity value (sign_inf == 0).
1312                        Then set sign_inf to its sign, and go on.
1313                     2. All the infinities found until now have the same
1314                        sign sign_inf. If this new infinity has a different
1315                        sign, then return NaN immediately, else go on. */
1316                  if (sign_inf == 0)
1317                    sign_inf = MPFR_SIGN (x[i]);
1318                  else if (MPFR_SIGN (x[i]) != sign_inf)
1319                    goto nan;
1320                }
1321              else if (MPFR_UNLIKELY (rn == 0))
1322                {
1323                  /* The current value x[i] is a zero. The code below matters
1324                     only when all values found until now are zeros, otherwise
1325                     it is harmless (the test rn == 0 above is just a minor
1326                     optimization).
1327                     Here we track the sign of the zero result when all inputs
1328                     are zeros: if all zeros have the same sign, the result
1329                     will have this sign, otherwise (i.e. if there is at least
1330                     a zero of each sign), the sign of the zero result depends
1331                     only on the rounding mode (note that this choice is
1332                     sticky when new zeros are considered). */
1333                  MPFR_ASSERTD (MPFR_IS_ZERO (x[i]));
1334                  if (sign_zero == 0)
1335                    sign_zero = MPFR_SIGN (x[i]);
1336                  else if (MPFR_SIGN (x[i]) != sign_zero)
1337                    sign_zero = rnd == MPFR_RNDD ? -1 : 1;
1338                }
1339            }
1340          else
1341            {
1342              /* The current value x[i] is a regular number. */
1343              mpfr_exp_t e = MPFR_GET_EXP (x[i]);
1344              if (e > maxexp)
1345                maxexp = e;  /* maximum exponent found until now */
1346              rn++;  /* current number of regular inputs */
1347            }
1348        }
1349
1350      MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n",
1351                     rn, sign_inf, sign_zero));
1352
1353      /* At this point the result cannot be NaN (this case has already
1354         been filtered out). */
1355
1356      if (MPFR_UNLIKELY (sign_inf != 0))
1357        {
1358          /* At least one infinity, and all of them have the same sign
1359             sign_inf. The sum is the infinity of this sign. */
1360          MPFR_SET_INF (sum);
1361          MPFR_SET_SIGN (sum, sign_inf);
1362          MPFR_RET (0);
1363        }
1364
1365      /* At this point, all the inputs are finite numbers. */
1366
1367      if (MPFR_UNLIKELY (rn == 0))
1368        {
1369          /* All the numbers were zeros (and there is at least one).
1370             The sum is zero with sign sign_zero. */
1371          MPFR_ASSERTD (sign_zero != 0);
1372          MPFR_SET_ZERO (sum);
1373          MPFR_SET_SIGN (sum, sign_zero);
1374          MPFR_RET (0);
1375        }
1376
1377      /* Optimize the case where there are only two regular numbers. */
1378      if (MPFR_UNLIKELY (rn <= 2))
1379        {
1380          unsigned long h = ULONG_MAX;
1381
1382          for (i = 0; i < n; i++)
1383            if (! MPFR_IS_SINGULAR (x[i]))
1384              {
1385                if (rn == 1)
1386                  return mpfr_set (sum, x[i], rnd);
1387                if (h != ULONG_MAX)
1388                  return mpfr_add (sum, x[h], x[i], rnd);
1389                h = i;
1390              }
1391          MPFR_RET_NEVER_GO_HERE();
1392        }
1393
1394      return sum_aux (sum, x, n, rnd, maxexp, rn);
1395    }
1396}
1397