/* mpfr_mul -- multiply two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by the Arenaire and Cacao projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"


/********* BEGINNING CHECK *************/

/* Check whether we have to check the result of mpfr_mul.
   TODO: Find a better (and faster?) check than using the old implementation */
#ifdef WANT_ASSERT
# if WANT_ASSERT >= 3

int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode);
static int
mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  /* Old implementation */
  int sign_product, cc, inexact;
  mpfr_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mpfr_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL(marker);

  /* deal with special cases */
  if (MPFR_ARE_SINGULAR(b,c))
    {
      if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
      if (MPFR_IS_INF(b))
        {
          if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
            {
              MPFR_SET_SIGN(a,sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF(c))
        {
          if (MPFR_NOTZERO(b))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN(a, sign_product);
          MPFR_SET_ZERO(a);
          MPFR_RET(0); /* 0 * 0 is exact */
        }
    }
  sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);

  bq = MPFR_PREC(b);
  cq = MPFR_PREC(c);

  MPFR_ASSERTD(bq+cq > bq); /* no integer overflow, since MPFR_PREC_MAX
                               is at most half the maximum of mpfr_prec_t */

  bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
  cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
  k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
  /* <= k, thus no int overflow */
  MPFR_ASSERTD(tn <= k);

  /* Check that no size_t overflow can occur */
  MPFR_ASSERTD((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK(marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);

  /* multiply the two mantissas in temporarily allocated space */
  b1 = (MPFR_LIKELY(bn >= cn)) ?
    mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
    : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);

  /* now tmp[0]..tmp[k-1] contain the product of the two mantissas,
     with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
  b1 >>= GMP_NUMB_BITS - 1; /* msb of the product */

  /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
     then their product is in (1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
     and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
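  /* Example: mantissas 0.75 and 0.75 give 0.5625, already in [1/2, 1]
     (b1 = 1, no shift needed), while 0.625 * 0.625 = 0.390625 lies in
     (1/4, 1/2) (b1 = 0) and must be shifted left by one bit below. */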
  tmp += k - tn;
  if (MPFR_UNLIKELY(b1 == 0))
    mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
  cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
                       MPFR_IS_NEG_SIGN(sign_product),
                       MPFR_PREC (a), rnd_mode, &inexact);

  /* cc = 1 ==> result is a power of two */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);

  {
    mpfr_exp_t ax2 = ax + (mpfr_exp_t) (b1 - 1 + cc);
    if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, sign_product);
    if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
      {
        /* In the rounding to the nearest mode, if the exponent of the exact
           result (i.e. before rounding, i.e. without taking cc into account)
           is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
           both arguments are powers of 2), then round to zero. */
        if (rnd_mode == MPFR_RNDN &&
            (ax + (mpfr_exp_t) b1 < __gmpfr_emin ||
             (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
          rnd_mode = MPFR_RNDZ;
        return mpfr_underflow (a, rnd_mode, sign_product);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_SIGN(a, sign_product);
  }
  MPFR_RET (inexact);
}

int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_t ta, tb, tc;
  int inexact1, inexact2;

  mpfr_init2 (ta, MPFR_PREC (a));
  mpfr_init2 (tb, MPFR_PREC (b));
  mpfr_init2 (tc, MPFR_PREC (c));
  MPFR_ASSERTN (mpfr_set (tb, b, MPFR_RNDN) == 0);
  MPFR_ASSERTN (mpfr_set (tc, c, MPFR_RNDN) == 0);

  inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
  inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
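  /* The two results must be identical, and the two ternary values must
     agree in sign: a negative product means they have opposite signs, and
     a zero product with a nonzero bitwise OR means exactly one of them
     claims exactness. */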
  if (mpfr_cmp (ta, a) || inexact1*inexact2 < 0
      || (inexact1*inexact2 == 0 && (inexact1|inexact2) != 0))
    {
      fprintf (stderr, "mpfr_mul returned different values for %s\n"
               "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
               mpfr_print_rnd_mode (rnd_mode),
               MPFR_PREC (a), MPFR_PREC (b), MPFR_PREC (c));
      mpfr_out_str (stderr, 16, 0, tb, MPFR_RNDN);
      fprintf (stderr, "\nC = ");
      mpfr_out_str (stderr, 16, 0, tc, MPFR_RNDN);
      fprintf (stderr, "\nOldMul: ");
      mpfr_out_str (stderr, 16, 0, ta, MPFR_RNDN);
      fprintf (stderr, "\nNewMul: ");
      mpfr_out_str (stderr, 16, 0, a, MPFR_RNDN);
      fprintf (stderr, "\nNewInexact = %d | OldInexact = %d\n",
               inexact1, inexact2);
      MPFR_ASSERTN(0);
    }

  mpfr_clears (ta, tb, tc, (mpfr_ptr) 0);
  return inexact1;
}

# define mpfr_mul mpfr_mul2
# endif
#endif

/****** END OF CHECK *******/

/* Multiply 2 mpfr_t */

int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  int sign, inexact;
  mpfr_exp_t ax, ax2;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mpfr_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL (marker);

  MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode),
                 ("a[%#R]=%R inexact=%d", a, a, inexact));

  /* deal with special cases */
  if (MPFR_ARE_SINGULAR (b, c))
    {
      if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
        {
          MPFR_SET_NAN (a);
          MPFR_RET_NAN;
        }
      sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
      if (MPFR_IS_INF (b))
        {
          if (!MPFR_IS_ZERO (c))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF (c))
        {
          if (!MPFR_IS_ZERO (b))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN (a, sign);
          MPFR_SET_ZERO (a);
          MPFR_RET (0);
        }
    }
  sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
  /* Note: the exponent of the exact result will be e = bx + cx + ec with
     ec in {-1,0,1} and the following assumes that e is representable. */
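  /* For instance, b = c = 3 = 0.75 * 2^2 gives b*c = 0.5625 * 2^4, so
     ec = 0, while b = c = 2 = 0.5 * 2^2 gives b*c = 0.5 * 2^3, so ec = -1;
     ec = 1 can only come from a carry out of the final rounding. */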

  /* FIXME: Is this pre-check useful, given the exponent check done below?
   * It pays off only when the precision is large, an overflow occurs,
   * and further multiplications follow... */
#ifdef HUGE
  if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
    return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
                           sign);
#endif
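  /* These bounds are safe since the exact product lies in [2^(ax-2), 2^ax):
     if ax > emax + 1, even 2^(ax-2) >= 2^emax overflows, and if
     ax < emin - 2, the product is below half the smallest positive number,
     so rounding to nearest yields zero, whence the MPFR_RNDZ substitution
     above. */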

  bq = MPFR_PREC (b);
  cq = MPFR_PREC (c);

  MPFR_ASSERTD (bq+cq > bq); /* no integer overflow, since MPFR_PREC_MAX
                                is at most half the maximum of mpfr_prec_t */

  bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
  cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
  k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
  MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */

  /* Check that no size_t overflow can occur */
  MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK (marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);

  /* multiply the two mantissas in temporarily allocated space */
  if (MPFR_UNLIKELY (bn < cn))
    {
      mpfr_srcptr z = b;
      mp_size_t zn  = bn;
      b = c;
      bn = cn;
      c = z;
      cn = zn;
    }
  MPFR_ASSERTD (bn >= cn);
  if (MPFR_LIKELY (bn <= 2))
    {
      if (bn == 1)
        {
          /* 1 limb * 1 limb */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          b1 = tmp[1];
        }
      else if (MPFR_UNLIKELY (cn == 1))
        {
          /* 2 limbs * 1 limb */
          mp_limb_t t;
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
          b1 = tmp[2];
        }
      else
        {
          /* 2 limbs * 2 limbs */
          mp_limb_t t1, t2, t3;
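          /* Schoolbook product, least significant limbs first; writing
             bh = MPFR_MANT(b)[1], bl = MPFR_MANT(b)[0] (and likewise ch,
             cl for c), the four partial products are placed as follows:
                 bl*cl -> (tmp[1], tmp[0])
                 bh*cl -> (tmp[2], t1), accumulated at offset 1
                 bl*ch -> (t1, t2),     accumulated at offset 1
                 bh*ch -> (tmp[3], t3), accumulated at offset 2 */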
          /* First, a 2 limbs * 1 limb product */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
          /* Second, the other 2 limbs * 1 limb product */
          umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
          umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
          add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
          /* Sum those two partial products */
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
          tmp[3] += (tmp[2] < t1); /* propagate the carry */
          b1 = tmp[3];
        }
      b1 >>= (GMP_NUMB_BITS - 1);
      tmp += k - tn;
      if (MPFR_UNLIKELY (b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }
  else
    /* Mulders' mulhigh. Disabled when squaring, since it is not tuned
       for that case */
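    /* A short product computes only the most significant half of the
       product, roughly halving the number of limb multiplications; the
       price is an error of a few ulps in the low limbs, so the result is
       usable only if it passes the mpfr_round_p test below. */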
    if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))
      {
        mp_limb_t *bp, *cp;
        mp_size_t n;
        mpfr_prec_t p;

        /* First check whether we can reduce the precision of b or c:
           exact values are a nightmare for the short product trick, since
           a product that is exactly representable can never be rounded
           from an approximation alone */
        bp = MPFR_MANT (b);
        cp = MPFR_MANT (c);
        MPFR_ASSERTN (MPFR_MUL_THRESHOLD >= 1);
        if (MPFR_UNLIKELY ((bp[0] == 0 && bp[1] == 0) ||
                           (cp[0] == 0 && cp[1] == 0)))
          {
            mpfr_t b_tmp, c_tmp;

            MPFR_TMP_FREE (marker);
            /* Strip the low zero limbs of b */
            while (*bp == 0)
              {
                bp++;
                bn--;
                MPFR_ASSERTD (bn > 0);
              } /* This terminates since the most significant limb is nonzero */

            /* Strip the low zero limbs of c too */
            while (*cp == 0)
              {
                cp++;
                cn--;
                MPFR_ASSERTD (cn > 0);
              } /* This terminates since the most significant limb is nonzero */

            /* Not the fastest way, but safer */
            MPFR_SET_SAME_SIGN (b_tmp, b);
            MPFR_SET_EXP (b_tmp, MPFR_GET_EXP (b));
            MPFR_PREC (b_tmp) = bn * GMP_NUMB_BITS;
            MPFR_MANT (b_tmp) = bp;

            MPFR_SET_SAME_SIGN (c_tmp, c);
            MPFR_SET_EXP (c_tmp, MPFR_GET_EXP (c));
            MPFR_PREC (c_tmp) = cn * GMP_NUMB_BITS;
            MPFR_MANT (c_tmp) = cp;

            /* Call mpfr_mul again with the reduced arguments */
            return mpfr_mul (a, b_tmp, c_tmp, rnd_mode);
          }

        /* Compute the estimated precision of mulhigh.
           We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
           but is it worth it? */
        n = MPFR_LIMB_SIZE (a) + 1;
        n = MIN (n, cn);
        MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
        p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
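        /* p is the number of bits of the short product we can trust:
           n limbs minus MPFR_INT_CEIL_LOG2 (n + 2) bits, the latter
           budgeting for an error of at most n+2 ulps in mpfr_mulhigh_n. */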
        bp += bn - n;
        cp += cn - n;

        /* Check whether MulHigh can produce a roundable result.
           We may lose 1 bit due to RNDN, and 1 due to the final shift. */
        if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5))
          {
            if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5 + GMP_NUMB_BITS
                               || bn <= MPFR_MUL_THRESHOLD+1))
              {
                /* MulHigh can't produce a roundable result. */
                MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
                               MPFR_PREC (a), p));
                goto full_multiply;
              }
            /* Add one extra limb to the mantissas of b and c. */
            if (bn > n)
              bp --;
            else
              {
                bp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
                bp[0] = 0;
                MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
              }
            if (cn > n)
              cp --; /* FIXME: Could this happen? */
            else
              {
                cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
                cp[0] = 0;
                MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
              }
            /* We will compute with one extra limb */
            n++;
            p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
            /* For the same nasty reasons, we may have only 4 bits of
               margin left here */
            MPFR_ASSERTD (MPFR_PREC (a) <= p - 4);

            if (MPFR_LIKELY (k < 2*n))
              {
                tmp = (mp_limb_t*) MPFR_TMP_ALLOC (2 * n * sizeof (mp_limb_t));
                tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
              }
          }
        MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a), p));
        /* Compute an approximation of the product of b and c */
        mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
        /* now tmp[0]..tmp[k-1] contain an approximation of the product of
           the two mantissas, with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
        b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb of the product */

        /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
           then their product is in (1/4, 1/2] with probability 2*ln(2)-1
           ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
        if (MPFR_UNLIKELY (b1 == 0))
          /* Warning: the mpfr_mulhigh_n call above is only guaranteed to
             affect tmp[k-n-1..k-1], thus we shift only those limbs */
          mpn_lshift (tmp + k - n - 1, tmp + k - n - 1, n + 1, 1);
        tmp += k - tn;
        MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);

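        /* mpfr_round_p tells whether an approximation correct to p+b1-1
           bits is enough to round to PREC(a) bits; round-to-nearest needs
           one extra bit to settle ties. */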
        if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p+b1-1, MPFR_PREC(a)
                                          + (rnd_mode == MPFR_RNDN))))
          {
            tmp -= k - tn; /* restore tmp, which was advanced above */
            goto full_multiply;
          }
      }
    else
      {
      full_multiply:
        MPFR_LOG_MSG (("Use mpn_mul\n", 0));
        b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);

        /* now tmp[0]..tmp[k-1] contain the product of the two mantissas,
           with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
        b1 >>= GMP_NUMB_BITS - 1; /* msb of the product */

        /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
           then their product is in (1/4, 1/2] with probability 2*ln(2)-1
           ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
        tmp += k - tn;
        if (MPFR_UNLIKELY (b1 == 0))
          mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
      }

  ax2 = ax + (mpfr_exp_t) (b1 - 1);
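  /* MPFR_RNDRAW rounds the significand in tmp to PREC(a) bits; its last
     argument is executed when the rounding carry propagates through the
     whole significand (the result is then a power of 2), in which case
     ax2 is incremented. */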
  MPFR_RNDRAW (inexact, a, tmp, bq+cq, rnd_mode, sign, ax2++);
  MPFR_TMP_FREE (marker);
  MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: the exponent may be out of range */
  MPFR_SET_SIGN (a, sign);
  if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
    {
      /* In the rounding to the nearest mode, if the exponent of the exact
         result (i.e. before rounding, i.e. without taking the rounding
         carry into account) is < __gmpfr_emin - 1 or the exact result is a
         power of 2 (i.e. if both arguments are powers of 2), then round to
         zero. */
      if (rnd_mode == MPFR_RNDN
          && (ax + (mpfr_exp_t) b1 < __gmpfr_emin
              || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, sign);
    }
  MPFR_RET (inexact);
}
