1/* mpfr_pow_z -- power function x^z with z a MPZ
2
3Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4Contributed by the Arenaire and Cacao projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* y <- x^|z| with z != 0
27   if cr=1: ensures correct rounding of y
28   if cr=0: does not ensure correct rounding, but avoid spurious overflow
29   or underflow, and uses the precision of y as working precision (warning,
30   y and x might be the same variable). */
31static int
32mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr)
33{
34  mpfr_t res;
35  mpfr_prec_t prec, err;
36  int inexact;
37  mpfr_rnd_t rnd1, rnd2;
38  mpz_t absz;
39  mp_size_t size_z;
40  MPFR_ZIV_DECL (loop);
41  MPFR_BLOCK_DECL (flags);
42
43  MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d cr=%d", x, x, rnd, cr),
44                 ("y[%#R]=%R inexact=%d", y, y, inexact));
45
46  MPFR_ASSERTD (mpz_sgn (z) != 0);
47
48  if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
49    return mpfr_set (y, x, rnd);
50
51  absz[0] = z[0];
52  SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */
53  MPFR_MPZ_SIZEINBASE2 (size_z, z);
54
55  /* round toward 1 (or -1) to avoid spurious overflow or underflow,
56     i.e. if an overflow or underflow occurs, it is a real exception
57     and is not just due to the rounding error. */
58  rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ
59    : (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD);
60  rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU;
61
62  if (cr != 0)
63    prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
64  else
65    prec = MPFR_PREC (y);
66  mpfr_init2 (res, prec);
67
68  MPFR_ZIV_INIT (loop, prec);
69  for (;;)
70    {
71      unsigned int inexmul;  /* will be non-zero if res may be inexact */
72      mp_size_t i = size_z;
73
74      /* now 2^(i-1) <= z < 2^i */
75      /* see below (case z < 0) for the error analysis, which is identical,
76         except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
77         instead of 2(2n-1)2^(-prec) for z<0. */
78      MPFR_ASSERTD (prec > (mpfr_prec_t) i);
79      err = prec - 1 - (mpfr_prec_t) i;
80
81      MPFR_BLOCK (flags,
82                  inexmul = mpfr_mul (res, x, x, rnd2);
83                  MPFR_ASSERTD (i >= 2);
84                  if (mpz_tstbit (absz, i - 2))
85                    inexmul |= mpfr_mul (res, res, x, rnd1);
86                  for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
87                    {
88                      inexmul |= mpfr_mul (res, res, res, rnd2);
89                      if (mpz_tstbit (absz, i))
90                        inexmul |= mpfr_mul (res, res, x, rnd1);
91                    });
92      if (MPFR_LIKELY (inexmul == 0 || cr == 0
93                       || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
94                       || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
95        break;
96      /* Can't decide correct rounding, increase the precision */
97      MPFR_ZIV_NEXT (loop, prec);
98      mpfr_set_prec (res, prec);
99    }
100  MPFR_ZIV_FREE (loop);
101
102  /* Check Overflow */
103  if (MPFR_OVERFLOW (flags))
104    {
105      MPFR_LOG_MSG (("overflow\n", 0));
106      inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
107                               MPFR_SIGN (x) : MPFR_SIGN_POS);
108    }
109  /* Check Underflow */
110  else if (MPFR_UNDERFLOW (flags))
111    {
112      MPFR_LOG_MSG (("underflow\n", 0));
113      if (rnd == MPFR_RNDN)
114        {
115          mpfr_t y2, zz;
116
117          /* We cannot decide now whether the result should be rounded
118             toward zero or +Inf. So, let's use the general case of
119             mpfr_pow, which can do that. But the problem is that the
120             result can be exact! However, it is sufficient to try to
121             round on 2 bits (the precision does not matter in case of
122             underflow, since MPFR does not have subnormals), in which
123             case, the result cannot be exact due to previous filtering
124             of trivial cases. */
125          MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
126                                          MPFR_EXP (x) - 1) != 0);
127          mpfr_init2 (y2, 2);
128          mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
129          inexact = mpfr_set_z (zz, z, MPFR_RNDN);
130          MPFR_ASSERTN (inexact == 0);
131          inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
132                                      (mpfr_save_expo_t *) NULL);
133          mpfr_clear (zz);
134          mpfr_set (y, y2, MPFR_RNDN);
135          mpfr_clear (y2);
136          __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
137        }
138      else
139        {
140          inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
141                                    MPFR_SIGN (x) : MPFR_SIGN_POS);
142        }
143    }
144  else
145    inexact = mpfr_set (y, res, rnd);
146
147  mpfr_clear (res);
148  return inexact;
149}
150
151/* The computation of y = pow(x,z) is done by
152 *    y = set_ui(1)      if z = 0
153 *    y = pow_ui(x,z)    if z > 0
154 *    y = pow_ui(1/x,-z) if z < 0
155 *
156 * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
157 * case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
158 * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
159 * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
160 * x^z is representable.
161 */
162
163int
164mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd)
165{
166  int   inexact;
167  mpz_t tmp;
168  MPFR_SAVE_EXPO_DECL (expo);
169
170  MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d", x, x, rnd),
171                 ("y[%#R]=%R inexact=%d", y, y, inexact));
172
173  /* x^0 = 1 for any x, even a NaN */
174  if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
175    return mpfr_set_ui (y, 1, rnd);
176
177  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
178    {
179      if (MPFR_IS_NAN (x))
180        {
181          MPFR_SET_NAN (y);
182          MPFR_RET_NAN;
183        }
184      else if (MPFR_IS_INF (x))
185        {
186          /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
187          /* Inf ^(-n) = 0, sign = + if x>0 or z even */
188          if (mpz_sgn (z) > 0)
189            MPFR_SET_INF (y);
190          else
191            MPFR_SET_ZERO (y);
192          if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
193            MPFR_SET_NEG (y);
194          else
195            MPFR_SET_POS (y);
196          MPFR_RET (0);
197        }
198      else /* x is zero */
199        {
200          MPFR_ASSERTD (MPFR_IS_ZERO(x));
201          if (mpz_sgn (z) > 0)
202            /* 0^n = +/-0 for any n */
203            MPFR_SET_ZERO (y);
204          else
205            /* 0^(-n) if +/- INF */
206            MPFR_SET_INF (y);
207          if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
208            MPFR_SET_POS (y);
209          else
210            MPFR_SET_NEG (y);
211          MPFR_RET(0);
212        }
213    }
214
215  MPFR_SAVE_EXPO_MARK (expo);
216
217  /* detect exact powers: x^-n is exact iff x is a power of 2
218     Do it if n > 0 too as this is faster and this filtering is
219     needed in case of underflow. */
220  if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
221                                       MPFR_EXP (x) - 1) == 0))
222    {
223      mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
224                                         variable */
225
226      MPFR_LOG_MSG (("x^n with x power of two\n", 0));
227      mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
228      MPFR_ASSERTD (MPFR_IS_FP (y));
229      mpz_init (tmp);
230      mpz_mul_si (tmp, z, expx - 1);
231      MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
232      mpz_add_ui (tmp, tmp, 1);
233      inexact = 0;
234      if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
235        {
236          MPFR_LOG_MSG (("underflow\n", 0));
237          /* |y| is a power of two, thus |y| <= 2^(emin-2), and in
238             rounding to nearest, the value must be rounded to 0. */
239          if (rnd == MPFR_RNDN)
240            rnd = MPFR_RNDZ;
241          inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
242        }
243      else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
244        {
245          MPFR_LOG_MSG (("overflow\n", 0));
246          inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
247        }
248      else
249        MPFR_SET_EXP (y, mpz_get_si (tmp));
250      mpz_clear (tmp);
251      MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
252    }
253  else if (mpz_sgn (z) > 0)
254    {
255      inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
256      MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
257    }
258  else
259    {
260      /* Declaration of the intermediary variable */
261      mpfr_t t;
262      mpfr_prec_t Nt;   /* Precision of the intermediary variable */
263      mpfr_rnd_t rnd1;
264      mp_size_t size_z;
265      MPFR_ZIV_DECL (loop);
266
267      MPFR_MPZ_SIZEINBASE2 (size_z, z);
268
269      /* initial working precision */
270      Nt = MPFR_PREC (y);
271      Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
272      /* ensures Nt >= bits(z)+2 */
273
274      /* initialise of intermediary variable */
275      mpfr_init2 (t, Nt);
276
277      /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
278         toward sign(x), to avoid spurious overflow or underflow. */
279      rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ :
280        (MPFR_SIGN (x) > 0 ? MPFR_RNDU : MPFR_RNDD);
281
282      MPFR_ZIV_INIT (loop, Nt);
283      for (;;)
284        {
285          MPFR_BLOCK_DECL (flags);
286
287          /* compute (1/x)^(-z), -z>0 */
288          /* As emin = -emax, an underflow cannot occur in the division.
289             And if an overflow occurs, then this means that x^z overflows
290             too (since we have rounded toward 1 or -1). */
291          MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
292          MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
293          /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
294          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
295            goto overflow;
296          MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
297          /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
298             with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
299             which is satisfied as soon as Nt >= bits(z)+2, then we can use
300             Lemma \ref{lemma_graillat} from algorithms.tex, which yields
301             t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
302             error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
303          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
304            {
305            overflow:
306              MPFR_ZIV_FREE (loop);
307              mpfr_clear (t);
308              MPFR_SAVE_EXPO_FREE (expo);
309              MPFR_LOG_MSG (("overflow\n", 0));
310              return mpfr_overflow (y, rnd,
311                                    mpz_odd_p (z) ? MPFR_SIGN (x) :
312                                    MPFR_SIGN_POS);
313            }
314          if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
315            {
316              MPFR_ZIV_FREE (loop);
317              mpfr_clear (t);
318              MPFR_LOG_MSG (("underflow\n", 0));
319              if (rnd == MPFR_RNDN)
320                {
321                  mpfr_t y2, zz;
322
323                  /* We cannot decide now whether the result should be
324                     rounded toward zero or away from zero. So, like
325                     in mpfr_pow_pos_z, let's use the general case of
326                     mpfr_pow in precision 2. */
327                  MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
328                                                  MPFR_EXP (x) - 1) != 0);
329                  mpfr_init2 (y2, 2);
330                  mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
331                  inexact = mpfr_set_z (zz, z, MPFR_RNDN);
332                  MPFR_ASSERTN (inexact == 0);
333                  inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
334                                              (mpfr_save_expo_t *) NULL);
335                  mpfr_clear (zz);
336                  mpfr_set (y, y2, MPFR_RNDN);
337                  mpfr_clear (y2);
338                  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
339                  goto end;
340                }
341              else
342                {
343                  MPFR_SAVE_EXPO_FREE (expo);
344                  return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
345                                         MPFR_SIGN (x) : MPFR_SIGN_POS);
346                }
347            }
348          if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
349                                           rnd)))
350            break;
351          /* actualisation of the precision */
352          MPFR_ZIV_NEXT (loop, Nt);
353          mpfr_set_prec (t, Nt);
354        }
355      MPFR_ZIV_FREE (loop);
356
357      inexact = mpfr_set (y, t, rnd);
358      mpfr_clear (t);
359    }
360
361 end:
362  MPFR_SAVE_EXPO_FREE (expo);
363  return mpfr_check_range (y, inexact, rnd);
364}
365