pow_z.c revision 1.1.1.4
1/* mpfr_pow_z -- power function x^z with z a MPZ
2
3Copyright 2005-2020 Free Software Foundation, Inc.
4Contributed by the AriC and Caramba projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#define MPFR_NEED_LONGLONG_H
24#include "mpfr-impl.h"
25
26/* y <- x^|z| with z != 0
27   if cr=1: ensures correct rounding of y
28   if cr=0: does not ensure correct rounding, but avoid spurious overflow
29   or underflow, and uses the precision of y as working precision (warning,
30   y and x might be the same variable). */
31static int
32mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr)
33{
34  mpfr_t res;
35  mpfr_prec_t prec, err;
36  int inexact;
37  mpfr_rnd_t rnd1, rnd2;
38  mpz_t absz;
39  mp_size_t size_z;
40  MPFR_ZIV_DECL (loop);
41  MPFR_BLOCK_DECL (flags);
42
43  MPFR_LOG_FUNC
44    (("x[%Pu]=%.*Rg z=%Zd rnd=%d cr=%d",
45      mpfr_get_prec (x), mpfr_log_prec, x, z, rnd, cr),
46     ("y[%Pu]=%.*Rg inexact=%d",
47      mpfr_get_prec (y), mpfr_log_prec, y, inexact));
48
49  MPFR_ASSERTD (mpz_sgn (z) != 0);
50
51  if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
52    return mpfr_set (y, x, rnd);
53
54  absz[0] = z[0];
55  SIZ (absz) = ABSIZ (absz); /* Hack to get abs(z) */
56  MPFR_MPZ_SIZEINBASE2 (size_z, z);
57
58  /* round toward 1 (or -1) to avoid spurious overflow or underflow,
59     i.e. if an overflow or underflow occurs, it is a real exception
60     and is not just due to the rounding error. */
61  rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ
62    : (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD);
63  rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU;
64
65  if (cr != 0)
66    prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
67  else
68    prec = MPFR_PREC (y);
69  mpfr_init2 (res, prec);
70
71  MPFR_ZIV_INIT (loop, prec);
72  for (;;)
73    {
74      unsigned int inexmul;  /* will be non-zero if res may be inexact */
75      mp_size_t i = size_z;
76
77      /* now 2^(i-1) <= z < 2^i */
78      /* see below (case z < 0) for the error analysis, which is identical,
79         except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
80         instead of 2(2n-1)2^(-prec) for z<0. */
81      MPFR_ASSERTD (prec > (mpfr_prec_t) i);
82      err = prec - 1 - (mpfr_prec_t) i;
83
84      MPFR_BLOCK (flags,
85                  inexmul = mpfr_sqr (res, x, rnd2);
86                  MPFR_ASSERTD (i >= 2);
87                  if (mpz_tstbit (absz, i - 2))
88                    inexmul |= mpfr_mul (res, res, x, rnd1);
89                  for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
90                    {
91                      inexmul |= mpfr_sqr (res, res, rnd2);
92                      if (mpz_tstbit (absz, i))
93                        inexmul |= mpfr_mul (res, res, x, rnd1);
94                    });
95      if (MPFR_LIKELY (inexmul == 0 || cr == 0
96                       || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
97                       || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
98        break;
99      /* Can't decide correct rounding, increase the precision */
100      MPFR_ZIV_NEXT (loop, prec);
101      mpfr_set_prec (res, prec);
102    }
103  MPFR_ZIV_FREE (loop);
104
105  /* Check Overflow */
106  if (MPFR_OVERFLOW (flags))
107    {
108      MPFR_LOG_MSG (("overflow\n", 0));
109      inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
110                               MPFR_SIGN (x) : MPFR_SIGN_POS);
111    }
112  /* Check Underflow */
113  else if (MPFR_UNDERFLOW (flags))
114    {
115      MPFR_LOG_MSG (("underflow\n", 0));
116      if (rnd == MPFR_RNDN)
117        {
118          mpfr_t y2, zz;
119
120          /* We cannot decide now whether the result should be rounded
121             toward zero or +Inf. So, let's use the general case of
122             mpfr_pow, which can do that. But the problem is that the
123             result can be exact! However, it is sufficient to try to
124             round on 2 bits (the precision does not matter in case of
125             underflow, since MPFR does not have subnormals), in which
126             case, the result cannot be exact due to previous filtering
127             of trivial cases. */
128          MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
129                                          MPFR_EXP (x) - 1) != 0);
130          mpfr_init2 (y2, 2);
131          mpfr_init2 (zz, ABSIZ (z) * GMP_NUMB_BITS);
132          inexact = mpfr_set_z (zz, z, MPFR_RNDN);
133          MPFR_ASSERTN (inexact == 0);
134          inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
135                                      (mpfr_save_expo_t *) NULL);
136          mpfr_clear (zz);
137          mpfr_set (y, y2, MPFR_RNDN);
138          mpfr_clear (y2);
139          __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
140        }
141      else
142        {
143          inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
144                                    MPFR_SIGN (x) : MPFR_SIGN_POS);
145        }
146    }
147  else
148    inexact = mpfr_set (y, res, rnd);
149
150  mpfr_clear (res);
151  return inexact;
152}
153
154/* The computation of y = pow(x,z) is done by
155 *    y = set_ui(1)      if z = 0
156 *    y = pow_ui(x,z)    if z > 0
157 *    y = pow_ui(1/x,-z) if z < 0
158 *
159 * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
160 * case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
161 * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
162 * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
163 * x^z is representable.
164 */
165
166int
167mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd)
168{
169  int   inexact;
170  mpz_t tmp;
171  MPFR_SAVE_EXPO_DECL (expo);
172
173  MPFR_LOG_FUNC
174    (("x[%Pu]=%.*Rg z=%Zd rnd=%d",
175      mpfr_get_prec (x), mpfr_log_prec, x, z, rnd),
176     ("y[%Pu]=%.*Rg inexact=%d",
177      mpfr_get_prec (y), mpfr_log_prec, y, inexact));
178
179  /* x^0 = 1 for any x, even a NaN */
180  if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
181    return mpfr_set_ui (y, 1, rnd);
182
183  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
184    {
185      if (MPFR_IS_NAN (x))
186        {
187          MPFR_SET_NAN (y);
188          MPFR_RET_NAN;
189        }
190      else if (MPFR_IS_INF (x))
191        {
192          /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
193          /* Inf ^(-n) = 0, sign = + if x>0 or z even */
194          if (mpz_sgn (z) > 0)
195            MPFR_SET_INF (y);
196          else
197            MPFR_SET_ZERO (y);
198          if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
199            MPFR_SET_NEG (y);
200          else
201            MPFR_SET_POS (y);
202          MPFR_RET (0);
203        }
204      else /* x is zero */
205        {
206          MPFR_ASSERTD (MPFR_IS_ZERO(x));
207          if (mpz_sgn (z) > 0)
208            /* 0^n = +/-0 for any n */
209            MPFR_SET_ZERO (y);
210          else
211            {
212              /* 0^(-n) if +/- INF */
213              MPFR_SET_INF (y);
214              MPFR_SET_DIVBY0 ();
215            }
216          if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
217            MPFR_SET_POS (y);
218          else
219            MPFR_SET_NEG (y);
220          MPFR_RET(0);
221        }
222    }
223
224  MPFR_SAVE_EXPO_MARK (expo);
225
226  /* detect exact powers: x^-n is exact iff x is a power of 2
227     Do it if n > 0 too as this is faster and this filtering is
228     needed in case of underflow. */
229  if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
230                                       MPFR_EXP (x) - 1) == 0))
231    {
232      mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
233                                         variable */
234
235      MPFR_LOG_MSG (("x^n with x power of two\n", 0));
236      mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
237      MPFR_ASSERTD (MPFR_IS_FP (y));
238      mpz_init (tmp);
239      mpz_mul_si (tmp, z, expx - 1);
240      MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
241      mpz_add_ui (tmp, tmp, 1);
242      inexact = 0;
243      if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
244        {
245          MPFR_LOG_MSG (("underflow\n", 0));
246          /* |y| is a power of two, thus |y| <= 2^(emin-2), and in
247             rounding to nearest, the value must be rounded to 0. */
248          if (rnd == MPFR_RNDN)
249            rnd = MPFR_RNDZ;
250          inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
251        }
252      else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
253        {
254          MPFR_LOG_MSG (("overflow\n", 0));
255          inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
256        }
257      else
258        MPFR_SET_EXP (y, mpz_get_si (tmp));
259      mpz_clear (tmp);
260      MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
261    }
262  else if (mpz_sgn (z) > 0)
263    {
264      inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
265      MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
266    }
267  else
268    {
269      /* Declaration of the intermediary variable */
270      mpfr_t t;
271      mpfr_prec_t Nt;   /* Precision of the intermediary variable */
272      mpfr_rnd_t rnd1;
273      mp_size_t size_z;
274      MPFR_ZIV_DECL (loop);
275
276      MPFR_MPZ_SIZEINBASE2 (size_z, z);
277
278      /* initial working precision */
279      Nt = MPFR_PREC (y);
280      Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
281      /* ensures Nt >= bits(z)+2 */
282
283      /* initialize of intermediary variable */
284      mpfr_init2 (t, Nt);
285
286      /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
287         toward sign(x), to avoid spurious overflow or underflow. */
288      rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ :
289        (MPFR_IS_POS (x) ? MPFR_RNDU : MPFR_RNDD);
290
291      MPFR_ZIV_INIT (loop, Nt);
292      for (;;)
293        {
294          MPFR_BLOCK_DECL (flags);
295
296          /* compute (1/x)^(-z), -z>0 */
297          /* As emin = -emax, an underflow cannot occur in the division.
298             And if an overflow occurs, then this means that x^z overflows
299             too (since we have rounded toward 1 or -1). */
300          MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
301          MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
302          /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
303          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
304            goto overflow;
305          MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
306          /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
307             with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
308             which is satisfied as soon as Nt >= bits(z)+2, then we can use
309             Lemma \ref{lemma_graillat} from algorithms.tex, which yields
310             t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
311             error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
312          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
313            {
314            overflow:
315              MPFR_ZIV_FREE (loop);
316              mpfr_clear (t);
317              MPFR_SAVE_EXPO_FREE (expo);
318              MPFR_LOG_MSG (("overflow\n", 0));
319              return mpfr_overflow (y, rnd,
320                                    mpz_odd_p (z) ? MPFR_SIGN (x) :
321                                    MPFR_SIGN_POS);
322            }
323          if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
324            {
325              MPFR_ZIV_FREE (loop);
326              mpfr_clear (t);
327              MPFR_LOG_MSG (("underflow\n", 0));
328              if (rnd == MPFR_RNDN)
329                {
330                  mpfr_t y2, zz;
331
332                  /* We cannot decide now whether the result should be
333                     rounded toward zero or away from zero. So, like
334                     in mpfr_pow_pos_z, let's use the general case of
335                     mpfr_pow in precision 2. */
336                  MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
337                                                  MPFR_EXP (x) - 1) != 0);
338                  mpfr_init2 (y2, 2);
339                  mpfr_init2 (zz, ABSIZ (z) * GMP_NUMB_BITS);
340                  inexact = mpfr_set_z (zz, z, MPFR_RNDN);
341                  MPFR_ASSERTN (inexact == 0);
342                  inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
343                                              (mpfr_save_expo_t *) NULL);
344                  mpfr_clear (zz);
345                  mpfr_set (y, y2, MPFR_RNDN);
346                  mpfr_clear (y2);
347                  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
348                  goto end;
349                }
350              else
351                {
352                  MPFR_SAVE_EXPO_FREE (expo);
353                  return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
354                                         MPFR_SIGN (x) : MPFR_SIGN_POS);
355                }
356            }
357          if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
358                                           rnd)))
359            break;
360          /* actualization of the precision */
361          MPFR_ZIV_NEXT (loop, Nt);
362          mpfr_set_prec (t, Nt);
363        }
364      MPFR_ZIV_FREE (loop);
365
366      inexact = mpfr_set (y, t, rnd);
367      mpfr_clear (t);
368    }
369
370 end:
371  MPFR_SAVE_EXPO_FREE (expo);
372  return mpfr_check_range (y, inexact, rnd);
373}
374