1/* mpfr_cosu  -- cosu(x) = cos(2*pi*x/u)
2   mpfr_cospi -- cospi(x) = cos(pi*x)
3
4Copyright 2020-2023 Free Software Foundation, Inc.
5Contributed by the AriC and Caramba projects, INRIA.
6
7This file is part of the GNU MPFR Library.
8
9The GNU MPFR Library is free software; you can redistribute it and/or modify
10it under the terms of the GNU Lesser General Public License as published by
11the Free Software Foundation; either version 3 of the License, or (at your
12option) any later version.
13
14The GNU MPFR Library is distributed in the hope that it will be useful, but
15WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17License for more details.
18
19You should have received a copy of the GNU Lesser General Public License
20along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2251 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23
24#define MPFR_NEED_LONGLONG_H
25#include "mpfr-impl.h"
26
27/* put in y the correctly rounded value of cos(2*pi*x/u) */
28int
29mpfr_cosu (mpfr_ptr y, mpfr_srcptr x, unsigned long u, mpfr_rnd_t rnd_mode)
30{
31  mpfr_srcptr xp;
32  mpfr_prec_t precy, prec;
33  mpfr_exp_t expx, expt, err, log2u, erra, errb;
34  mpfr_t t, xr;
35  int inexact = 0, nloops = 0, underflow = 0;
36  MPFR_ZIV_DECL (loop);
37  MPFR_SAVE_EXPO_DECL (expo);
38
39  MPFR_LOG_FUNC (
40    ("x[%Pd]=%.*Rg u=%lu rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, u,
41     rnd_mode),
42    ("y[%Pd]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
43     inexact));
44
45  if (u == 0 || MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
46    {
47      /* for u=0, return NaN */
48      if (u == 0 || MPFR_IS_NAN (x) || MPFR_IS_INF (x))
49        {
50          MPFR_SET_NAN (y);
51          MPFR_RET_NAN;
52        }
53      else /* x is zero: cos(0) = 1 */
54        {
55          MPFR_ASSERTD (MPFR_IS_ZERO (x));
56          return mpfr_set_ui (y, 1, rnd_mode);
57        }
58    }
59
60  MPFR_SAVE_EXPO_MARK (expo);
61
62  /* Range reduction. We do not need to reduce the argument if it is
63     already reduced (|x| < u).
64     Note that the case |x| = u is better in the "else" branch as it
65     will give xr = 0. */
66  if (mpfr_cmpabs_ui (x, u) < 0)
67    {
68      xp = x;
69    }
70  else
71    {
72      mpfr_exp_t p = MPFR_GET_PREC (x) - MPFR_GET_EXP (x);
73      int inex;
74
75      /* Let's compute xr = x mod u, with signbit(xr) = signbit(x), though
76         this doesn't matter.
77         The precision of xr is chosen to ensure that x mod u is exactly
78         representable in xr, e.g., the maximum size of u + the length of
79         the fractional part of x. Note that since |x| >= u in this branch,
80         the additional memory amount will not be more than the one of x.
81      */
82      mpfr_init2 (xr, sizeof (unsigned long) * CHAR_BIT + (p < 0 ? 0 : p));
83      MPFR_DBGRES (inex = mpfr_fmod_ui (xr, x, u, MPFR_RNDN));  /* exact */
84      MPFR_ASSERTD (inex == 0);
85      if (MPFR_IS_ZERO (xr))
86        {
87          mpfr_clear (xr);
88          MPFR_SAVE_EXPO_FREE (expo);
89          return mpfr_set_ui (y, 1, rnd_mode);
90        }
91      xp = xr;
92    }
93
94#define CLEAR_XR                       \
95  do                                   \
96    if (xp != x)                       \
97      {                                \
98        MPFR_ASSERTD (xp == xr);       \
99        mpfr_clear (xr);               \
100      }                                \
101  while (0)
102
103  /* now |xp/u| < 1 */
104
105  /* for x small, we have |cos(2*pi*x/u)-1| < 1/2*(2*pi*x/u)^2 < 2^5*(x/u)^2 */
106  expx = MPFR_GET_EXP (xp);
107  log2u = u == 1 ? 0 : MPFR_INT_CEIL_LOG2 (u) - 1;
108  /* u >= 2^log2u thus 1/u <= 2^(-log2u) */
109  erra = -2 * expx;
110  errb = 5 - 2 * log2u;
111  /* The 3rd argument (err1) of MPFR_SMALL_INPUT_AFTER_SAVE_EXPO should be
112     erra - errb, but it may overflow. The negative overflow is avoided by
113     the test erra > errb: if erra - errb <= 0, the macro is no-op.
114     Saturate to MPFR_EXP_MAX in case of positive overflow, as the error
115     test in MPFR_SMALL_INPUT_AFTER_SAVE_EXPO will always be true for
116     any value >= MPFR_PREC_MAX + 1, and this includes MPFR_EXP_MAX (from
117     the definition of MPFR_PREC_MAX and mpfr_exp_t >= mpfr_prec_t). */
118  if (erra > errb)
119    {
120      mpfr_exp_t err1 = errb >= 0 || erra < MPFR_EXP_MAX + errb ?
121        erra - errb : MPFR_EXP_MAX;
122      MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (y, __gmpfr_one, err1, 0, 0,
123                                        rnd_mode, expo, CLEAR_XR);
124    }
125
126  precy = MPFR_GET_PREC (y);
127  /* For x large, since argument reduction is expensive, we want to avoid
128     any failure in Ziv's strategy, thus we take into account expx too. */
129  prec = precy + MAX(expx,MPFR_INT_CEIL_LOG2 (precy)) + 8;
130  MPFR_ASSERTD(prec >= 2);
131  mpfr_init2 (t, prec);
132  MPFR_ZIV_INIT (loop, prec);
133  for (;;)
134    {
135      nloops ++;
136      /* In the error analysis below, xp stands for x.
137         We first compute an approximation t of 2*pi*x/u, then call cos(t).
138         If t = 2*pi*x/u + s, then |cos(t) - cos(2*pi*x/u)| <= |s|. */
139      mpfr_set_prec (t, prec);
140      mpfr_const_pi (t, MPFR_RNDN); /* t = pi * (1 + theta1) where
141                                       |theta1| <= 2^-prec */
142      mpfr_mul_2ui (t, t, 1, MPFR_RNDN); /* t = 2*pi * (1 + theta1) */
143      mpfr_mul (t, t, xp, MPFR_RNDN);    /* t = 2*pi*x * (1 + theta2)^2 where
144                                            |theta2| <= 2^-prec */
145      mpfr_div_ui (t, t, u, MPFR_RNDN);  /* t = 2*pi*x/u * (1 + theta3)^3 where
146                                            |theta3| <= 2^-prec */
147      /* if t is zero here, it means the division by u underflowd */
148      if (MPFR_UNLIKELY (MPFR_IS_ZERO (t)))
149        {
150          mpfr_set_ui (y, 1, MPFR_RNDZ);
151          if (MPFR_IS_LIKE_RNDZ(rnd_mode,0))
152            {
153              inexact = -1;
154              mpfr_nextbelow (y);
155            }
156          else
157            inexact = 1;
158          goto end;
159        }
160      /* since prec >= 2, |(1 + theta3)^3 - 1| <= 4*theta3 <= 2^(2-prec) */
161      expt = MPFR_GET_EXP (t);
162      /* we have |s| <= 2^(expt + 2 - prec) */
163      mpfr_cos (t, t, MPFR_RNDN);
164      err = expt + 2 - prec;
165      expt = MPFR_GET_EXP (t); /* new exponent of t */
166      /* the total error is at most 2^err + ulp(t)/2 = 2^err + 2^(expt-prec-1)
167         thus if err <= expt-prec-1, it is bounded by 2^(expt-prec),
168         otherwise it is bounded by 2^(err+1). */
169      err = (err <= expt - prec - 1) ? expt - prec : err + 1;
170      /* normalize err for mpfr_can_round */
171      err = expt - err;
172      if (MPFR_CAN_ROUND (t, err, precy, rnd_mode))
173        break;
174      /* Check exact cases only after the first level of Ziv' strategy, to
175         avoid slowing down the average case. Exact cases are:
176         (a) 2*pi*x/u is a multiple of pi/2, i.e., x/u is a multiple of 1/4
177         (b) 2*pi*x/u is {pi/3,2pi/3,4pi/3,5pi/3} mod 2pi */
178      if (nloops == 1)
179        {
180          /* detect case (a) */
181          inexact = mpfr_div_ui (t, xp, u, MPFR_RNDZ);
182          mpfr_mul_2ui (t, t, 2, MPFR_RNDZ);
183          if (inexact == 0 && mpfr_integer_p (t))
184            {
185              if (mpfr_odd_p (t))
186                /* t is odd: we have kpi+pi/2, thus cosu = 0,
187                   for the sign, we always return +0, following IEEE 754-2019:
188                   cosPi(n + 1/2) is +0 for any integer n when n + 1/2 is
189                   representable. */
190                mpfr_set_zero (y, +1);
191              else /* t is even: case kpi */
192                {
193                  mpfr_div_2ui (t, t, 1, MPFR_RNDZ);
194                  if (!mpfr_odd_p (t))
195                    /* case 2kpi: cosu = 1 */
196                    mpfr_set_ui (y, 1, MPFR_RNDZ);
197                  else
198                    mpfr_set_si (y, -1, MPFR_RNDZ);
199                }
200              goto end;
201            }
202          /* detect case (b): this can only occur if u is divisible by 3 */
203          if ((u % 3) == 0)
204            {
205              inexact = mpfr_div_ui (t, xp, u / 3, MPFR_RNDZ);
206              /* t should be in {1/2,2/2,4/2,5/2} */
207              mpfr_mul_2ui (t, t, 1, MPFR_RNDZ);
208              /* t should be {1,2,4,5} mod 6:
209                 t = 1 mod 6: case pi/3: return 1/2
210                 t = 2 mod 6: case 2pi/3: return -1/2
211                 t = 4 mod 6: case 4pi/3: return -1/2
212                 t = 5 mod 6: case 5pi/3: return 1/2 */
213              if (inexact == 0 && mpfr_integer_p (t))
214                {
215                  mpz_t z;
216                  unsigned long mod6;
217                  mpz_init (z);
218                  inexact = mpfr_get_z (z, t, MPFR_RNDZ);
219                  MPFR_ASSERTN(inexact == 0);
220                  mod6 = mpz_fdiv_ui (z, 6);
221                  mpz_clear (z);
222                  if (mod6 == 1 || mod6 == 5)
223                    {
224                      mpfr_set_ui_2exp (y, 1, -1, MPFR_RNDZ);
225                      goto end;
226                    }
227                  else /* we cannot have mod6 = 0 or 3 since those
228                          case belong to (a) */
229                    {
230                      MPFR_ASSERTD(mod6 == 2 || mod6 == 4);
231                      mpfr_set_si_2exp (y, -1, -1, MPFR_RNDZ);
232                      goto end;
233                    }
234                }
235            }
236        }
237      MPFR_ZIV_NEXT (loop, prec);
238    }
239  MPFR_ZIV_FREE (loop);
240
241  inexact = mpfr_set (y, t, rnd_mode);
242
243 end:
244  mpfr_clear (t);
245  CLEAR_XR;
246  MPFR_SAVE_EXPO_FREE (expo);
247  return underflow ? inexact : mpfr_check_range (y, inexact, rnd_mode);
248}
249
250int
251mpfr_cospi (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
252{
253  return mpfr_cosu (y, x, 2, rnd_mode);
254}
255