1/* mpn_broot -- Compute hensel sqrt
2
3   Contributed to the GNU project by Niels M��ller
4
5   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9Copyright 2012 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of either:
15
16  * the GNU Lesser General Public License as published by the Free
17    Software Foundation; either version 3 of the License, or (at your
18    option) any later version.
19
20or
21
22  * the GNU General Public License as published by the Free Software
23    Foundation; either version 2 of the License, or (at your option) any
24    later version.
25
26or both in parallel, as here.
27
28The GNU MP Library is distributed in the hope that it will be useful, but
29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31for more details.
32
33You should have received copies of the GNU General Public License and the
34GNU Lesser General Public License along with the GNU MP Library.  If not,
35see https://www.gnu.org/licenses/.  */
36
37#include "gmp-impl.h"
38
39/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
40   typical use will have e small. */
41static mp_limb_t
42powlimb (mp_limb_t a, mp_limb_t e)
43{
44  mp_limb_t r = 1;
45  mp_limb_t s = a;
46
47  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
48    if (e & 1)
49      r *= s;
50
51  return r;
52}
53
54/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
55
56   Iterates
57
58     r' <-- r - r * (a^{k-1} r^k - 1) / n
59
60   If
61
62     a^{k-1} r^k = 1 (mod 2^m),
63
64   then
65
66     a^{k-1} r'^k = 1 (mod 2^{2m}),
67
68   Compute the update term as
69
70     r' = r - (a^{k-1} r^{k+1} - r) / k
71
72   where we still have cancellation of low limbs.
73
74 */
75void
76mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
77{
78  mp_size_t sizes[GMP_LIMB_BITS * 2];
79  mp_ptr akm1, tp, rnp, ep;
80  mp_limb_t a0, r0, km1, kp1h, kinv;
81  mp_size_t rn;
82  unsigned i;
83
84  TMP_DECL;
85
86  ASSERT (n > 0);
87  ASSERT (ap[0] & 1);
88  ASSERT (k & 1);
89  ASSERT (k >= 3);
90
91  TMP_MARK;
92
93  akm1 = TMP_ALLOC_LIMBS (4*n);
94  tp = akm1 + n;
95
96  km1 = k-1;
97  /* FIXME: Could arrange the iteration so we don't need to compute
98     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
99     that we can use wraparound also for a*r, since the low half is
100     unchanged from the previous iteration. Or possibly mulmid. Also,
101     a r = a^{1/k}, so we get that value too, for free? */
102  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
103
104  a0 = ap[0];
105  binvert_limb (kinv, k);
106
107  /* 4 bits: a^{1/k - 1} (mod 16):
108
109	a % 8
110	1 3 5 7
111   k%4 +-------
112     1 |1 1 1 1
113     3 |1 9 9 1
114  */
115  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
116  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
117  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
118  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
119#if GMP_NUMB_BITS > 32
120  {
121    unsigned prec = 32;
122    do
123      {
124	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
125	prec *= 2;
126      }
127    while (prec < GMP_NUMB_BITS);
128  }
129#endif
130
131  rp[0] = r0;
132  if (n == 1)
133    {
134      TMP_FREE;
135      return;
136    }
137
138  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
139  kp1h = k/2 + 1;
140
141  /* FIXME: Special case for two limb iteration. */
142  rnp = TMP_ALLOC_LIMBS (2*n + 1);
143  ep = rnp + n;
144
145  /* FIXME: Possible to this on the fly with some bit fiddling. */
146  for (i = 0; n > 1; n = (n + 1)/2)
147    sizes[i++] = n;
148
149  rn = 1;
150
151  while (i-- > 0)
152    {
153      /* Compute x^{k+1}. */
154      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
155			       final iteration. */
156      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
157
158      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
159
160      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
161      ASSERT (mpn_cmp (ep, rp, rn) == 0);
162
163      ASSERT (sizes[i] <= 2*rn);
164      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
165      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
166      rn = sizes[i];
167    }
168  TMP_FREE;
169}
170
171/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
172void
173mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
174{
175  mp_ptr tp;
176  TMP_DECL;
177
178  ASSERT (n > 0);
179  ASSERT (ap[0] & 1);
180  ASSERT (k & 1);
181
182  if (k == 1)
183    {
184      MPN_COPY (rp, ap, n);
185      return;
186    }
187
188  TMP_MARK;
189  tp = TMP_ALLOC_LIMBS (n);
190
191  mpn_broot_invm1 (tp, ap, n, k);
192  mpn_mullo_n (rp, tp, ap, n);
193
194  TMP_FREE;
195}
196