broot.c revision 1.1.1.1
1/* mpn_broot -- Compute hensel sqrt
2
3   Contributed to the GNU project by Niels M�ller
4
5   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9Copyright 2012 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26#include "gmp.h"
27#include "gmp-impl.h"
28
29/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
30   typical use will have e small. */
31static mp_limb_t
32powlimb (mp_limb_t a, mp_limb_t e)
33{
34  mp_limb_t r = 1;
35  mp_limb_t s = a;
36
37  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
38    if (e & 1)
39      r *= s;
40
41  return r;
42}
43
44/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
45
46   Iterates
47
48     r' <-- r - r * (a^{k-1} r^k - 1) / n
49
50   If
51
52     a^{k-1} r^k = 1 (mod 2^m),
53
54   then
55
56     a^{k-1} r'^k = 1 (mod 2^{2m}),
57
58   Compute the update term as
59
60     r' = r - (a^{k-1} r^{k+1} - r) / k
61
62   where we still have cancelation of low limbs.
63
64 */
65void
66mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
67{
68  mp_size_t sizes[GMP_LIMB_BITS * 2];
69  mp_ptr akm1, tp, rnp, ep, scratch;
70  mp_limb_t a0, r0, km1, kp1h, kinv;
71  mp_size_t rn;
72  unsigned i;
73
74  TMP_DECL;
75
76  ASSERT (n > 0);
77  ASSERT (ap[0] & 1);
78  ASSERT (k & 1);
79  ASSERT (k >= 3);
80
81  TMP_MARK;
82
83  akm1 = TMP_ALLOC_LIMBS (4*n);
84  tp = akm1 + n;
85
86  km1 = k-1;
87  /* FIXME: Could arrange the iteration so we don't need to compute
88     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
89     that we can use wraparound also for a*r, since the low half is
90     unchanged from the previous iteration. Or possibly mulmid. Also,
91     a r = a^{1/k}, so we get that value too, for free? */
92  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
93
94  a0 = ap[0];
95  binvert_limb (kinv, k);
96
97  /* 4 bits: a^{1/k - 1} (mod 16):
98
99	a % 8
100	1 3 5 7
101   k%4 +-------
102     1 |1 1 1 1
103     3 |1 9 9 1
104  */
105  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
106  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
107  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
108  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
109#if GMP_NUMB_BITS > 32
110  {
111    unsigned prec = 32;
112    do
113      {
114	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
115	prec *= 2;
116      }
117    while (prec < GMP_NUMB_BITS);
118  }
119#endif
120
121  rp[0] = r0;
122  if (n == 1)
123    {
124      TMP_FREE;
125      return;
126    }
127
128  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
129  kp1h = k/2 + 1;
130
131  /* FIXME: Special case for two limb iteration. */
132  rnp = TMP_ALLOC_LIMBS (2*n + 1);
133  ep = rnp + n;
134
135  /* FIXME: Possible to this on the fly with some bit fiddling. */
136  for (i = 0; n > 1; n = (n + 1)/2)
137    sizes[i++] = n;
138
139  rn = 1;
140
141  while (i-- > 0)
142    {
143      /* Compute x^{k+1}. */
144      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
145			       final iteration.*/
146      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
147
148      /* Multiply by a^{k-1}. Can use wraparound; low part equals
149	 r. */
150
151      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
152      ASSERT (mpn_cmp (ep, rp, rn) == 0);
153
154      ASSERT (sizes[i] <= 2*rn);
155      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
156      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
157      rn = sizes[i];
158    }
159  TMP_FREE;
160}
161
162/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
163void
164mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
165{
166  mp_ptr tp;
167  TMP_DECL;
168
169  ASSERT (n > 0);
170  ASSERT (ap[0] & 1);
171  ASSERT (k & 1);
172
173  if (k == 1)
174    {
175      MPN_COPY (rp, ap, n);
176      return;
177    }
178
179  TMP_MARK;
180  tp = TMP_ALLOC_LIMBS (n);
181
182  mpn_broot_invm1 (tp, ap, n, k);
183  mpn_mullo_n (rp, tp, ap, n);
184
185  TMP_FREE;
186}
187