1/* Compute {up,n}^(-1) mod B^n.
2
3   Contributed to the GNU project by Torbjorn Granlund.
4
5   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26#include "gmp.h"
27#include "gmp-impl.h"
28
29
30/*
31  r[k+1] = r[k] - r[k] * (u*r[k] - 1)
32  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
33*/
34
/* LOG2C(n) counts how many of the powers of two 2^0 .. 2^15 are <= n.  For
   1 <= n < 2^16 that is floor(log2(n)) + 1; the value is 0 when n < 1 and
   never exceeds 16.  Meant to be applied to compile-time constant THRESHOLD
   values only, so that the compiler folds the whole sum into one constant.  */
#define LOG2C(n)			\
  (  ((n) >= (1 <<  0))			\
   + ((n) >= (1 <<  1))			\
   + ((n) >= (1 <<  2))			\
   + ((n) >= (1 <<  3))			\
   + ((n) >= (1 <<  4))			\
   + ((n) >= (1 <<  5))			\
   + ((n) >= (1 <<  6))			\
   + ((n) >= (1 <<  7))			\
   + ((n) >= (1 <<  8))			\
   + ((n) >= (1 <<  9))			\
   + ((n) >= (1 << 10))			\
   + ((n) >= (1 << 11))			\
   + ((n) >= (1 << 12))			\
   + ((n) >= (1 << 13))			\
   + ((n) >= (1 << 14))			\
   + ((n) >= (1 << 15)))
42
/* NPOWS is the number of entries in the 'sizes' array of mpn_binvert, which
   records one precision per halving step.  The base count is the bit width
   of mp_size_t (taking 8 bits per byte), capped at 48 for types wider than
   6 bytes.  Outside tune builds, LOG2C (BINV_NEWTON_THRESHOLD) entries are
   subtracted; in tune builds the full count is kept (presumably because the
   threshold is not a compile-time constant there -- confirm).  */
#if ! TUNE_PROGRAM_BUILD
#define NPOWS								\
  ((sizeof (mp_size_t) <= 6 ? 8 * sizeof (mp_size_t) : 48)		\
   - LOG2C (BINV_NEWTON_THRESHOLD))
#else
#define NPOWS								\
  ((sizeof (mp_size_t) <= 6 ? 8 * sizeof (mp_size_t) : 48))
#endif
50
51mp_size_t
52mpn_binvert_itch (mp_size_t n)
53{
54  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
55  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
56  return itch_local + itch_out;
57}
58
59void
60mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
61{
62  mp_ptr xp;
63  mp_size_t rn, newrn;
64  mp_size_t sizes[NPOWS], *sizp;
65  mp_limb_t di;
66
67  /* Compute the computation precisions from highest to lowest, leaving the
68     base case size in 'rn'.  */
69  sizp = sizes;
70  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
71    *sizp++ = rn;
72
73  xp = scratch;
74
75  /* Compute a base value of rn limbs.  */
76  MPN_ZERO (xp, rn);
77  xp[0] = 1;
78  binvert_limb (di, up[0]);
79  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
80    mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
81  else
82    mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
83
84  /* Use Newton iterations to get the desired precision.  */
85  for (; rn < n; rn = newrn)
86    {
87      mp_size_t m;
88      newrn = *--sizp;
89
90      /* X <- UR. */
91      m = mpn_mulmod_bnm1_next_size (newrn);
92      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
93      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);
94
95      /* R = R(X/B^rn) */
96      mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
97      mpn_neg (rp + rn, rp + rn, newrn - rn);
98    }
99}
100