1/* Compute {up,n}^(-1) mod B^n. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 8 9Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of the GNU Lesser General Public License as published by 15the Free Software Foundation; either version 3 of the License, or (at your 16option) any later version. 17 18The GNU MP Library is distributed in the hope that it will be useful, but 19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21License for more details. 22 23You should have received a copy of the GNU Lesser General Public License 24along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26#include "gmp.h" 27#include "gmp-impl.h" 28 29 30/* 31 r[k+1] = r[k] - r[k] * (u*r[k] - 1) 32 r[k+1] = r[k] + r[k] - r[k]*(u*r[k]) 33*/ 34 35/* This is intended for constant THRESHOLDs only, where the compiler can 36 completely fold the result. */ 37#define LOG2C(n) \ 38 (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \ 39 ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \ 40 ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \ 41 ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000)) 42 43#if TUNE_PROGRAM_BUILD 44#define NPOWS \ 45 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t))) 46#else 47#define NPOWS \ 48 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD)) 49#endif 50 51mp_size_t 52mpn_binvert_itch (mp_size_t n) 53{ 54 mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n); 55 mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1); 56 return itch_local + itch_out; 57} 58 59void 60mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 61{ 62 mp_ptr xp; 63 mp_size_t rn, newrn; 64 mp_size_t sizes[NPOWS], *sizp; 65 mp_limb_t di; 66 67 /* Compute the computation precisions from highest to lowest, leaving the 68 base case size in 'rn'. */ 69 sizp = sizes; 70 for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1) 71 *sizp++ = rn; 72 73 xp = scratch; 74 75 /* Compute a base value of rn limbs. */ 76 MPN_ZERO (xp, rn); 77 xp[0] = 1; 78 binvert_limb (di, up[0]); 79 if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD)) 80 mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di); 81 else 82 mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di); 83 84 /* Use Newton iterations to get the desired precision. */ 85 for (; rn < n; rn = newrn) 86 { 87 mp_size_t m; 88 newrn = *--sizp; 89 90 /* X <- UR. */ 91 m = mpn_mulmod_bnm1_next_size (newrn); 92 mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m); 93 mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1); 94 95 /* R = R(X/B^rn) */ 96 mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn); 97 mpn_neg (rp + rn, rp + rn, newrn - rn); 98 } 99} 100