/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

/*
   Things to work on:

   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
      probably close to optimal, except when mpn_mu_divappr_q fails.

      An alternative which could be considered for much simpler code for the
      complex qn >= dn arm would be to allocate a temporary nn+1 limb buffer,
      then simply call mpn_mu_divappr_q.  Such a temporary allocation is
      unfortunately very large.

   2. 
We used to fall back to mpn_mu_div_qr when we detect a possible
      mpn_mu_divappr_q rounding problem, now we multiply and compare.
      Unfortunately, since mpn_mu_divappr_q does not return the partial
      remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
      solve that.

   3. The allocations done here should be made from the scratch area, which
      then would need to be amended.
*/

#include <stdlib.h>		/* for NULL */
#include "gmp.h"
#include "gmp-impl.h"


/* Compute the truncated quotient of {np, nn} / {dp, dn} using Barrett's
   method with an approximate (truncated) inverse of the divisor.

   qp       receives the low qn = nn - dn quotient limbs
   np, nn   dividend (read-only)
   dp, dn   divisor (read-only)
   scratch  caller-provided work area; presumably at least
            mpn_mu_div_q_itch (nn, dn, 0) limbs -- confirm against callers

   Returns the most significant quotient limb, so the full quotient is
   qh * B^qn + {qp, qn} (B = limb base).

   NOTE(review): several temporaries are TMP-allocated here instead of being
   carved out of SCRATCH; see item 3 in the to-do list above.  */
mp_limb_t
mpn_mu_div_q (mp_ptr qp,
	      mp_srcptr np, mp_size_t nn,
	      mp_srcptr dp, mp_size_t dn,
	      mp_ptr scratch)
{
  mp_ptr tp, rp, ip, this_ip;
  mp_size_t qn, in, this_in;
  mp_limb_t cy, qh;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn;

  /* tp holds the approximate quotient plus one extra low "guard" limb
     (tp[0]); the quotient proper lives in tp[1..qn].  */
  tp = TMP_BALLOC_LIMBS (qn + 1);

  if (qn >= dn)			/* nn >= 2*dn + 1 */
    {
      /* Long quotient: develop it in two steps.  First the high qn - dn
	 limbs with an exact preinv division of the top of N, then the
	 remaining dn + 1 (approximate) limbs from the partial remainder.
	 One shared inverse, sized for the larger of the two calls, serves
	 both.  */

      /* Find max inverse size needed by the two preinv calls.  FIXME: This is
	 not optimal, it underestimates the invariance.  */
      if (dn != qn)
	{
	  mp_size_t in1, in2;

	  in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
	  in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
	  in = MAX (in1, in2);
	}
      else
	{
	  /* qn == dn: the first preinv call gets a zero-limb quotient, so
	     only the divappr step determines the inverse size.  */
	  in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
	}

      ip = TMP_BALLOC_LIMBS (in + 1);

      if (dn == in)
	{
	  /* The whole divisor is inverted.  Extend it with a low limb of 1
	     before inverting, then drop the low inverse limb, keeping in
	     limbs.  NOTE(review): the extra low 1 presumably biases the
	     inverse per mpn_invertappr's error bounds -- confirm there.  */
	  MPN_COPY (scratch + 1, dp, in);
	  scratch[0] = 1;
	  mpn_invertappr (ip, scratch, in + 1, NULL);
	  MPN_COPY_INCR (ip, ip + 1, in);
	}
      else
	{
	  /* Invert only the top in + 1 limbs of the divisor, rounded up by
	     one unit so the truncated divisor never underestimates D.  */
	  cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
	  if (UNLIKELY (cy != 0))
	    /* The top limbs were all ones; the rounded-up value is B^(in+1)
	       and its truncated inverse is zero.  */
	    MPN_ZERO (ip, in);
	  else
	    {
	      mpn_invertappr (ip, scratch, in + 1, NULL);
	      MPN_COPY_INCR (ip, ip + 1, in);
	    }
	}

      /* |_______________________|   dividend
			|________|   divisor  */
      rp = TMP_BALLOC_LIMBS (2 * dn + 1);

      /* Step 1: exact division of the high part of N; quotient goes to the
	 high part of tp, remainder to the high part of rp.  The inverse is
	 passed as its most significant this_in limbs.  */
      this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
      this_ip = ip + in - this_in;
      qh = mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
				 this_ip, this_in, scratch);

      /* Step 2: append the low dn dividend limbs (shifted up one limb, low
	 guard limb zero) to the remainder and divide approximately.  */
      MPN_COPY (rp + 1, np, dn);
      rp[0] = 0;
      this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
      this_ip = ip + in - this_in;
      cy = mpn_preinv_mu_divappr_q (tp, rp, 2 * dn + 1, dp, dn,
				    this_ip, this_in, scratch);

      if (UNLIKELY (cy != 0))
	{
	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
	     canonically reduced, replace the returned value of B^(qn-dn)+eps
	     by the largest possible value.  */
	  mp_size_t i;
	  for (i = 0; i < dn + 1; i++)
	    tp[i] = GMP_NUMB_MAX;
	}

      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
	 greater than the max error, we cannot trust the quotient.  */
      if (tp[0] > 4)
	{
	  /* Guard limb absorbs the whole error; tp[1..qn] is the quotient.  */
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  mp_limb_t cy;
	  mp_ptr pp;

	  /* Ambiguous case: verify by computing Q*D and comparing with N.
	     FIXME: can we use already allocated space?  */
	  pp = TMP_BALLOC_LIMBS (nn);
	  mpn_mul (pp, tp + 1, qn, dp, dn);

	  /* Account for the high quotient limb qh in the product.  */
	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;

	  if (cy || mpn_cmp (pp, np, nn) > 0)	/* At most is wrong by one, no cycle. */
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	  else						/* Same as above */
	    MPN_COPY (qp, tp + 1, qn);
	}
    }
  else
    {
      /* Short quotient (qn < dn): divide the top of N by the top of D
	 approximately; the ignored low divisor limbs only add to the error.

	 |_______________________|   dividend
		 |________________|   divisor  */

      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
	 the most significant dn-1 limbs will actually be read, but it is not
	 pretty.  */

      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
			     dp + dn - (qn + 1), qn + 1, scratch);

      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
	 error from the divisor truncation.  */
      if (tp[0] > 6)
	{
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  mp_limb_t cy;

	  /* Ambiguous case: verify by computing Q*D and comparing with N.
	     FIXME: a shorter product should be enough; we may use already
	     allocated space...  */
	  rp = TMP_BALLOC_LIMBS (nn);
	  mpn_mul (rp, dp, dn, tp + 1, qn);

	  /* Account for the high quotient limb qh in the product.  */
	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;

	  if (cy || mpn_cmp (rp, np, nn) > 0)	/* At most is wrong by one, no cycle. */
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	  else						/* Same as above */
	    MPN_COPY (qp, tp + 1, qn);
	}
    }

  TMP_FREE;
  return qh;
}

/* Return the number of scratch limbs mpn_mu_div_q needs for an nn-limb
   dividend and a dn-limb divisor; mua_k is forwarded to the underlying
   itch functions.  Mirrors the two arms of mpn_mu_div_q above.  */
mp_size_t
mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
  mp_size_t qn, itch1, itch2;

  qn = nn - dn;
  if (qn >= dn)
    {
      /* Long-quotient arm: scratch must cover both preinv calls.  */
      itch1 = mpn_mu_div_qr_itch (qn, dn, mua_k);
      itch2 = mpn_mu_divappr_q_itch (2 * dn + 1, dn, mua_k);
      return MAX (itch1, itch2);
    }
  else
    {
      /* Short-quotient arm: one mpn_mu_divappr_q call.  */
      itch1 = mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
      return itch1;
    }
}