1/* mpn_mu_div_q. 2 3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 8 9Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26or both in parallel, as here. 27 28The GNU MP Library is distributed in the hope that it will be useful, but 29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31for more details. 32 33You should have received copies of the GNU General Public License and the 34GNU Lesser General Public License along with the GNU MP Library. If not, 35see https://www.gnu.org/licenses/. */ 36 37 38/* 39 The idea of the algorithm used herein is to compute a smaller inverted value 40 than used in the standard Barrett algorithm, and thus save time in the 41 Newton iterations, and pay just a small price when using the inverted value 42 for developing quotient bits. This algorithm was presented at ICMS 2006. 43*/ 44 45/* 46 Things to work on: 47 48 1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is 49 probably close to optimal, except when mpn_mu_divappr_q fails. 50 51 2. We used to fall back to mpn_mu_div_qr when we detect a possible 52 mpn_mu_divappr_q rounding problem, now we multiply and compare. 53 Unfortunately, since mpn_mu_divappr_q does not return the partial 54 remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could 55 solve that. 56 57 3. The allocations done here should be made from the scratch area, which 58 then would need to be amended. 59*/ 60 61#include <stdlib.h> /* for NULL */ 62#include "gmp-impl.h" 63 64 65mp_limb_t 66mpn_mu_div_q (mp_ptr qp, 67 mp_srcptr np, mp_size_t nn, 68 mp_srcptr dp, mp_size_t dn, 69 mp_ptr scratch) 70{ 71 mp_ptr tp, rp; 72 mp_size_t qn; 73 mp_limb_t cy, qh; 74 TMP_DECL; 75 76 TMP_MARK; 77 78 qn = nn - dn; 79 80 tp = TMP_BALLOC_LIMBS (qn + 1); 81 82 if (qn >= dn) /* nn >= 2*dn + 1 */ 83 { 84 /* |_______________________| dividend 85 |________| divisor */ 86 87 rp = TMP_BALLOC_LIMBS (nn + 1); 88 MPN_COPY (rp + 1, np, nn); 89 rp[0] = 0; 90 91 qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0; 92 if (qh != 0) 93 mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn); 94 95 cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch); 96 97 if (UNLIKELY (cy != 0)) 98 { 99 /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was 100 canonically reduced, replace the returned value of B^(qn-dn)+eps 101 by the largest possible value. */ 102 mp_size_t i; 103 for (i = 0; i < qn + 1; i++) 104 tp[i] = GMP_NUMB_MAX; 105 } 106 107 /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is 108 smaller than the max error, we cannot trust the quotient. */ 109 if (tp[0] > 4) 110 { 111 MPN_COPY (qp, tp + 1, qn); 112 } 113 else 114 { 115 mp_limb_t cy; 116 mp_ptr pp; 117 118 pp = rp; 119 mpn_mul (pp, tp + 1, qn, dp, dn); 120 121 cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0; 122 123 if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */ 124 qh -= mpn_sub_1 (qp, tp + 1, qn, 1); 125 else /* Same as above */ 126 MPN_COPY (qp, tp + 1, qn); 127 } 128 } 129 else 130 { 131 /* |_______________________| dividend 132 |________________| divisor */ 133 134 /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed 135 here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only 136 the most significant dn-1 limbs will actually be read, but it is not 137 pretty. */ 138 139 qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2, 140 dp + dn - (qn + 1), qn + 1, scratch); 141 142 /* The max error of mpn_mu_divappr_q is +4, but we get an additional 143 error from the divisor truncation. */ 144 if (tp[0] > 6) 145 { 146 MPN_COPY (qp, tp + 1, qn); 147 } 148 else 149 { 150 mp_limb_t cy; 151 152 /* FIXME: a shorter product should be enough; we may use already 153 allocated space... */ 154 rp = TMP_BALLOC_LIMBS (nn); 155 mpn_mul (rp, dp, dn, tp + 1, qn); 156 157 cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0; 158 159 if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */ 160 qh -= mpn_sub_1 (qp, tp + 1, qn, 1); 161 else /* Same as above */ 162 MPN_COPY (qp, tp + 1, qn); 163 } 164 } 165 166 TMP_FREE; 167 return qh; 168} 169 170mp_size_t 171mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k) 172{ 173 mp_size_t qn; 174 175 qn = nn - dn; 176 if (qn >= dn) 177 { 178 return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k); 179 } 180 else 181 { 182 return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k); 183 } 184} 185