1/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate 2 quotient. The quotient returned is either correct, or one too large. 3 4 Contributed to the GNU project by Torbjorn Granlund. 5 6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 9 10Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc. 11 12This file is part of the GNU MP Library. 13 14The GNU MP Library is free software; you can redistribute it and/or modify 15it under the terms of the GNU Lesser General Public License as published by 16the Free Software Foundation; either version 3 of the License, or (at your 17option) any later version. 18 19The GNU MP Library is distributed in the hope that it will be useful, but 20WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22License for more details. 23 24You should have received a copy of the GNU Lesser General Public License 25along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27#include "gmp.h" 28#include "gmp-impl.h" 29#include "longlong.h" 30 31 32mp_limb_t 33mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, 34 gmp_pi1_t *dinv, mp_ptr tp) 35{ 36 mp_size_t lo, hi; 37 mp_limb_t cy, qh, ql; 38 39 lo = n >> 1; /* floor(n/2) */ 40 hi = n - lo; /* ceil(n/2) */ 41 42 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD)) 43 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32); 44 else 45 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp); 46 47 mpn_mul (tp, qp + lo, hi, dp, lo); 48 49 cy = mpn_sub_n (np + lo, np + lo, tp, n); 50 if (qh != 0) 51 cy += mpn_sub_n (np + n, np + n, dp, lo); 52 53 while (cy != 0) 54 { 55 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1); 56 cy -= mpn_add_n (np + lo, np + lo, dp, n); 57 } 58 59 if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD)) 60 ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32); 61 else 62 ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp); 63 64 if (UNLIKELY (ql != 0)) 65 { 66 mp_size_t i; 67 for (i = 0; i < lo; i++) 68 qp[i] = GMP_NUMB_MASK; 69 } 70 71 return qh; 72} 73 74mp_limb_t 75mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, 76 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv) 77{ 78 mp_size_t qn; 79 mp_limb_t qh, cy, qsave; 80 mp_ptr tp; 81 TMP_DECL; 82 83 TMP_MARK; 84 85 ASSERT (dn >= 6); 86 ASSERT (nn > dn); 87 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); 88 89 qn = nn - dn; 90 qp += qn; 91 np += nn; 92 dp += dn; 93 94 if (qn >= dn) 95 { 96 qn++; /* pretend we'll need an extra limb */ 97 /* Reduce qn mod dn without division, optimizing small operations. */ 98 do 99 qn -= dn; 100 while (qn > dn); 101 102 qp -= qn; /* point at low limb of next quotient block */ 103 np -= qn; /* point in the middle of partial remainder */ 104 105 tp = TMP_SALLOC_LIMBS (dn); 106 107 /* Perform the typically smaller block first. */ 108 if (qn == 1) 109 { 110 mp_limb_t q, n2, n1, n0, d1, d0; 111 112 /* Handle qh up front, for simplicity. */ 113 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; 114 if (qh) 115 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); 116 117 /* A single iteration of schoolbook: One 3/2 division, 118 followed by the bignum update and adjustment. */ 119 n2 = np[0]; 120 n1 = np[-1]; 121 n0 = np[-2]; 122 d1 = dp[-1]; 123 d0 = dp[-2]; 124 125 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); 126 127 if (UNLIKELY (n2 == d1) && n1 == d0) 128 { 129 q = GMP_NUMB_MASK; 130 cy = mpn_submul_1 (np - dn, dp - dn, dn, q); 131 ASSERT (cy == n2); 132 } 133 else 134 { 135 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); 136 137 if (dn > 2) 138 { 139 mp_limb_t cy, cy1; 140 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); 141 142 cy1 = n0 < cy; 143 n0 = (n0 - cy) & GMP_NUMB_MASK; 144 cy = n1 < cy1; 145 n1 = (n1 - cy1) & GMP_NUMB_MASK; 146 np[-2] = n0; 147 148 if (UNLIKELY (cy != 0)) 149 { 150 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); 151 qh -= (q == 0); 152 q = (q - 1) & GMP_NUMB_MASK; 153 } 154 } 155 else 156 np[-2] = n0; 157 158 np[-1] = n1; 159 } 160 qp[0] = q; 161 } 162 else 163 { 164 if (qn == 2) 165 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); 166 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) 167 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); 168 else 169 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); 170 171 if (qn != dn) 172 { 173 if (qn > dn - qn) 174 mpn_mul (tp, qp, qn, dp - dn, dn - qn); 175 else 176 mpn_mul (tp, dp - dn, dn - qn, qp, qn); 177 178 cy = mpn_sub_n (np - dn, np - dn, tp, dn); 179 if (qh != 0) 180 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); 181 182 while (cy != 0) 183 { 184 qh -= mpn_sub_1 (qp, qp, qn, 1); 185 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); 186 } 187 } 188 } 189 qn = nn - dn - qn + 1; 190 while (qn > dn) 191 { 192 qp -= dn; 193 np -= dn; 194 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); 195 qn -= dn; 196 } 197 198 /* Since we pretended we'd need an extra quotient limb before, we now 199 have made sure the code above left just dn-1=qn quotient limbs to 200 develop. Develop that plus a guard limb. */ 201 qn--; 202 qp -= qn; 203 np -= dn; 204 qsave = qp[qn]; 205 mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp); 206 MPN_COPY_INCR (qp, qp + 1, qn); 207 qp[qn] = qsave; 208 } 209 else /* (qn < dn) */ 210 { 211 mp_ptr q2p; 212#if 0 /* not possible since we demand nn > dn */ 213 if (qn == 0) 214 { 215 qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; 216 if (qh) 217 mpn_sub_n (np - dn, np - dn, dp - dn, dn); 218 TMP_FREE; 219 return qh; 220 } 221#endif 222 223 qp -= qn; /* point at low limb of next quotient block */ 224 np -= qn; /* point in the middle of partial remainder */ 225 226 q2p = TMP_SALLOC_LIMBS (qn + 1); 227 /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on 228 callers not to be silly? */ 229 if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) 230 { 231 qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1), 232 dp - (qn + 1), qn + 1, dinv->inv32); 233 } 234 else 235 { 236 /* It is tempting to use qp for recursive scratch and put quotient in 237 tp, but the recursive scratch needs one limb too many. */ 238 tp = TMP_SALLOC_LIMBS (qn + 1); 239 qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp); 240 } 241 MPN_COPY (qp, q2p + 1, qn); 242 } 243 244 TMP_FREE; 245 return qh; 246} 247