1/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate 2 quotient. The quotient returned is either correct, or one too large. 3 4 Contributed to the GNU project by Torbjorn Granlund. 5 6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 9 10Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc. 11 12This file is part of the GNU MP Library. 13 14The GNU MP Library is free software; you can redistribute it and/or modify 15it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27or both in parallel, as here. 28 29The GNU MP Library is distributed in the hope that it will be useful, but 30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32for more details. 33 34You should have received copies of the GNU General Public License and the 35GNU Lesser General Public License along with the GNU MP Library. If not, 36see https://www.gnu.org/licenses/. */ 37 38#include "gmp-impl.h" 39#include "longlong.h" 40 41 42static mp_limb_t 43mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, 44 gmp_pi1_t *dinv, mp_ptr tp) 45{ 46 mp_size_t lo, hi; 47 mp_limb_t cy, qh, ql; 48 49 lo = n >> 1; /* floor(n/2) */ 50 hi = n - lo; /* ceil(n/2) */ 51 52 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD)) 53 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32); 54 else 55 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp); 56 57 mpn_mul (tp, qp + lo, hi, dp, lo); 58 59 cy = mpn_sub_n (np + lo, np + lo, tp, n); 60 if (qh != 0) 61 cy += mpn_sub_n (np + n, np + n, dp, lo); 62 63 while (cy != 0) 64 { 65 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1); 66 cy -= mpn_add_n (np + lo, np + lo, dp, n); 67 } 68 69 if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD)) 70 ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32); 71 else 72 ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp); 73 74 if (UNLIKELY (ql != 0)) 75 { 76 mp_size_t i; 77 for (i = 0; i < lo; i++) 78 qp[i] = GMP_NUMB_MASK; 79 } 80 81 return qh; 82} 83 84mp_limb_t 85mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, 86 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv) 87{ 88 mp_size_t qn; 89 mp_limb_t qh, cy, qsave; 90 mp_ptr tp; 91 TMP_DECL; 92 93 TMP_MARK; 94 95 ASSERT (dn >= 6); 96 ASSERT (nn > dn); 97 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); 98 99 qn = nn - dn; 100 qp += qn; 101 np += nn; 102 dp += dn; 103 104 if (qn >= dn) 105 { 106 qn++; /* pretend we'll need an extra limb */ 107 /* Reduce qn mod dn without division, optimizing small operations. */ 108 do 109 qn -= dn; 110 while (qn > dn); 111 112 qp -= qn; /* point at low limb of next quotient block */ 113 np -= qn; /* point in the middle of partial remainder */ 114 115 tp = TMP_SALLOC_LIMBS (dn); 116 117 /* Perform the typically smaller block first. */ 118 if (qn == 1) 119 { 120 mp_limb_t q, n2, n1, n0, d1, d0; 121 122 /* Handle qh up front, for simplicity. */ 123 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; 124 if (qh) 125 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); 126 127 /* A single iteration of schoolbook: One 3/2 division, 128 followed by the bignum update and adjustment. */ 129 n2 = np[0]; 130 n1 = np[-1]; 131 n0 = np[-2]; 132 d1 = dp[-1]; 133 d0 = dp[-2]; 134 135 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); 136 137 if (UNLIKELY (n2 == d1) && n1 == d0) 138 { 139 q = GMP_NUMB_MASK; 140 cy = mpn_submul_1 (np - dn, dp - dn, dn, q); 141 ASSERT (cy == n2); 142 } 143 else 144 { 145 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); 146 147 if (dn > 2) 148 { 149 mp_limb_t cy, cy1; 150 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); 151 152 cy1 = n0 < cy; 153 n0 = (n0 - cy) & GMP_NUMB_MASK; 154 cy = n1 < cy1; 155 n1 = (n1 - cy1) & GMP_NUMB_MASK; 156 np[-2] = n0; 157 158 if (UNLIKELY (cy != 0)) 159 { 160 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); 161 qh -= (q == 0); 162 q = (q - 1) & GMP_NUMB_MASK; 163 } 164 } 165 else 166 np[-2] = n0; 167 168 np[-1] = n1; 169 } 170 qp[0] = q; 171 } 172 else 173 { 174 if (qn == 2) 175 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); 176 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) 177 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); 178 else 179 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); 180 181 if (qn != dn) 182 { 183 if (qn > dn - qn) 184 mpn_mul (tp, qp, qn, dp - dn, dn - qn); 185 else 186 mpn_mul (tp, dp - dn, dn - qn, qp, qn); 187 188 cy = mpn_sub_n (np - dn, np - dn, tp, dn); 189 if (qh != 0) 190 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); 191 192 while (cy != 0) 193 { 194 qh -= mpn_sub_1 (qp, qp, qn, 1); 195 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); 196 } 197 } 198 } 199 qn = nn - dn - qn + 1; 200 while (qn > dn) 201 { 202 qp -= dn; 203 np -= dn; 204 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); 205 qn -= dn; 206 } 207 208 /* Since we pretended we'd need an extra quotient limb before, we now 209 have made sure the code above left just dn-1=qn quotient limbs to 210 develop. Develop that plus a guard limb. */ 211 qn--; 212 qp -= qn; 213 np -= dn; 214 qsave = qp[qn]; 215 mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp); 216 MPN_COPY_INCR (qp, qp + 1, qn); 217 qp[qn] = qsave; 218 } 219 else /* (qn < dn) */ 220 { 221 mp_ptr q2p; 222#if 0 /* not possible since we demand nn > dn */ 223 if (qn == 0) 224 { 225 qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; 226 if (qh) 227 mpn_sub_n (np - dn, np - dn, dp - dn, dn); 228 TMP_FREE; 229 return qh; 230 } 231#endif 232 233 qp -= qn; /* point at low limb of next quotient block */ 234 np -= qn; /* point in the middle of partial remainder */ 235 236 q2p = TMP_SALLOC_LIMBS (qn + 1); 237 /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on 238 callers not to be silly? */ 239 if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) 240 { 241 qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1), 242 dp - (qn + 1), qn + 1, dinv->inv32); 243 } 244 else 245 { 246 /* It is tempting to use qp for recursive scratch and put quotient in 247 tp, but the recursive scratch needs one limb too many. */ 248 tp = TMP_SALLOC_LIMBS (qn + 1); 249 qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp); 250 } 251 MPN_COPY (qp, q2p + 1, qn); 252 } 253 254 TMP_FREE; 255 return qh; 256} 257