157429Smarkm/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- 257429Smarkm Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. 357429Smarkm Return the single-limb remainder. 457429Smarkm There are no constraints on the value of the divisor. 557429Smarkm 660573SkrisCopyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free 765668SkrisSoftware Foundation, Inc. 865668Skris 965668SkrisThis file is part of the GNU MP Library. 1065668Skris 1165668SkrisThe GNU MP Library is free software; you can redistribute it and/or modify 1257429Smarkmit under the terms of the GNU Lesser General Public License as published by 1357429Smarkmthe Free Software Foundation; either version 3 of the License, or (at your 1457429Smarkmoption) any later version. 1599060Sdes 1657429SmarkmThe GNU MP Library is distributed in the hope that it will be useful, but 1757429SmarkmWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 1857429Smarkmor FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 1976259SgreenLicense for more details. 2057429Smarkm 2157429SmarkmYou should have received a copy of the GNU Lesser General Public License 2257429Smarkmalong with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 2360573Skris 2457429Smarkm#include "gmp.h" 2557429Smarkm#include "gmp-impl.h" 2657429Smarkm#include "longlong.h" 2757429Smarkm 2857429Smarkm 2957429Smarkm/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd, 3057429Smarkm meaning the quotient size where that should happen, the quotient size 3157429Smarkm being how many udiv divisions will be done. 3257429Smarkm 3357429Smarkm The default is to use preinv always, CPUs where this doesn't suit have 3460573Skris tuned thresholds. Note in particular that preinv should certainly be 3557429Smarkm used if that's the only division available (USE_PREINV_ALWAYS). */ 3657429Smarkm 3757429Smarkm#ifndef MOD_1_NORM_THRESHOLD 3857429Smarkm#define MOD_1_NORM_THRESHOLD 0 3957429Smarkm#endif 4057429Smarkm 4157429Smarkm#ifndef MOD_1_UNNORM_THRESHOLD 4257429Smarkm#define MOD_1_UNNORM_THRESHOLD 0 4357429Smarkm#endif 4457429Smarkm 4557429Smarkm#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD 4660573Skris#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */ 4757429Smarkm#endif 4857429Smarkm 4957429Smarkm#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD 5057429Smarkm#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */ 5157429Smarkm#endif 5257429Smarkm 5357429Smarkm#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD 5457429Smarkm#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 5560573Skris#endif 5692555Sdes 5757429Smarkm#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD 5892555Sdes#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20 5992555Sdes#endif 6092555Sdes 6157429Smarkm 6257429Smarkm/* The comments in mpn/generic/divrem_1.c apply here too. 6357429Smarkm 6457429Smarkm As noted in the algorithms section of the manual, the shifts in the loop 6557429Smarkm for the unnorm case can be avoided by calculating r = a%(d*2^n), followed 6657429Smarkm by a final (r*2^n)%(d*2^n). In fact if it happens that a%(d*2^n) can 6757429Smarkm skip a division where (a*2^n)%(d*2^n) can't then there's the same number 6857429Smarkm of divide steps, though how often that happens depends on the assumed 6992555Sdes distributions of dividend and divisor. In any case this idea is left to 7092555Sdes CPU specific implementations to consider. */ 7157429Smarkm 7292555Sdesstatic mp_limb_t 7392555Sdesmpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d) 7499060Sdes{ 7599060Sdes mp_size_t i; 7699060Sdes mp_limb_t n1, n0, r; 7757429Smarkm mp_limb_t dummy; 7857429Smarkm int cnt; 7957429Smarkm 8057429Smarkm ASSERT (un > 0); 8157429Smarkm ASSERT (d != 0); 8257429Smarkm 8357429Smarkm d <<= GMP_NAIL_BITS; 8457429Smarkm 8592555Sdes /* Skip a division if high < divisor. Having the test here before 8657429Smarkm normalizing will still skip as often as possible. */ 8792555Sdes r = up[un - 1] << GMP_NAIL_BITS; 8857429Smarkm if (r < d) 8957429Smarkm { 9057429Smarkm r >>= GMP_NAIL_BITS; 9157429Smarkm un--; 9257429Smarkm if (un == 0) 9357429Smarkm return r; 9457429Smarkm } 9557429Smarkm else 9657429Smarkm r = 0; 9757429Smarkm 9857429Smarkm /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple 9957429Smarkm code above. */ 10057429Smarkm if (! UDIV_NEEDS_NORMALIZATION 10157429Smarkm && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) 10299060Sdes { 10399060Sdes for (i = un - 1; i >= 0; i--) 10499060Sdes { 10557429Smarkm n0 = up[i] << GMP_NAIL_BITS; 10657429Smarkm udiv_qrnnd (dummy, r, r, n0, d); 10792555Sdes r >>= GMP_NAIL_BITS; 10857429Smarkm } 10957429Smarkm return r; 11057429Smarkm } 11157429Smarkm 11276259Sgreen count_leading_zeros (cnt, d); 11357429Smarkm d <<= cnt; 11457429Smarkm 11557429Smarkm n1 = up[un - 1] << GMP_NAIL_BITS; 11657429Smarkm r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt)); 11757429Smarkm 11857429Smarkm if (UDIV_NEEDS_NORMALIZATION 11957429Smarkm && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) 12060573Skris { 12192555Sdes for (i = un - 2; i >= 0; i--) 12257429Smarkm { 12357429Smarkm n0 = up[i] << GMP_NAIL_BITS; 12476259Sgreen udiv_qrnnd (dummy, r, r, 12576259Sgreen (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)), 12657429Smarkm d); 12757429Smarkm r >>= GMP_NAIL_BITS; 12857429Smarkm n1 = n0; 12957429Smarkm } 13057429Smarkm udiv_qrnnd (dummy, r, r, n1 << cnt, d); 13157429Smarkm r >>= GMP_NAIL_BITS; 13260573Skris return r >> cnt; 13376259Sgreen } 13457429Smarkm else 13557429Smarkm { 13660573Skris mp_limb_t inv; 13757429Smarkm invert_limb (inv, d); 13857429Smarkm 13957429Smarkm for (i = un - 2; i >= 0; i--) 14057429Smarkm { 14157429Smarkm n0 = up[i] << GMP_NAIL_BITS; 14260573Skris udiv_qrnnd_preinv (dummy, r, r, 14376259Sgreen (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)), 14457429Smarkm d, inv); 14557429Smarkm r >>= GMP_NAIL_BITS; 14660573Skris n1 = n0; 14757429Smarkm } 14857429Smarkm udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv); 14957429Smarkm r >>= GMP_NAIL_BITS; 15057429Smarkm return r >> cnt; 15157429Smarkm } 15292555Sdes} 15357429Smarkm 15457429Smarkmstatic mp_limb_t 15557429Smarkmmpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d) 15657429Smarkm{ 15757429Smarkm mp_size_t i; 15857429Smarkm mp_limb_t n0, r; 15957429Smarkm mp_limb_t dummy; 16060573Skris 16157429Smarkm ASSERT (un > 0); 16257429Smarkm 16357429Smarkm d <<= GMP_NAIL_BITS; 16492555Sdes 16557429Smarkm ASSERT (d & GMP_LIMB_HIGHBIT); 16676259Sgreen 16776259Sgreen /* High limb is initial remainder, possibly with one subtract of 16876259Sgreen d to get r<d. */ 16976259Sgreen r = up[un - 1] << GMP_NAIL_BITS; 17076259Sgreen if (r >= d) 17176259Sgreen r -= d; 17276259Sgreen r >>= GMP_NAIL_BITS; 17376259Sgreen un--; 17457429Smarkm if (un == 0) 175 return r; 176 177 if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD)) 178 { 179 for (i = un - 1; i >= 0; i--) 180 { 181 n0 = up[i] << GMP_NAIL_BITS; 182 udiv_qrnnd (dummy, r, r, n0, d); 183 r >>= GMP_NAIL_BITS; 184 } 185 return r; 186 } 187 else 188 { 189 mp_limb_t inv; 190 invert_limb (inv, d); 191 for (i = un - 1; i >= 0; i--) 192 { 193 n0 = up[i] << GMP_NAIL_BITS; 194 udiv_qrnnd_preinv (dummy, r, r, n0, d, inv); 195 r >>= GMP_NAIL_BITS; 196 } 197 return r; 198 } 199} 200 201mp_limb_t 202mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b) 203{ 204 ASSERT (n >= 0); 205 ASSERT (b != 0); 206 207 /* Should this be handled at all? Rely on callers? Note un==0 is currently 208 required by mpz/fdiv_r_ui.c and possibly other places. */ 209 if (n == 0) 210 return 0; 211 212 if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0)) 213 { 214 if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD)) 215 { 216 return mpn_mod_1_norm (ap, n, b); 217 } 218 else 219 { 220 mp_limb_t pre[4]; 221 mpn_mod_1_1p_cps (pre, b); 222 return mpn_mod_1_1p (ap, n, b, pre); 223 } 224 } 225 else 226 { 227 if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD)) 228 { 229 return mpn_mod_1_unnorm (ap, n, b); 230 } 231 else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD)) 232 { 233 mp_limb_t pre[4]; 234 mpn_mod_1_1p_cps (pre, b); 235 return mpn_mod_1_1p (ap, n, b << pre[1], pre); 236 } 237 else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4)) 238 { 239 mp_limb_t pre[5]; 240 mpn_mod_1s_2p_cps (pre, b); 241 return mpn_mod_1s_2p (ap, n, b << pre[1], pre); 242 } 243 else 244 { 245 mp_limb_t pre[7]; 246 mpn_mod_1s_4p_cps (pre, b); 247 return mpn_mod_1s_4p (ap, n, b << pre[1], pre); 248 } 249 } 250} 251