/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  0
#endif

#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif

#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
#endif

#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
#endif


/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
   of divide steps, though how often that happens depends on the assumed
   distributions of dividend and divisor.  In any case this idea is left to
   CPU specific implementations to consider.  */

/* Return the single-limb remainder of {up,un} divided by d, where d may
   be any nonzero value — in particular d need not be "normalized" (its
   high bit need not be set).  un must be > 0.  */
static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n1, n0, r;
  mp_limb_t  dummy;		/* receives the unwanted quotient limbs */
  int cnt;

  ASSERT (un > 0);
  ASSERT (d != 0);

  /* Operate on full limbs: numb values are shifted up past the nail bits,
     and shifted back down after each division step.  */
  d <<= GMP_NAIL_BITS;

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r < d)
    {
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
	return r;
    }
  else
    r = 0;

  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     code above. */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      /* One unnormalized udiv_qrnnd per remaining limb, high to low.  */
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }

  /* Normalize the divisor and compute (a*2^cnt) mod (d*2^cnt); the final
     "r >> cnt" undoes the scaling.  NOTE(review): this assumes cnt != 0
     here (d unnormalized), since "GMP_LIMB_BITS - cnt" would otherwise be
     a full-width shift — confirm against the dispatch in mpn_mod_1, which
     only sends divisors with the high bit clear to this function.  */
  count_leading_zeros (cnt, d);
  d <<= cnt;

  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      /* Plain udiv_qrnnd on the shifted-up limb pairs.  */
      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r,
		      (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
		      d);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      /* Lowest limb: after the shift its low cnt bits are zero.  */
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;		/* undo the normalization scaling */
    }
  else
    {
      /* Same loop, but with a precomputed inverse of d so each step uses
	 multiplications instead of a hardware division.  */
      mp_limb_t inv;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd_preinv (dummy, r, r,
			     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
			     d, inv);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;		/* undo the normalization scaling */
    }
}
/* Return the single-limb remainder of {up,un} divided by d, for a
   "normalized" divisor: after shifting past the nail bits, d must have
   its most significant bit set (asserted below).  un must be > 0.  */
static mp_limb_t
mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n0, r;
  mp_limb_t  dummy;		/* receives the unwanted quotient limbs */

  ASSERT (un > 0);

  /* Operate on full limbs; numb values are shifted up past the nails.  */
  d <<= GMP_NAIL_BITS;

  ASSERT (d & GMP_LIMB_HIGHBIT);

  /* High limb is initial remainder, possibly with one subtract of
     d to get r<d.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r >= d)
    r -= d;
  r >>= GMP_NAIL_BITS;
  un--;
  if (un == 0)
    return r;

  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
    {
      /* Below the tuned threshold: one hardware division per limb,
	 processed from most to least significant.  */
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }
  else
    {
      /* At or above the threshold: invert d once up front, then each
	 step divides via multiplications by the inverse.  */
      mp_limb_t  inv;
      invert_limb (inv, d);
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }
}
201mp_limb_t
202mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
203{
204  ASSERT (n >= 0);
205  ASSERT (b != 0);
206
207  /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
208     required by mpz/fdiv_r_ui.c and possibly other places.  */
209  if (n == 0)
210    return 0;
211
212  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
213    {
214      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
215	{
216	  return mpn_mod_1_norm (ap, n, b);
217	}
218      else
219	{
220	  mp_limb_t pre[4];
221	  mpn_mod_1_1p_cps (pre, b);
222	  return mpn_mod_1_1p (ap, n, b, pre);
223	}
224    }
225  else
226    {
227      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
228	{
229	  return mpn_mod_1_unnorm (ap, n, b);
230	}
231      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
232	{
233	  mp_limb_t pre[4];
234	  mpn_mod_1_1p_cps (pre, b);
235	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
236	}
237      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
238	{
239	  mp_limb_t pre[5];
240	  mpn_mod_1s_2p_cps (pre, b);
241	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
242	}
243      else
244	{
245	  mp_limb_t pre[7];
246	  mpn_mod_1s_4p_cps (pre, b);
247	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
248	}
249    }
250}