/* mpn_mod_1s_2p (ap, n, b, cps)
   Divide (ap,,n) by b.  Return the single-limb remainder.
   Requires that b < B / 2.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

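/* mpn_mod_1s_2p_cps precomputes, for a fixed b, everything mpn_mod_1s_2p
   needs, so that many reductions by the same b share the setup cost.  As
   stored by the code below:

     cps[0]	limb inverse of the normalized divisor (invert_limb)
     cps[1]	cnt, the normalizing left-shift count of b
     cps[2]	B   mod b   (B = 2^GMP_LIMB_BITS; may be unreduced, <= b)
     cps[3]	B^2 mod b
     cps[4]	B^3 mod b  */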
void
mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 2);

  count_leading_zeros (cnt, b);

  b <<= cnt;			/* normalize b so its high bit is set */
  invert_limb (bi, b);

  /* B1modb = (B mod original_b) << cnt, obtained from bi without a division.  */
  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
  /* B2modb, B3modb: multiply the previous residue by B and reduce.  */
  udiv_rnd_preinv (B2modb, B1modb, b, bi);
  udiv_rnd_preinv (B3modb, B2modb, b, bi);

  cps[0] = bi;
  cps[1] = cnt;
  cps[2] = B1modb >> cnt;
  cps[3] = B2modb >> cnt;
  cps[4] = B3modb >> cnt;

#if WANT_ASSERT
  /* Check that cps[2] + cps[3] + cps[4] does not overflow a limb; the
     two-limb accumulation in mpn_mod_1s_2p depends on this.  */
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 4; i++)
      {
	b += cps[i];
	ASSERT (b >= cps[i]);
      }
  }
#endif
}
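
/* Worked example (a sketch, assuming GMP_LIMB_BITS == 64): for b = 10 we
   get cnt = 60, and since 2^64 == 6 (mod 10) and 6^k == 6 (mod 10) for
   k >= 1, the stored residues are cps[2] = cps[3] = cps[4] = 6.  */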

mp_limb_t
mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
{
  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];

  /* Reduce to an even number of remaining limbs, folding the top two or
     three limbs into the two-limb accumulator rh:rl.  */
  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  rl = ap[n - 1];
	  bi = cps[0];
	  cnt = cps[1];
	  udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
			     rl << cnt, b, bi);
	  return r >> cnt;
	}

      /* n odd: rh:rl = ap[n-1]*(B^2 mod b) + ap[n-2]*(B mod b) + ap[n-3] */
      umul_ppmm (ph, pl, ap[n - 2], B1modb);
      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
      umul_ppmm (rh, rl, ap[n - 1], B2modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
      n--;
    }
  else
    {
      /* n even: rh:rl = ap[n-1]*(B mod b) + ap[n-2] */
      umul_ppmm (rh, rl, ap[n - 1], B1modb);
      add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
    }

  for (i = n - 4; i >= 0; i -= 2)
    {
      /* rr = ap[i]				< B
	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
	    + LO(rr)  * (B^2 mod b)		<= (B-1)(b-1)
	    + HI(rr)  * (B^3 mod b)		<= (B-1)(b-1)
	 Since cps[2] + cps[3] + cps[4] fits in one limb (asserted in
	 mpn_mod_1s_2p_cps), rr <= (B-1) * (1 + (B-1)) < B^2, so the
	 result fits in the two limbs rh:rl.
      */
      umul_ppmm (ph, pl, ap[i + 1], B1modb);
      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);

      umul_ppmm (ch, cl, rl, B2modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (rh, rl, rh, B3modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
    }
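
  /* At this point rh:rl is congruent mod b to the full input (ap,,n).
     It remains to fold rh into the low limb and do one final 2/1
     division by the normalized divisor.  */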
  bi = cps[0];
  cnt = cps[1];

#if 1
  /* Fold the high limb: rh:rl := rh * (B mod b) + rl, congruent mod b,
     then form the high limb of the value shifted left by cnt.  */
  umul_ppmm (rh, cl, rh, B1modb);
  add_ssaaaa (rh, rl, rh, rl, 0, cl);
  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
#else
  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
  ASSERT (q <= 2);	/* optimize for small quotient? */
#endif

  /* Divide the remaining (normalized) two-limb value and unshift.  */
  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);

  return r >> cnt;
}
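
/* A minimal usage sketch (not part of GMP): precompute once, then reduce.
   Note that the divisor passed to mpn_mod_1s_2p is pre-shifted by cps[1],
   which is how mpn/generic/mod_1.c calls it.  The TEST_MOD_1S_2P guard and
   the cross-check against the public mpn_mod_1 are illustrative
   assumptions; building this requires the internal headers included
   above.  */
#ifdef TEST_MOD_1S_2P
#include <stdio.h>

int
main (void)
{
  mp_limb_t cps[5];
  mp_limb_t ap[4] = { 123, 456, 789, 1011 };	/* arbitrary 4-limb operand */
  mp_limb_t b = CNST_LIMB (12345);		/* must satisfy b < B/2 */
  mp_limb_t r1, r2;

  mpn_mod_1s_2p_cps (cps, b);
  r1 = mpn_mod_1s_2p (ap, (mp_size_t) 4, b << cps[1], cps);
  r2 = mpn_mod_1 (ap, (mp_size_t) 4, b);	/* reference result */

  if (r1 == r2)
    printf ("ok, remainder = %lu\n", (unsigned long) r1);
  else
    printf ("MISMATCH: %lu != %lu\n", (unsigned long) r1, (unsigned long) r2);
  return r1 != r2;
}
#endif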