1/* mpn_mod_1s_4p (ap, n, b, cps)
2   Divide (ap,,n) by b.  Return the single-limb remainder.
3   Requires that b < B / 4.
4
5   Contributed to the GNU project by Torbjorn Granlund.
6   Based on a suggestion by Peter L. Montgomery.
7
8   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
9   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
10   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11
12Copyright 2008-2010 Free Software Foundation, Inc.
13
14This file is part of the GNU MP Library.
15
16The GNU MP Library is free software; you can redistribute it and/or modify
17it under the terms of either:
18
19  * the GNU Lesser General Public License as published by the Free
20    Software Foundation; either version 3 of the License, or (at your
21    option) any later version.
22
23or
24
25  * the GNU General Public License as published by the Free Software
26    Foundation; either version 2 of the License, or (at your option) any
27    later version.
28
29or both in parallel, as here.
30
31The GNU MP Library is distributed in the hope that it will be useful, but
32WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
34for more details.
35
36You should have received copies of the GNU General Public License and the
37GNU Lesser General Public License along with the GNU MP Library.  If not,
38see https://www.gnu.org/licenses/.  */
39
40#include "gmp-impl.h"
41#include "longlong.h"
42
43void
44mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
45{
46  mp_limb_t bi;
47  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
48  int cnt;
49
50  ASSERT (b <= (~(mp_limb_t) 0) / 4);
51
52  count_leading_zeros (cnt, b);
53
54  b <<= cnt;
55  invert_limb (bi, b);
56
57  cps[0] = bi;
58  cps[1] = cnt;
59
60  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
61  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
62  cps[2] = B1modb >> cnt;
63
64  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
65  cps[3] = B2modb >> cnt;
66
67  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
68  cps[4] = B3modb >> cnt;
69
70  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
71  cps[5] = B4modb >> cnt;
72
73  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
74  cps[6] = B5modb >> cnt;
75
76#if WANT_ASSERT
77  {
78    int i;
79    b = cps[2];
80    for (i = 3; i <= 6; i++)
81      {
82	b += cps[i];
83	ASSERT (b >= cps[i]);
84      }
85  }
86#endif
87}
88
89mp_limb_t
90mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
91{
92  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
93  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
94  mp_size_t i;
95  int cnt;
96
97  ASSERT (n >= 1);
98
99  B1modb = cps[2];
100  B2modb = cps[3];
101  B3modb = cps[4];
102  B4modb = cps[5];
103  B5modb = cps[6];
104
105  switch (n & 3)
106    {
107    case 0:
108      umul_ppmm (ph, pl, ap[n - 3], B1modb);
109      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
110      umul_ppmm (ch, cl, ap[n - 2], B2modb);
111      add_ssaaaa (ph, pl, ph, pl, ch, cl);
112      umul_ppmm (rh, rl, ap[n - 1], B3modb);
113      add_ssaaaa (rh, rl, rh, rl, ph, pl);
114      n -= 4;
115      break;
116    case 1:
117      rh = 0;
118      rl = ap[n - 1];
119      n -= 1;
120      break;
121    case 2:
122      rh = ap[n - 1];
123      rl = ap[n - 2];
124      n -= 2;
125      break;
126    case 3:
127      umul_ppmm (ph, pl, ap[n - 2], B1modb);
128      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
129      umul_ppmm (rh, rl, ap[n - 1], B2modb);
130      add_ssaaaa (rh, rl, rh, rl, ph, pl);
131      n -= 3;
132      break;
133    }
134
135  for (i = n - 4; i >= 0; i -= 4)
136    {
137      /* rr = ap[i]				< B
138	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
139	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
140	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
141	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
142	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
143      */
144      umul_ppmm (ph, pl, ap[i + 1], B1modb);
145      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
146
147      umul_ppmm (ch, cl, ap[i + 2], B2modb);
148      add_ssaaaa (ph, pl, ph, pl, ch, cl);
149
150      umul_ppmm (ch, cl, ap[i + 3], B3modb);
151      add_ssaaaa (ph, pl, ph, pl, ch, cl);
152
153      umul_ppmm (ch, cl, rl, B4modb);
154      add_ssaaaa (ph, pl, ph, pl, ch, cl);
155
156      umul_ppmm (rh, rl, rh, B5modb);
157      add_ssaaaa (rh, rl, rh, rl, ph, pl);
158    }
159
160  umul_ppmm (rh, cl, rh, B1modb);
161  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
162
163  cnt = cps[1];
164  bi = cps[0];
165
166  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
167  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
168
169  return r >> cnt;
170}
171