/* div_q.c revision 1.1.1.2 */
1/* mpn_div_q -- division for arbitrary size operands.
2
3   Contributed to the GNU project by Torbjorn Granlund.
4
5   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9Copyright 2009, 2010 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of either:
15
16  * the GNU Lesser General Public License as published by the Free
17    Software Foundation; either version 3 of the License, or (at your
18    option) any later version.
19
20or
21
22  * the GNU General Public License as published by the Free Software
23    Foundation; either version 2 of the License, or (at your option) any
24    later version.
25
26or both in parallel, as here.
27
28The GNU MP Library is distributed in the hope that it will be useful, but
29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31for more details.
32
33You should have received copies of the GNU General Public License and the
34GNU Lesser General Public License along with the GNU MP Library.  If not,
35see https://www.gnu.org/licenses/.  */
36
37#include "gmp.h"
38#include "gmp-impl.h"
39#include "longlong.h"
40
41
42/* Compute Q = N/D with truncation.
43     N = {np,nn}
44     D = {dp,dn}
45     Q = {qp,nn-dn+1}
46     T = {scratch,nn+1} is scratch space
47   N and D are both untouched by the computation.
48   N and T may overlap; pass the same space if N is irrelevant after the call,
49   but note that tp needs an extra limb.
50
51   Operand requirements:
52     N >= D > 0
53     dp[dn-1] != 0
54     No overlap between the N, D, and Q areas.
55
56   This division function does not clobber its input operands, since it is
57   intended to support average-O(qn) division, and for that to be effective, it
58   cannot put requirements on callers to copy a O(nn) operand.
59
60   If a caller does not care about the value of {np,nn+1} after calling this
61   function, it should pass np also for the scratch argument.  This function
62   will then save some time and space by avoiding allocation and copying.
63   (FIXME: Is this a good design?  We only really save any copying for
64   already-normalised divisors, which should be rare.  It also prevents us from
65   reasonably asking for all scratch space we need.)
66
67   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
68   the most significant quotient limb?  Look at the 4 main code blocks below
69   (consisting of an outer if-else where each arm contains an if-else). It is
70   tricky for the first code block, since the mpn_*_div_q calls will typically
71   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
72   we generate the most significant quotient limb here, before calling
73   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
74   critical division case (the SB sub-case in particular) copying is not a good
75   idea.
76
77   It might make sense to split the if-else parts of the (qn + FUDGE
78   >= dn) blocks into separate functions, since we could promise quite
79   different things to callers in these two cases.  The 'then' case
80   benefits from np=scratch, and it could perhaps even tolerate qp=np,
81   saving some headache for many callers.
82
83   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
84   operands, we do not reuse the huge scratch for adjustments.  This can be a
85   serious waste of memory for the largest operands.
86*/
87
88/* FUDGE determines when to try getting an approximate quotient from the upper
89   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
90   for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */

/* No separate tuning exists for the plain div_q entry points; alias them to
   the corresponding divappr_q crossover points.  */
#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
/* Fallback when gmp-mparam.h provides no MUPI_DIVAPPR_Q tuning value.
   (Expansion of the alias above is deferred to its use sites, so defining
   the fallback after the alias is fine.)  */
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif
99
void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  /* Operand requirements (see the file-top comment): N >= D > 0, normalised
     top divisor limb, Q disjoint from N and D, and scratch either equal to
     np or fully separate.  */
  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  /* The truncated-operand path below relies on FUDGE >= 2 for correctness
     (per the comment above the FUDGE definition).  */
  ASSERT_ALWAYS (FUDGE >= 2);

  /* Single-limb divisor: delegate entirely and discard the remainder.  */
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* Quotient is large relative to the divisor: divide using the full
	 operands (exact division functions, no adjustment step needed).
	 |________________________|
			  |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Divisor not normalised: shift both operands left so the top
	     divisor bit becomes set, as the *pi1 functions require.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);	/* shifted N may grow by one limb */

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  /* Dispatch on divisor size: divrem_2, then schoolbook, then
	     divide-and-conquer, then multiplication-based (MU).  */
	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);	/* shadows parameter on purpose */
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  /* If the shift produced no extra limb (cy == 0), qh is the high
	     quotient limb; store it.  Otherwise the division was done with
	     one more dividend limb and qh must normally be zero.  */
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  Clamp the quotient to
		 all-ones, its largest representable value.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  /* The division functions clobber the dividend area, so work on a
	     copy unless the caller passed np as scratch.  */
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      /* mpn_mu_div_q does not clobber {np,nn}, so the copy made
		 above is not needed on this path (see "pointless" note in
		 the truncated-operand branch).  */
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);	/* shadows parameter on purpose */
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;	/* high quotient limb (may be zero) */
	}
    }
  else
    {
      /* Quotient is small relative to the divisor: compute an approximate
	 quotient from the high 2*qn+1 limbs of N and the high qn+1 limbs of
	 D, then adjust by at most 1 using a full multiply-back.
	 |________________________|
		 |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);	/* approximate quotient, one extra low limb */

      new_np = scratch;
      new_nn = 2 * qn + 1;		/* number of high dividend limbs used */
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);


      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Normalise the truncated operands by a left shift.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  /* Pull in the bits shifted out of the next-lower divisor limb, so
	     the truncated divisor is as accurate as possible.  (Safe since
	     dn > qn + 1 in this branch.)  */
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  /* Same dispatch ladder as above, but with the divappr_q variants
	     on the truncated qn+1-limb divisor.  */
	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);	/* shadows parameter on purpose */
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  Clamp to all-ones.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

	  /* Truncated divisor can be used in place; cast away const since
	     the divappr functions take mp_srcptr-compatible input here.  */
	  new_dp = (mp_ptr) dp + dn - (qn + 1);

	  if (qn == 2 - 1)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);	/* shadows parameter on purpose */
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  tp[qn] = qh;
	}

      /* Drop the extra low "guard" limb tp[0]; {tp+1,qn} is either the true
	 quotient or one too large.  */
      MPN_COPY (qp, tp + 1, qn);
      /* Only when the guard limb is very small can the candidate quotient
	 be too large; otherwise no adjustment is possible and we skip the
	 expensive multiply-back.  NOTE(review): the constant 4 is presumably
	 tied to the divappr error bound plus truncation error — confirm
	 against the divappr_q specifications before changing it.  */
      if (tp[0] <= 4)
        {
	  mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);	/* rp = D * candidate Q */
	  rn = dn + qn;
	  rn -= rp[rn - 1] == 0;

	  /* If D*Q > N the candidate is one too large; decrement.  */
          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}
324