1249109Sjkim/* mpn_div_q -- division for arbitrary size operands.
2249109Sjkim
3249109Sjkim   Contributed to the GNU project by Torbjorn Granlund.
4249109Sjkim
5249109Sjkim   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6249109Sjkim   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7249109Sjkim   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8281075Sdim
9249109SjkimCopyright 2009, 2010 Free Software Foundation, Inc.
10249109Sjkim
11249109SjkimThis file is part of the GNU MP Library.
12249109Sjkim
13249109SjkimThe GNU MP Library is free software; you can redistribute it and/or modify
14249109Sjkimit under the terms of the GNU Lesser General Public License as published by
15249109Sjkimthe Free Software Foundation; either version 3 of the License, or (at your
16249109Sjkimoption) any later version.
17249109Sjkim
18249109SjkimThe GNU MP Library is distributed in the hope that it will be useful, but
19249109SjkimWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20249109Sjkimor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21249109SjkimLicense for more details.
22249109Sjkim
23249109SjkimYou should have received a copy of the GNU Lesser General Public License
24249109Sjkimalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25249109Sjkim
26249109Sjkim#include "gmp.h"
27249109Sjkim#include "gmp-impl.h"
28249109Sjkim#include "longlong.h"
29249109Sjkim
30249109Sjkim
31249109Sjkim/* Compute Q = N/D with truncation.
32249109Sjkim     N = {np,nn}
33249109Sjkim     D = {dp,dn}
34249109Sjkim     Q = {qp,nn-dn+1}
35249109Sjkim     T = {scratch,nn+1} is scratch space
36249109Sjkim   N and D are both untouched by the computation.
37249109Sjkim   N and T may overlap; pass the same space if N is irrelevant after the call,
38249109Sjkim   but note that tp needs an extra limb.
39249109Sjkim
40249109Sjkim   Operand requirements:
41249109Sjkim     N >= D > 0
42249109Sjkim     dp[dn-1] != 0
43249109Sjkim     No overlap between the N, D, and Q areas.
44249112Sjkim
45249109Sjkim   This division function does not clobber its input operands, since it is
46249109Sjkim   intended to support average-O(qn) division, and for that to be effective, it
47249109Sjkim   cannot put requirements on callers to copy a O(nn) operand.
48249109Sjkim
49249109Sjkim   If a caller does not care about the value of {np,nn+1} after calling this
50249109Sjkim   function, it should pass np also for the scratch argument.  This function
51249109Sjkim   will then save some time and space by avoiding allocation and copying.
52249109Sjkim   (FIXME: Is this a good design?  We only really save any copying for
53249109Sjkim   already-normalised divisors, which should be rare.  It also prevents us from
54249109Sjkim   reasonably asking for all scratch space we need.)
55249109Sjkim
56249109Sjkim   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
57249109Sjkim   the most significant quotient limb?  Look at the 4 main code blocks below
58249109Sjkim   (consisting of an outer if-else where each arm contains an if-else). It is
59249109Sjkim   tricky for the first code block, since the mpn_*_div_q calls will typically
60249109Sjkim   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
61249109Sjkim   we generate the most significant quotient limb here, before calling
62249109Sjkim   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
63249109Sjkim   critical division case (the SB sub-case in particular) copying is not a good
64249109Sjkim   idea.
65249109Sjkim
66249109Sjkim   It might make sense to split the if-else parts of the (qn + FUDGE
67249109Sjkim   >= dn) blocks into separate functions, since we could promise quite
68249109Sjkim   different things to callers in these two cases.  The 'then' case
69249109Sjkim   benefits from np=scratch, and it could perhaps even tolerate qp=np,
70249109Sjkim   saving some headache for many callers.
71249109Sjkim
72249109Sjkim   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
73249109Sjkim   operands, we do not reuse the huge scratch for adjustments.  This can be a
74249109Sjkim   serious waste of memory for the largest operands.
75249109Sjkim*/
76249109Sjkim
77249109Sjkim/* FUDGE determines when to try getting an approximate quotient from the upper
78249109Sjkim   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
79249109Sjkim   for the code to be correct.  */
80249109Sjkim#define FUDGE 5			/* FIXME: tune this */
81249109Sjkim
82249109Sjkim#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
83249109Sjkim#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
84249109Sjkim#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
85249109Sjkim#ifndef MUPI_DIVAPPR_Q_THRESHOLD
86249109Sjkim#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
87249109Sjkim#endif
88249109Sjkim
89249109Sjkimvoid
90249109Sjkimmpn_div_q (mp_ptr qp,
91249109Sjkim	   mp_srcptr np, mp_size_t nn,
92249109Sjkim	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
93249109Sjkim{
94249109Sjkim  mp_ptr new_dp, new_np, tp, rp;
95249109Sjkim  mp_limb_t cy, dh, qh;
96249109Sjkim  mp_size_t new_nn, qn;
97249109Sjkim  gmp_pi1_t dinv;
98249109Sjkim  int cnt;
99249109Sjkim  TMP_DECL;
100249109Sjkim  TMP_MARK;
101249109Sjkim
102249109Sjkim  ASSERT (nn >= dn);
103249109Sjkim  ASSERT (dn > 0);
104249109Sjkim  ASSERT (dp[dn - 1] != 0);
105249109Sjkim  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
106249109Sjkim  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
107249109Sjkim  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
108249109Sjkim
109249109Sjkim  ASSERT_ALWAYS (FUDGE >= 2);
110249109Sjkim
111249109Sjkim  if (dn == 1)
112249109Sjkim    {
113249109Sjkim      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
114249109Sjkim      return;
115249109Sjkim    }
116249109Sjkim
117249109Sjkim  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
118249109Sjkim
119249109Sjkim  if (qn + FUDGE >= dn)
120249109Sjkim    {
121249109Sjkim      /* |________________________|
122249109Sjkim                          |_______|  */
123249109Sjkim      new_np = scratch;
124249109Sjkim
125249109Sjkim      dh = dp[dn - 1];
126249109Sjkim      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
127249109Sjkim	{
128249109Sjkim	  count_leading_zeros (cnt, dh);
129249109Sjkim
130249109Sjkim	  cy = mpn_lshift (new_np, np, nn, cnt);
131249109Sjkim	  new_np[nn] = cy;
132249109Sjkim	  new_nn = nn + (cy != 0);
133249109Sjkim
134249109Sjkim	  new_dp = TMP_ALLOC_LIMBS (dn);
135249109Sjkim	  mpn_lshift (new_dp, dp, dn, cnt);
136249109Sjkim
137249109Sjkim	  if (dn == 2)
138249109Sjkim	    {
139249109Sjkim	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
140249109Sjkim	    }
141249109Sjkim	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
142249109Sjkim		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
143249109Sjkim	    {
144249109Sjkim	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
145249109Sjkim	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
146249109Sjkim	    }
147249109Sjkim	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
148249109Sjkim		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
149249109Sjkim		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
150249109Sjkim		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
151249109Sjkim	    {
152249109Sjkim	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
153249109Sjkim	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
154249109Sjkim	    }
155249109Sjkim	  else
156249109Sjkim	    {
157249109Sjkim	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
158249109Sjkim	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
159249109Sjkim	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
160249109Sjkim	    }
161249109Sjkim	  if (cy == 0)
162249109Sjkim	    qp[qn - 1] = qh;
163249109Sjkim	  else if (UNLIKELY (qh != 0))
164249109Sjkim	    {
165249109Sjkim	      /* This happens only when the quotient is close to B^n and
166249109Sjkim		 mpn_*_divappr_q returned B^n.  */
167249109Sjkim	      mp_size_t i, n;
168249109Sjkim	      n = new_nn - dn;
169249109Sjkim	      for (i = 0; i < n; i++)
170249109Sjkim		qp[i] = GMP_NUMB_MAX;
171249109Sjkim	      qh = 0;		/* currently ignored */
172249109Sjkim	    }
173249109Sjkim	}
174249109Sjkim      else  /* divisor is already normalised */
175249109Sjkim	{
176249109Sjkim	  if (new_np != np)
177249109Sjkim	    MPN_COPY (new_np, np, nn);
178249109Sjkim
179249109Sjkim	  if (dn == 2)
180249109Sjkim	    {
181249109Sjkim	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
182249109Sjkim	    }
183249109Sjkim	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
184249109Sjkim		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
185249109Sjkim	    {
186249109Sjkim	      invert_pi1 (dinv, dh, dp[dn - 2]);
187249109Sjkim	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
188249109Sjkim	    }
189249109Sjkim	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
190249109Sjkim		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
191249109Sjkim		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
192249109Sjkim		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
193249109Sjkim	    {
194249109Sjkim	      invert_pi1 (dinv, dh, dp[dn - 2]);
195249109Sjkim	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
196249109Sjkim	    }
197249109Sjkim	  else
198249109Sjkim	    {
199249109Sjkim	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
200249109Sjkim	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
201249109Sjkim	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
202249109Sjkim	    }
203249109Sjkim	  qp[nn - dn] = qh;
204249109Sjkim	}
205249109Sjkim    }
206249109Sjkim  else
207249109Sjkim    {
208249109Sjkim      /* |________________________|
209249109Sjkim                |_________________|  */
210249109Sjkim      tp = TMP_ALLOC_LIMBS (qn + 1);
211249109Sjkim
212249109Sjkim      new_np = scratch;
213249109Sjkim      new_nn = 2 * qn + 1;
214249109Sjkim      if (new_np == np)
215249109Sjkim	/* We need {np,nn} to remain untouched until the final adjustment, so
216249109Sjkim	   we need to allocate separate space for new_np.  */
217249109Sjkim	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
218249109Sjkim
219249109Sjkim
220249109Sjkim      dh = dp[dn - 1];
221249109Sjkim      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
222249109Sjkim	{
223249109Sjkim	  count_leading_zeros (cnt, dh);
224249109Sjkim
225249109Sjkim	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
226249109Sjkim	  new_np[new_nn] = cy;
227249109Sjkim
228249109Sjkim	  new_nn += (cy != 0);
229249109Sjkim
230249109Sjkim	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
231249109Sjkim	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
232249109Sjkim	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
233249109Sjkim
234249109Sjkim	  if (qn + 1 == 2)
235249109Sjkim	    {
236249109Sjkim	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
237249109Sjkim	    }
238249109Sjkim	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
239249109Sjkim	    {
240249109Sjkim	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
241249109Sjkim	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
242249109Sjkim	    }
243249109Sjkim	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
244249109Sjkim	    {
245249109Sjkim	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
246249109Sjkim	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
247249109Sjkim	    }
248249109Sjkim	  else
249249109Sjkim	    {
250249109Sjkim	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
251249109Sjkim	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
252249109Sjkim	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
253249109Sjkim	    }
254250838Sjkim	  if (cy == 0)
255249109Sjkim	    tp[qn] = qh;
256250838Sjkim	  else if (UNLIKELY (qh != 0))
257249109Sjkim	    {
258249109Sjkim	      /* This happens only when the quotient is close to B^n and
259249109Sjkim		 mpn_*_divappr_q returned B^n.  */
260249109Sjkim	      mp_size_t i, n;
261249109Sjkim	      n = new_nn - (qn + 1);
262249109Sjkim	      for (i = 0; i < n; i++)
263249109Sjkim		tp[i] = GMP_NUMB_MAX;
264249109Sjkim	      qh = 0;		/* currently ignored */
265249109Sjkim	    }
266249109Sjkim	}
267249109Sjkim      else  /* divisor is already normalised */
268249109Sjkim	{
269249109Sjkim	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless of MU will be used */
270249109Sjkim
271249109Sjkim	  new_dp = (mp_ptr) dp + dn - (qn + 1);
272249109Sjkim
273249109Sjkim	  if (qn == 2 - 1)
274249109Sjkim	    {
275249109Sjkim	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
276249109Sjkim	    }
277249109Sjkim	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
278249109Sjkim	    {
279249109Sjkim	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
280249109Sjkim	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
281249109Sjkim	    }
282249109Sjkim	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
283249109Sjkim	    {
284249109Sjkim	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
285249109Sjkim	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
286249109Sjkim	    }
287249109Sjkim	  else
288249109Sjkim	    {
289249109Sjkim	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
290249109Sjkim	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
291249109Sjkim	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
292249109Sjkim	    }
293249109Sjkim	  tp[qn] = qh;
294249109Sjkim	}
295249109Sjkim
296249109Sjkim      MPN_COPY (qp, tp + 1, qn);
297249109Sjkim      if (tp[0] <= 4)
298249109Sjkim        {
299249109Sjkim	  mp_size_t rn;
300249109Sjkim
301249109Sjkim          rp = TMP_ALLOC_LIMBS (dn + qn);
302249109Sjkim          mpn_mul (rp, dp, dn, tp + 1, qn);
303249109Sjkim	  rn = dn + qn;
304249109Sjkim	  rn -= rp[rn - 1] == 0;
305249109Sjkim
306249109Sjkim          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
307249109Sjkim            mpn_decr_u (qp, 1);
308249109Sjkim        }
309249109Sjkim    }
310249109Sjkim
311249109Sjkim  TMP_FREE;
312249109Sjkim}
313249109Sjkim