1/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3   qxn is non-zero, generate that many fraction limbs and append them after the
4   other quotient limbs, and update the remainder accordingly.  The input
5   operands are unaffected.
6
7   Preconditions:
8   1. The most significant limb of the divisor must be non-zero.
9   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10
11   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12   complexity of multiplication.
13
14Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15
16This file is part of the GNU MP Library.
17
18The GNU MP Library is free software; you can redistribute it and/or modify
19it under the terms of either:
20
21  * the GNU Lesser General Public License as published by the Free
22    Software Foundation; either version 3 of the License, or (at your
23    option) any later version.
24
25or
26
27  * the GNU General Public License as published by the Free Software
28    Foundation; either version 2 of the License, or (at your option) any
29    later version.
30
31or both in parallel, as here.
32
33The GNU MP Library is distributed in the hope that it will be useful, but
34WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36for more details.
37
38You should have received copies of the GNU General Public License and the
39GNU Lesser General Public License along with the GNU MP Library.  If not,
40see https://www.gnu.org/licenses/.  */
41
42#include "gmp-impl.h"
43#include "longlong.h"
44
45
46void
47mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49{
50  ASSERT_ALWAYS (qxn == 0);
51
52  ASSERT (nn >= 0);
53  ASSERT (dn >= 0);
54  ASSERT (dn == 0 || dp[dn - 1] != 0);
55  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57
58  switch (dn)
59    {
60    case 0:
61      DIVIDE_BY_ZERO;
62
63    case 1:
64      {
65	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66	return;
67      }
68
69    case 2:
70      {
71	mp_ptr n2p;
72	mp_limb_t qhl, cy;
73	TMP_DECL;
74	TMP_MARK;
75	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76	  {
77	    int cnt;
78	    mp_limb_t d2p[2];
79	    count_leading_zeros (cnt, dp[1]);
80	    cnt -= GMP_NAIL_BITS;
81	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83	    n2p = TMP_ALLOC_LIMBS (nn + 1);
84	    cy = mpn_lshift (n2p, np, nn, cnt);
85	    n2p[nn] = cy;
86	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87	    if (cy == 0)
88	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
89	    rp[0] = (n2p[0] >> cnt)
90	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91	    rp[1] = (n2p[1] >> cnt);
92	  }
93	else
94	  {
95	    n2p = TMP_ALLOC_LIMBS (nn);
96	    MPN_COPY (n2p, np, nn);
97	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
99	    rp[0] = n2p[0];
100	    rp[1] = n2p[1];
101	  }
102	TMP_FREE;
103	return;
104      }
105
106    default:
107      {
108	int adjust;
109	gmp_pi1_t dinv;
110	TMP_DECL;
111	TMP_MARK;
112	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
113	if (nn + adjust >= 2 * dn)
114	  {
115	    mp_ptr n2p, d2p;
116	    mp_limb_t cy;
117	    int cnt;
118
119	    qp[nn - dn] = 0;			  /* zero high quotient limb */
120	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121	      {
122		count_leading_zeros (cnt, dp[dn - 1]);
123		cnt -= GMP_NAIL_BITS;
124		d2p = TMP_ALLOC_LIMBS (dn);
125		mpn_lshift (d2p, dp, dn, cnt);
126		n2p = TMP_ALLOC_LIMBS (nn + 1);
127		cy = mpn_lshift (n2p, np, nn, cnt);
128		n2p[nn] = cy;
129		nn += adjust;
130	      }
131	    else
132	      {
133		cnt = 0;
134		d2p = (mp_ptr) dp;
135		n2p = TMP_ALLOC_LIMBS (nn + 1);
136		MPN_COPY (n2p, np, nn);
137		n2p[nn] = 0;
138		nn += adjust;
139	      }
140
141	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
145		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
148	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149	    else
150	      {
151		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154		n2p = rp;
155	      }
156
157	    if (cnt != 0)
158	      mpn_rshift (rp, n2p, dn, cnt);
159	    else
160	      MPN_COPY (rp, n2p, dn);
161	    TMP_FREE;
162	    return;
163	  }
164
165	/* When we come here, the numerator/partial remainder is less
166	   than twice the size of the denominator.  */
167
168	  {
169	    /* Problem:
170
171	       Divide a numerator N with nn limbs by a denominator D with dn
172	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
173	       compared to dn, conventional division algorithms perform poorly.
174	       We want an algorithm that has an expected running time that is
175	       dependent only on qn.
176
177	       Algorithm (very informally stated):
178
179	       1) Divide the 2 x qn most significant limbs from the numerator
180		  by the qn most significant limbs from the denominator.  Call
181		  the result qest.  This is either the correct quotient, but
182		  might be 1 or 2 too large.  Compute the remainder from the
183		  division.  (This step is implemented by an mpn_divrem call.)
184
185	       2) Is the most significant limb from the remainder < p, where p
186		  is the product of the most significant limb from the quotient
187		  and the next(d)?  (Next(d) denotes the next ignored limb from
188		  the denominator.)  If it is, decrement qest, and adjust the
189		  remainder accordingly.
190
191	       3) Is the remainder >= qest?  If it is, qest is the desired
192		  quotient.  The algorithm terminates.
193
194	       4) Subtract qest x next(d) from the remainder.  If there is
195		  borrow out, decrement qest, and adjust the remainder
196		  accordingly.
197
198	       5) Skip one word from the denominator (i.e., let next(d) denote
199		  the next less significant limb.  */
200
201	    mp_size_t qn;
202	    mp_ptr n2p, d2p;
203	    mp_ptr tp;
204	    mp_limb_t cy;
205	    mp_size_t in, rn;
206	    mp_limb_t quotient_too_large;
207	    unsigned int cnt;
208
209	    qn = nn - dn;
210	    qp[qn] = 0;				/* zero high quotient limb */
211	    qn += adjust;			/* qn cannot become bigger */
212
213	    if (qn == 0)
214	      {
215		MPN_COPY (rp, np, dn);
216		TMP_FREE;
217		return;
218	      }
219
220	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
221	    /* Normalize denominator by shifting it to the left such that its
222	       most significant bit is set.  Then shift the numerator the same
223	       amount, to mathematically preserve quotient.  */
224	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225	      {
226		count_leading_zeros (cnt, dp[dn - 1]);
227		cnt -= GMP_NAIL_BITS;
228
229		d2p = TMP_ALLOC_LIMBS (qn);
230		mpn_lshift (d2p, dp + in, qn, cnt);
231		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232
233		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235		if (adjust)
236		  {
237		    n2p[2 * qn] = cy;
238		    n2p++;
239		  }
240		else
241		  {
242		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243		  }
244	      }
245	    else
246	      {
247		cnt = 0;
248		d2p = (mp_ptr) dp + in;
249
250		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252		if (adjust)
253		  {
254		    n2p[2 * qn] = 0;
255		    n2p++;
256		  }
257	      }
258
259	    /* Get an approximate quotient using the extracted operands.  */
260	    if (qn == 1)
261	      {
262		mp_limb_t q0, r0;
263		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264		n2p[0] = r0 >> GMP_NAIL_BITS;
265		qp[0] = q0;
266	      }
267	    else if (qn == 2)
268	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269	    else
270	      {
271		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276		else
277		  {
278		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280		    mp_ptr r2p = rp;
281		    if (np == r2p)	/* If N and R share space, put ... */
282		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
283		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284		    MPN_COPY (n2p, r2p, qn);
285		  }
286	      }
287
288	    rn = qn;
289	    /* Multiply the first ignored divisor limb by the most significant
290	       quotient limb.  If that product is > the partial remainder's
291	       most significant limb, we know the quotient is too large.  This
292	       test quickly catches most cases where the quotient is too large;
293	       it catches all cases where the quotient is 2 too large.  */
294	    {
295	      mp_limb_t dl, x;
296	      mp_limb_t h, dummy;
297
298	      if (in - 2 < 0)
299		dl = 0;
300	      else
301		dl = dp[in - 2];
302
303#if GMP_NAIL_BITS == 0
304	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305#else
306	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307	      if (cnt != 0)
308		x |= dl >> (GMP_NUMB_BITS - cnt);
309#endif
310	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311
312	      if (n2p[qn - 1] < h)
313		{
314		  mp_limb_t cy;
315
316		  mpn_decr_u (qp, (mp_limb_t) 1);
317		  cy = mpn_add_n (n2p, n2p, d2p, qn);
318		  if (cy)
319		    {
320		      /* The partial remainder is safely large.  */
321		      n2p[qn] = cy;
322		      ++rn;
323		    }
324		}
325	    }
326
327	    quotient_too_large = 0;
328	    if (cnt != 0)
329	      {
330		mp_limb_t cy1, cy2;
331
332		/* Append partially used numerator limb to partial remainder.  */
333		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335
336		/* Update partial remainder with partially used divisor limb.  */
337		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338		if (qn != rn)
339		  {
340		    ASSERT_ALWAYS (n2p[qn] >= cy2);
341		    n2p[qn] -= cy2;
342		  }
343		else
344		  {
345		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346
347		    quotient_too_large = (cy1 < cy2);
348		    ++rn;
349		  }
350		--in;
351	      }
352	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
353
354	    tp = TMP_ALLOC_LIMBS (dn);
355
356	    if (in < qn)
357	      {
358		if (in == 0)
359		  {
360		    MPN_COPY (rp, n2p, rn);
361		    ASSERT_ALWAYS (rn == dn);
362		    goto foo;
363		  }
364		mpn_mul (tp, qp, qn, dp, in);
365	      }
366	    else
367	      mpn_mul (tp, dp, in, qp, qn);
368
369	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370	    MPN_COPY (rp + in, n2p, dn - in);
371	    quotient_too_large |= cy;
372	    cy = mpn_sub_n (rp, np, tp, in);
373	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374	    quotient_too_large |= cy;
375	  foo:
376	    if (quotient_too_large)
377	      {
378		mpn_decr_u (qp, (mp_limb_t) 1);
379		mpn_add_n (rp, rp, dp, dn);
380	      }
381	  }
382	TMP_FREE;
383	return;
384      }
385    }
386}
387