1/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
2   quotient.  The quotient returned is either correct, or one too large.
3
4   Contributed to the GNU project by Torbjorn Granlund.
5
6   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of the GNU Lesser General Public License as published by
16the Free Software Foundation; either version 3 of the License, or (at your
17option) any later version.
18
19The GNU MP Library is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22License for more details.
23
24You should have received a copy of the GNU Lesser General Public License
25along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
26
27#include "gmp.h"
28#include "gmp-impl.h"
29#include "longlong.h"
30
31
32mp_limb_t
33mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
34		       gmp_pi1_t *dinv, mp_ptr tp)
35{
36  mp_size_t lo, hi;
37  mp_limb_t cy, qh, ql;
38
39  lo = n >> 1;			/* floor(n/2) */
40  hi = n - lo;			/* ceil(n/2) */
41
42  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
43    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
44  else
45    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
46
47  mpn_mul (tp, qp + lo, hi, dp, lo);
48
49  cy = mpn_sub_n (np + lo, np + lo, tp, n);
50  if (qh != 0)
51    cy += mpn_sub_n (np + n, np + n, dp, lo);
52
53  while (cy != 0)
54    {
55      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
56      cy -= mpn_add_n (np + lo, np + lo, dp, n);
57    }
58
59  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
60    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
61  else
62    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
63
64  if (UNLIKELY (ql != 0))
65    {
66      mp_size_t i;
67      for (i = 0; i < lo; i++)
68	qp[i] = GMP_NUMB_MASK;
69    }
70
71  return qh;
72}
73
74mp_limb_t
75mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
76		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
77{
78  mp_size_t qn;
79  mp_limb_t qh, cy, qsave;
80  mp_ptr tp;
81  TMP_DECL;
82
83  TMP_MARK;
84
85  ASSERT (dn >= 6);
86  ASSERT (nn > dn);
87  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
88
89  qn = nn - dn;
90  qp += qn;
91  np += nn;
92  dp += dn;
93
94  if (qn >= dn)
95    {
96      qn++;			/* pretend we'll need an extra limb */
97      /* Reduce qn mod dn without division, optimizing small operations.  */
98      do
99	qn -= dn;
100      while (qn > dn);
101
102      qp -= qn;			/* point at low limb of next quotient block */
103      np -= qn;			/* point in the middle of partial remainder */
104
105      tp = TMP_SALLOC_LIMBS (dn);
106
107      /* Perform the typically smaller block first.  */
108      if (qn == 1)
109	{
110	  mp_limb_t q, n2, n1, n0, d1, d0;
111
112	  /* Handle qh up front, for simplicity. */
113	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
114	  if (qh)
115	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
116
117	  /* A single iteration of schoolbook: One 3/2 division,
118	     followed by the bignum update and adjustment. */
119	  n2 = np[0];
120	  n1 = np[-1];
121	  n0 = np[-2];
122	  d1 = dp[-1];
123	  d0 = dp[-2];
124
125	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
126
127	  if (UNLIKELY (n2 == d1) && n1 == d0)
128	    {
129	      q = GMP_NUMB_MASK;
130	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
131	      ASSERT (cy == n2);
132	    }
133	  else
134	    {
135	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
136
137	      if (dn > 2)
138		{
139		  mp_limb_t cy, cy1;
140		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
141
142		  cy1 = n0 < cy;
143		  n0 = (n0 - cy) & GMP_NUMB_MASK;
144		  cy = n1 < cy1;
145		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
146		  np[-2] = n0;
147
148		  if (UNLIKELY (cy != 0))
149		    {
150		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
151		      qh -= (q == 0);
152		      q = (q - 1) & GMP_NUMB_MASK;
153		    }
154		}
155	      else
156		np[-2] = n0;
157
158	      np[-1] = n1;
159	    }
160	  qp[0] = q;
161	}
162      else
163	{
164	  if (qn == 2)
165	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
166	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
167	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
168	  else
169	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
170
171	  if (qn != dn)
172	    {
173	      if (qn > dn - qn)
174		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
175	      else
176		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
177
178	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
179	      if (qh != 0)
180		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
181
182	      while (cy != 0)
183		{
184		  qh -= mpn_sub_1 (qp, qp, qn, 1);
185		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
186		}
187	    }
188	}
189      qn = nn - dn - qn + 1;
190      while (qn > dn)
191	{
192	  qp -= dn;
193	  np -= dn;
194	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
195	  qn -= dn;
196	}
197
198      /* Since we pretended we'd need an extra quotient limb before, we now
199	 have made sure the code above left just dn-1=qn quotient limbs to
200	 develop.  Develop that plus a guard limb. */
201      qn--;
202      qp -= qn;
203      np -= dn;
204      qsave = qp[qn];
205      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
206      MPN_COPY_INCR (qp, qp + 1, qn);
207      qp[qn] = qsave;
208    }
209  else    /* (qn < dn) */
210    {
211      mp_ptr q2p;
212#if 0				/* not possible since we demand nn > dn */
213      if (qn == 0)
214	{
215	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
216	  if (qh)
217	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
218	  TMP_FREE;
219	  return qh;
220	}
221#endif
222
223      qp -= qn;			/* point at low limb of next quotient block */
224      np -= qn;			/* point in the middle of partial remainder */
225
226      q2p = TMP_SALLOC_LIMBS (qn + 1);
227      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
228	 callers not to be silly?  */
229      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
230	{
231	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
232				    dp - (qn + 1), qn + 1, dinv->inv32);
233	}
234      else
235	{
236	  /* It is tempting to use qp for recursive scratch and put quotient in
237	     tp, but the recursive scratch needs one limb too many.  */
238	  tp = TMP_SALLOC_LIMBS (qn + 1);
239	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
240	}
241      MPN_COPY (qp, q2p + 1, qn);
242    }
243
244  TMP_FREE;
245  return qh;
246}
247