1/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2   size operands.
3
4   Contributed to the GNU project by Torbjorn Granlund.
5
6   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of the GNU Lesser General Public License as published by
16the Free Software Foundation; either version 3 of the License, or (at your
17option) any later version.
18
19The GNU MP Library is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22License for more details.
23
24You should have received a copy of the GNU Lesser General Public License
25along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
26
27#include "gmp.h"
28#include "gmp-impl.h"
29#include "longlong.h"
30
31
32mp_limb_t
33mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
34		    gmp_pi1_t *dinv, mp_ptr tp)
35{
36  mp_size_t lo, hi;
37  mp_limb_t cy, qh, ql;
38
39  lo = n >> 1;			/* floor(n/2) */
40  hi = n - lo;			/* ceil(n/2) */
41
42  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
43    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
44  else
45    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
46
47  mpn_mul (tp, qp + lo, hi, dp, lo);
48
49  cy = mpn_sub_n (np + lo, np + lo, tp, n);
50  if (qh != 0)
51    cy += mpn_sub_n (np + n, np + n, dp, lo);
52
53  while (cy != 0)
54    {
55      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
56      cy -= mpn_add_n (np + lo, np + lo, dp, n);
57    }
58
59  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
60    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
61  else
62    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
63
64  mpn_mul (tp, dp, hi, qp, lo);
65
66  cy = mpn_sub_n (np, np, tp, n);
67  if (ql != 0)
68    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
69
70  while (cy != 0)
71    {
72      mpn_sub_1 (qp, qp, lo, 1);
73      cy -= mpn_add_n (np, np, dp, n);
74    }
75
76  return qh;
77}
78
79mp_limb_t
80mpn_dcpi1_div_qr (mp_ptr qp,
81		  mp_ptr np, mp_size_t nn,
82		  mp_srcptr dp, mp_size_t dn,
83		  gmp_pi1_t *dinv)
84{
85  mp_size_t qn;
86  mp_limb_t qh, cy;
87  mp_ptr tp;
88  TMP_DECL;
89
90  TMP_MARK;
91
92  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
93  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
94  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
95
96  tp = TMP_SALLOC_LIMBS (dn);
97
98  qn = nn - dn;
99  qp += qn;
100  np += nn;
101  dp += dn;
102
103  if (qn > dn)
104    {
105      /* Reduce qn mod dn without division, optimizing small operations.  */
106      do
107	qn -= dn;
108      while (qn > dn);
109
110      qp -= qn;			/* point at low limb of next quotient block */
111      np -= qn;			/* point in the middle of partial remainder */
112
113      /* Perform the typically smaller block first.  */
114      if (qn == 1)
115	{
116	  mp_limb_t q, n2, n1, n0, d1, d0;
117
118	  /* Handle qh up front, for simplicity. */
119	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
120	  if (qh)
121	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
122
123	  /* A single iteration of schoolbook: One 3/2 division,
124	     followed by the bignum update and adjustment. */
125	  n2 = np[0];
126	  n1 = np[-1];
127	  n0 = np[-2];
128	  d1 = dp[-1];
129	  d0 = dp[-2];
130
131	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
132
133	  if (UNLIKELY (n2 == d1) && n1 == d0)
134	    {
135	      q = GMP_NUMB_MASK;
136	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
137	      ASSERT (cy == n2);
138	    }
139	  else
140	    {
141	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
142
143	      if (dn > 2)
144		{
145		  mp_limb_t cy, cy1;
146		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
147
148		  cy1 = n0 < cy;
149		  n0 = (n0 - cy) & GMP_NUMB_MASK;
150		  cy = n1 < cy1;
151		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
152		  np[-2] = n0;
153
154		  if (UNLIKELY (cy != 0))
155		    {
156		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
157		      qh -= (q == 0);
158		      q = (q - 1) & GMP_NUMB_MASK;
159		    }
160		}
161	      else
162		np[-2] = n0;
163
164	      np[-1] = n1;
165	    }
166	  qp[0] = q;
167	}
168      else
169	{
170	  /* Do a 2qn / qn division */
171	  if (qn == 2)
172	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
173	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
174	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
175	  else
176	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
177
178	  if (qn != dn)
179	    {
180	      if (qn > dn - qn)
181		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
182	      else
183		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
184
185	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
186	      if (qh != 0)
187		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
188
189	      while (cy != 0)
190		{
191		  qh -= mpn_sub_1 (qp, qp, qn, 1);
192		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
193		}
194	    }
195	}
196
197      qn = nn - dn - qn;
198      do
199	{
200	  qp -= dn;
201	  np -= dn;
202	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
203	  qn -= dn;
204	}
205      while (qn > 0);
206    }
207  else
208    {
209      qp -= qn;			/* point at low limb of next quotient block */
210      np -= qn;			/* point in the middle of partial remainder */
211
212      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
213	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
214      else
215	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
216
217      if (qn != dn)
218	{
219	  if (qn > dn - qn)
220	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
221	  else
222	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
223
224	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
225	  if (qh != 0)
226	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
227
228	  while (cy != 0)
229	    {
230	      qh -= mpn_sub_1 (qp, qp, qn, 1);
231	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
232	    }
233	}
234    }
235
236  TMP_FREE;
237  return qh;
238}
239