1/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2   size operands.
3
4   Contributed to the GNU project by Torbjorn Granlund.
5
6   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of either:
16
17  * the GNU Lesser General Public License as published by the Free
18    Software Foundation; either version 3 of the License, or (at your
19    option) any later version.
20
21or
22
23  * the GNU General Public License as published by the Free Software
24    Foundation; either version 2 of the License, or (at your option) any
25    later version.
26
27or both in parallel, as here.
28
29The GNU MP Library is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32for more details.
33
34You should have received copies of the GNU General Public License and the
35GNU Lesser General Public License along with the GNU MP Library.  If not,
36see https://www.gnu.org/licenses/.  */
37
38#include "gmp-impl.h"
39#include "longlong.h"
40
41
42mp_limb_t
43mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44		    gmp_pi1_t *dinv, mp_ptr tp)
45{
46  mp_size_t lo, hi;
47  mp_limb_t cy, qh, ql;
48
49  lo = n >> 1;			/* floor(n/2) */
50  hi = n - lo;			/* ceil(n/2) */
51
52  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54  else
55    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56
57  mpn_mul (tp, qp + lo, hi, dp, lo);
58
59  cy = mpn_sub_n (np + lo, np + lo, tp, n);
60  if (qh != 0)
61    cy += mpn_sub_n (np + n, np + n, dp, lo);
62
63  while (cy != 0)
64    {
65      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66      cy -= mpn_add_n (np + lo, np + lo, dp, n);
67    }
68
69  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
70    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71  else
72    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
73
74  mpn_mul (tp, dp, hi, qp, lo);
75
76  cy = mpn_sub_n (np, np, tp, n);
77  if (ql != 0)
78    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
79
80  while (cy != 0)
81    {
82      mpn_sub_1 (qp, qp, lo, 1);
83      cy -= mpn_add_n (np, np, dp, n);
84    }
85
86  return qh;
87}
88
89mp_limb_t
90mpn_dcpi1_div_qr (mp_ptr qp,
91		  mp_ptr np, mp_size_t nn,
92		  mp_srcptr dp, mp_size_t dn,
93		  gmp_pi1_t *dinv)
94{
95  mp_size_t qn;
96  mp_limb_t qh, cy;
97  mp_ptr tp;
98  TMP_DECL;
99
100  TMP_MARK;
101
102  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
103  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
104  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
105
106  tp = TMP_ALLOC_LIMBS (dn);
107
108  qn = nn - dn;
109  qp += qn;
110  np += nn;
111  dp += dn;
112
113  if (qn > dn)
114    {
115      /* Reduce qn mod dn without division, optimizing small operations.  */
116      do
117	qn -= dn;
118      while (qn > dn);
119
120      qp -= qn;			/* point at low limb of next quotient block */
121      np -= qn;			/* point in the middle of partial remainder */
122
123      /* Perform the typically smaller block first.  */
124      if (qn == 1)
125	{
126	  mp_limb_t q, n2, n1, n0, d1, d0;
127
128	  /* Handle qh up front, for simplicity. */
129	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
130	  if (qh)
131	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
132
133	  /* A single iteration of schoolbook: One 3/2 division,
134	     followed by the bignum update and adjustment. */
135	  n2 = np[0];
136	  n1 = np[-1];
137	  n0 = np[-2];
138	  d1 = dp[-1];
139	  d0 = dp[-2];
140
141	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
142
143	  if (UNLIKELY (n2 == d1) && n1 == d0)
144	    {
145	      q = GMP_NUMB_MASK;
146	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
147	      ASSERT (cy == n2);
148	    }
149	  else
150	    {
151	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
152
153	      if (dn > 2)
154		{
155		  mp_limb_t cy, cy1;
156		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
157
158		  cy1 = n0 < cy;
159		  n0 = (n0 - cy) & GMP_NUMB_MASK;
160		  cy = n1 < cy1;
161		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
162		  np[-2] = n0;
163
164		  if (UNLIKELY (cy != 0))
165		    {
166		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
167		      qh -= (q == 0);
168		      q = (q - 1) & GMP_NUMB_MASK;
169		    }
170		}
171	      else
172		np[-2] = n0;
173
174	      np[-1] = n1;
175	    }
176	  qp[0] = q;
177	}
178      else
179	{
180	  /* Do a 2qn / qn division */
181	  if (qn == 2)
182	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
183	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
184	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
185	  else
186	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
187
188	  if (qn != dn)
189	    {
190	      if (qn > dn - qn)
191		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
192	      else
193		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
194
195	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
196	      if (qh != 0)
197		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
198
199	      while (cy != 0)
200		{
201		  qh -= mpn_sub_1 (qp, qp, qn, 1);
202		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
203		}
204	    }
205	}
206
207      qn = nn - dn - qn;
208      do
209	{
210	  qp -= dn;
211	  np -= dn;
212	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
213	  qn -= dn;
214	}
215      while (qn > 0);
216    }
217  else
218    {
219      qp -= qn;			/* point at low limb of next quotient block */
220      np -= qn;			/* point in the middle of partial remainder */
221
222      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
223	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
224      else
225	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
226
227      if (qn != dn)
228	{
229	  if (qn > dn - qn)
230	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
231	  else
232	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
233
234	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
235	  if (qh != 0)
236	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
237
238	  while (cy != 0)
239	    {
240	      qh -= mpn_sub_1 (qp, qp, qn, 1);
241	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
242	    }
243	}
244    }
245
246  TMP_FREE;
247  return qh;
248}
249