1/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
2   inverse, returning quotient and remainder.
3
4   Contributed to the GNU project by Niels M��ller and Torbjorn Granlund.
5
6   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of either:
16
17  * the GNU Lesser General Public License as published by the Free
18    Software Foundation; either version 3 of the License, or (at your
19    option) any later version.
20
21or
22
23  * the GNU General Public License as published by the Free Software
24    Foundation; either version 2 of the License, or (at your option) any
25    later version.
26
27or both in parallel, as here.
28
29The GNU MP Library is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32for more details.
33
34You should have received copies of the GNU General Public License and the
35GNU Lesser General Public License along with the GNU MP Library.  If not,
36see https://www.gnu.org/licenses/.  */
37
38#include "gmp-impl.h"
39
40
41/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
42
43   Output:
44
45      q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
46
47      r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
48
49   Stores q at qp. Stores the n least significant limbs of r at the high half
50   of np, and returns the carry from the addition n + q*d.
51
52   d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
53
54mp_size_t
55mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
56{
57  return n;
58}
59
60mp_limb_t
61mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
62		     mp_limb_t dinv, mp_ptr tp)
63{
64  mp_size_t lo, hi;
65  mp_limb_t cy;
66  mp_limb_t rh;
67
68  lo = n >> 1;			/* floor(n/2) */
69  hi = n - lo;			/* ceil(n/2) */
70
71  if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
72    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
73  else
74    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
75
76  mpn_mul (tp, dp + lo, hi, qp, lo);
77
78  mpn_incr_u (tp + lo, cy);
79  rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
80
81  if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
82    cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
83  else
84    cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
85
86  mpn_mul (tp, qp + lo, hi, dp + hi, lo);
87
88  mpn_incr_u (tp + hi, cy);
89  rh += mpn_add_n (np + n, np + n, tp, n);
90
91  return rh;
92}
93
94mp_limb_t
95mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
96		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
97{
98  mp_size_t qn;
99  mp_limb_t rr, cy;
100  mp_ptr tp;
101  TMP_DECL;
102
103  TMP_MARK;
104
105  ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
106  ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
107  ASSERT (dp[0] & 1);
108
109  tp = TMP_SALLOC_LIMBS (dn);
110
111  qn = nn - dn;
112
113  if (qn > dn)
114    {
115      /* Reduce qn mod dn without division, optimizing small operations.  */
116      do
117	qn -= dn;
118      while (qn > dn);
119
120      /* Perform the typically smaller block first.  */
121      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
122	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
123      else
124	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
125
126      rr = 0;
127      if (qn != dn)
128	{
129	  if (qn > dn - qn)
130	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
131	  else
132	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
133	  mpn_incr_u (tp + qn, cy);
134
135	  rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
136	  cy = 0;
137	}
138
139      np += qn;
140      qp += qn;
141
142      qn = nn - dn - qn;
143      do
144	{
145	  rr += mpn_add_1 (np + dn, np + dn, qn, cy);
146	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
147	  qp += dn;
148	  np += dn;
149	  qn -= dn;
150	}
151      while (qn > 0);
152      TMP_FREE;
153      return rr + cy;
154    }
155
156  if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
157    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
158  else
159    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
160
161  rr = 0;
162  if (qn != dn)
163    {
164      if (qn > dn - qn)
165	mpn_mul (tp, qp, qn, dp + qn, dn - qn);
166      else
167	mpn_mul (tp, dp + qn, dn - qn, qp, qn);
168      mpn_incr_u (tp + qn, cy);
169
170      rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
171      cy = 0;
172    }
173
174  TMP_FREE;
175  return rr + cy;
176}
177