/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

/*
  Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

  2. We used to fall back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, now we multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
     solve that.

  3. The allocations done here should be made from the scratch area, which
     then would need to be amended.
*/

#include <stdlib.h>		/* for NULL */
#include "gmp-impl.h"


65mp_limb_t
66mpn_mu_div_q (mp_ptr qp,
67	      mp_srcptr np, mp_size_t nn,
68	      mp_srcptr dp, mp_size_t dn,
69	      mp_ptr scratch)
70{
71  mp_ptr tp, rp;
72  mp_size_t qn;
73  mp_limb_t cy, qh;
74  TMP_DECL;
75
76  TMP_MARK;
77
78  qn = nn - dn;
79
80  tp = TMP_BALLOC_LIMBS (qn + 1);
81
82  if (qn >= dn)			/* nn >= 2*dn + 1 */
83    {
84       /* |_______________________|   dividend
85			 |________|   divisor  */
86
87      rp = TMP_BALLOC_LIMBS (nn + 1);
88      MPN_COPY (rp + 1, np, nn);
89      rp[0] = 0;
90
91      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
92      if (qh != 0)
93	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
94
95      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
96
97      if (UNLIKELY (cy != 0))
98	{
99	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
100	     canonically reduced, replace the returned value of B^(qn-dn)+eps
101	     by the largest possible value.  */
102	  mp_size_t i;
103	  for (i = 0; i < qn + 1; i++)
104	    tp[i] = GMP_NUMB_MAX;
105	}
106
107      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
108	 smaller than the max error, we cannot trust the quotient.  */
109      if (tp[0] > 4)
110	{
111	  MPN_COPY (qp, tp + 1, qn);
112	}
113      else
114	{
115	  mp_limb_t cy;
116	  mp_ptr pp;
117
118	  pp = rp;
119	  mpn_mul (pp, tp + 1, qn, dp, dn);
120
121	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
122
123	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
124	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
125	  else /* Same as above */
126	    MPN_COPY (qp, tp + 1, qn);
127	}
128    }
129  else
130    {
131       /* |_______________________|   dividend
132		 |________________|   divisor  */
133
134      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
135	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
136	 the most significant dn-1 limbs will actually be read, but it is not
137	 pretty.  */
138
139      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
140			     dp + dn - (qn + 1), qn + 1, scratch);
141
142      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
143         error from the divisor truncation.  */
144      if (tp[0] > 6)
145	{
146	  MPN_COPY (qp, tp + 1, qn);
147	}
148      else
149	{
150	  mp_limb_t cy;
151
152	  /* FIXME: a shorter product should be enough; we may use already
153	     allocated space... */
154	  rp = TMP_BALLOC_LIMBS (nn);
155	  mpn_mul (rp, dp, dn, tp + 1, qn);
156
157	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
158
159	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
160	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
161	  else /* Same as above */
162	    MPN_COPY (qp, tp + 1, qn);
163	}
164    }
165
166  TMP_FREE;
167  return qh;
168}
169
170mp_size_t
171mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
172{
173  mp_size_t qn;
174
175  qn = nn - dn;
176  if (qn >= dn)
177    {
178      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
179    }
180  else
181    {
182      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
183    }
184}
185