1/* mpn_divrem_1 -- mpn by limb division.
2
3Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
4Foundation, Inc.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MP Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20
21#include "gmp.h"
22#include "gmp-impl.h"
23#include "longlong.h"
24
25
26/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
27   meaning the quotient size where that should happen, the quotient size
28   being how many udiv divisions will be done.
29
30   The default is to use preinv always, CPUs where this doesn't suit have
31   tuned thresholds.  Note in particular that preinv should certainly be
32   used if that's the only division available (USE_PREINV_ALWAYS).  */
33
34#ifndef DIVREM_1_NORM_THRESHOLD
35#define DIVREM_1_NORM_THRESHOLD  0
36#endif
37#ifndef DIVREM_1_UNNORM_THRESHOLD
38#define DIVREM_1_UNNORM_THRESHOLD  0
39#endif
40
41
42
43/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
44   and UNNORM thresholds are 0 and only the inversion code is included.
45
46   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
47   will be MP_SIZE_T_MAX and only the plain division code is included.
48
49   Otherwise mul-by-inverse is better than plain division above some
50   threshold, and best results are obtained by having code for both present.
51
52   The main reason for separating the norm and unnorm cases is that not all
53   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
54   code used on an already normalized divisor.
55
56   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
57   non-shifting code for both the norm and unnorm cases, though with
58   different criteria for skipping a division, and with different thresholds
59   of course.  And in fact if inversion is never viable, then that simple
60   non-shifting division would be all that's left.
61
62   The NORM and UNNORM thresholds might not differ much, but if there's
63   going to be separate code for norm and unnorm then it makes sense to have
64   separate thresholds.  One thing that's possible is that the
65   mul-by-inverse might be better only for normalized divisors, due to that
66   case not needing variable bit shifts.
67
68   Notice that the thresholds are tested after the decision to possibly skip
69   one divide step, so they're based on the actual number of divisions done.
70
71   For the unnorm case, it would be possible to call mpn_lshift to adjust
72   the dividend all in one go (into the quotient space say), rather than
73   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
74   than what the compiler can generate for EXTRACT.  But this is left to CPU
75   specific implementations to consider, especially since EXTRACT isn't on
76   the dependent chain.  */
77
78mp_limb_t
79mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
80	      mp_srcptr up, mp_size_t un, mp_limb_t d)
81{
82  mp_size_t  n;
83  mp_size_t  i;
84  mp_limb_t  n1, n0;
85  mp_limb_t  r = 0;
86
87  ASSERT (qxn >= 0);
88  ASSERT (un >= 0);
89  ASSERT (d != 0);
90  /* FIXME: What's the correct overlap rule when qxn!=0? */
91  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
92
93  n = un + qxn;
94  if (n == 0)
95    return 0;
96
97  d <<= GMP_NAIL_BITS;
98
99  qp += (n - 1);   /* Make qp point at most significant quotient limb */
100
101  if ((d & GMP_LIMB_HIGHBIT) != 0)
102    {
103      if (un != 0)
104	{
105	  /* High quotient limb is 0 or 1, skip a divide step. */
106	  mp_limb_t q;
107	  r = up[un - 1] << GMP_NAIL_BITS;
108	  q = (r >= d);
109	  *qp-- = q;
110	  r -= (d & -q);
111	  r >>= GMP_NAIL_BITS;
112	  n--;
113	  un--;
114	}
115
116      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
117	{
118	plain:
119	  for (i = un - 1; i >= 0; i--)
120	    {
121	      n0 = up[i] << GMP_NAIL_BITS;
122	      udiv_qrnnd (*qp, r, r, n0, d);
123	      r >>= GMP_NAIL_BITS;
124	      qp--;
125	    }
126	  for (i = qxn - 1; i >= 0; i--)
127	    {
128	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
129	      r >>= GMP_NAIL_BITS;
130	      qp--;
131	    }
132	  return r;
133	}
134      else
135	{
136	  /* Multiply-by-inverse, divisor already normalized. */
137	  mp_limb_t dinv;
138	  invert_limb (dinv, d);
139
140	  for (i = un - 1; i >= 0; i--)
141	    {
142	      n0 = up[i] << GMP_NAIL_BITS;
143	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
144	      r >>= GMP_NAIL_BITS;
145	      qp--;
146	    }
147	  for (i = qxn - 1; i >= 0; i--)
148	    {
149	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
150	      r >>= GMP_NAIL_BITS;
151	      qp--;
152	    }
153	  return r;
154	}
155    }
156  else
157    {
158      /* Most significant bit of divisor == 0.  */
159      int norm;
160
161      /* Skip a division if high < divisor (high quotient 0).  Testing here
162	 before normalizing will still skip as often as possible.  */
163      if (un != 0)
164	{
165	  n1 = up[un - 1] << GMP_NAIL_BITS;
166	  if (n1 < d)
167	    {
168	      r = n1 >> GMP_NAIL_BITS;
169	      *qp-- = 0;
170	      n--;
171	      if (n == 0)
172		return r;
173	      un--;
174	    }
175	}
176
177      if (! UDIV_NEEDS_NORMALIZATION
178	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
179	goto plain;
180
181      count_leading_zeros (norm, d);
182      d <<= norm;
183      r <<= norm;
184
185      if (UDIV_NEEDS_NORMALIZATION
186	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
187	{
188	  if (un != 0)
189	    {
190	      n1 = up[un - 1] << GMP_NAIL_BITS;
191	      r |= (n1 >> (GMP_LIMB_BITS - norm));
192	      for (i = un - 2; i >= 0; i--)
193		{
194		  n0 = up[i] << GMP_NAIL_BITS;
195		  udiv_qrnnd (*qp, r, r,
196			      (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
197			      d);
198		  r >>= GMP_NAIL_BITS;
199		  qp--;
200		  n1 = n0;
201		}
202	      udiv_qrnnd (*qp, r, r, n1 << norm, d);
203	      r >>= GMP_NAIL_BITS;
204	      qp--;
205	    }
206	  for (i = qxn - 1; i >= 0; i--)
207	    {
208	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
209	      r >>= GMP_NAIL_BITS;
210	      qp--;
211	    }
212	  return r >> norm;
213	}
214      else
215	{
216	  mp_limb_t  dinv;
217	  invert_limb (dinv, d);
218	  if (un != 0)
219	    {
220	      n1 = up[un - 1] << GMP_NAIL_BITS;
221	      r |= (n1 >> (GMP_LIMB_BITS - norm));
222	      for (i = un - 2; i >= 0; i--)
223		{
224		  n0 = up[i] << GMP_NAIL_BITS;
225		  udiv_qrnnd_preinv (*qp, r, r,
226				     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
227				     d, dinv);
228		  r >>= GMP_NAIL_BITS;
229		  qp--;
230		  n1 = n0;
231		}
232	      udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
233	      r >>= GMP_NAIL_BITS;
234	      qp--;
235	    }
236	  for (i = qxn - 1; i >= 0; i--)
237	    {
238	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
239	      r >>= GMP_NAIL_BITS;
240	      qp--;
241	    }
242	  return r >> norm;
243	}
244    }
245}
246