/* mpn_divexact_1 -- mpn by limb exact division.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
   q will only be correct if d divides a exactly.  The quotient is developed
   limb by limb by multiplying by the inverse of d modulo 2^GMP_NUMB_BITS,
   which needs no correction steps (a standalone illustration of this method
   follows this comment).

   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
   give zero on all CPUs (for instance it doesn't on the x86s).  This
   separate loop might run faster too, helping odd divisors.

   Possibilities:

   mpn_divexact_1c could be created, accepting and returning c.  This would
   let a long calculation be done piece by piece.  Currently there's no
   particular need for that, and not returning c means that a final umul can
   be skipped.

   Another use for returning c would be letting the caller know whether the
   division was in fact exact.  It would work just to return the carry bit
   "c=(l>s)" and let the caller do a final umul if interested.

   When the divisor is even, the factors of two could be handled with a
   separate mpn_rshift, instead of shifting on the fly.  That might be
   faster on some CPUs and would mean just the shift==0 style loop would be
   needed.

   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
   shift==0 loop is unnecessary, and could be eliminated if there's no great
   speed difference.

   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
   2.95 for instance has a poor "/" and might prefer the modular method.
   Perhaps a tuned parameter should control this.

   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
   step could be skipped.  A test at the last step for s<divisor (or ls in
   the even case) might be a good way to do that.  But if this code is often
   used with small divisors then it might not be worth bothering.  */
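
/* Illustration, not part of the library: a minimal standalone sketch of the
   modular inverse method on plain 32-bit integers, assuming no nails and a
   hypothetical 32-bit limb.  For odd d, an inv with d*inv == 1 mod 2^32
   turns exact division into a single multiply: d = 7 gives
   inv = 0xB6DB6DB7, and a = 21 then yields a*inv mod 2^32 == 3 == 21/7.
   The helper name binvert32 is invented for this sketch; the block is left
   disabled so it cannot affect the build.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* Newton/Hensel iteration: each step doubles the number of correct low
   bits of the inverse, starting from at least 3 correct bits since
   d*d == 1 mod 8 for any odd d.  */
static uint32_t
binvert32 (uint32_t d)
{
  uint32_t inv = d;       /* at least 3 bits correct */
  inv *= 2 - d * inv;     /* 6 bits */
  inv *= 2 - d * inv;     /* 12 bits */
  inv *= 2 - d * inv;     /* 24 bits */
  inv *= 2 - d * inv;     /* 48 >= 32 bits, done */
  return inv;
}

int
main (void)
{
  uint32_t d = 7, inv = binvert32 (d);
  assert (d * inv == 1);   /* inverse mod 2^32 */
  assert (21 * inv == 3);  /* exact quotient from one multiply */
  return 0;
}
#endif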

void
mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
{
  mp_size_t  i;
  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (divisor != 0);
  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
  ASSERT_MPN (src, size);
  ASSERT_LIMB (divisor);

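  /* For an even divisor, work with its odd part and apply the stripped
     factor 2^shift as an on-the-fly right shift of the dividend.  */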
  if ((divisor & 1) == 0)
    {
      count_trailing_zeros (shift, divisor);
      divisor >>= shift;
    }
  else
    shift = 0;

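  /* inverse satisfies divisor*inverse == 1 mod 2^GMP_NUMB_BITS.
     binvert_limb needs an odd operand, which the shift above guarantees.  */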
  binvert_limb (inverse, divisor);
  divisor <<= GMP_NAIL_BITS;

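  /* Each step forms the next dividend limb, subtracts the running borrow c,
     and multiplies by the inverse to get a quotient limb; the new borrow is
     the subtraction's carry plus the high limb of quotient_limb*divisor.  */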
  if (shift != 0)
    {
      c = 0;

      s = src[0];

      for (i = 1; i < size; i++)
	{
	  s_next = src[i];
	  ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
	  s = s_next;

	  SUBC_LIMB (c, l, ls, c);

	  l = (l * inverse) & GMP_NUMB_MASK;
	  dst[i - 1] = l;

	  umul_ppmm (h, dummy, l, divisor);
	  c += h;
	}

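      /* The final quotient limb uses only the bits of s above the shift;
	 exactness means no borrow propagates beyond it, so a plain
	 subtraction is enough.  */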
      ls = s >> shift;
      l = ls - c;
      l = (l * inverse) & GMP_NUMB_MASK;
      dst[size - 1] = l;
    }
  else
    {
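      /* Odd divisor: no shifting.  dst[0] needs no borrow adjustment; each
	 later limb first adds the high limb of the previous l*divisor into
	 the borrow, then subtracts it from the incoming src limb.  */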
      s = src[0];

      l = (s * inverse) & GMP_NUMB_MASK;
      dst[0] = l;
      c = 0;

      for (i = 1; i < size; i++)
	{
	  umul_ppmm (h, dummy, l, divisor);
	  c += h;

	  s = src[i];
	  SUBC_LIMB (c, l, s, c);

	  l = (l * inverse) & GMP_NUMB_MASK;
	  dst[i] = l;
	}
    }
}
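
/* Usage sketch (an assumption about typical in-tree use, not a documented
   API): an internal caller with an n-limb value {np,n} known to be a
   multiple of 3 could take the exact quotient with

     mpn_divexact_1 (qp, np, n, CNST_LIMB (3));

   where qp may equal np, per the MPN_SAME_OR_SEPARATE_P assertion above.  */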