1/* Alpha mpn_divexact_1 -- mpn by limb exact division.
2
3   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
4   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5   FUTURE GNU MP RELEASES.
6
7Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
8
9This file is part of the GNU MP Library.
10
11The GNU MP Library is free software; you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation; either version 3 of the License, or (at your
14option) any later version.
15
16The GNU MP Library is distributed in the hope that it will be useful, but
17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23
24#include "gmp.h"
25#include "gmp-impl.h"
26#include "longlong.h"
27
28
29/*      cycles/limb
30   EV4:    47.0
31   EV5:    30.0
32   EV6:    15.0
33*/
34
35
36/* The dependent chain is as follows (the same as modexact), and this is
37   what the code runs as.
38
39       ev4    ev5   ev6
40        1      1     1    sub    y = x - h
41       23     13     7    mulq   q = y * inverse
42       23     15     7    umulh  h = high (q * d)
43       --     --    --
44       47     30    15
45
46   The time to load src[i+1] and establish x hides under the umulh latency.  */
47
48void
49mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
50{
51  mp_limb_t  inverse, lshift_mask, s, sr, s_next, c, h, x, y, q, dummy;
52  unsigned   rshift, lshift;
53
54  ASSERT (size >= 1);
55  ASSERT (divisor != 0);
56  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
57  ASSERT_MPN (src, size);
58  ASSERT_LIMB (divisor);
59
60  s_next = *src++;   /* src[0] */
61
62  rshift = 0;
63  lshift_mask = 0;
64  if ((divisor & 1) == 0)
65    {
66      count_trailing_zeros (rshift, divisor);
67      lshift_mask = MP_LIMB_T_MAX;
68      divisor >>= rshift;
69    }
70
71  binvert_limb (inverse, divisor);
72  lshift = 64 - rshift;
73
74  c = 0;
75  h = 0;
76  sr = s_next >> rshift;
77
78  size--;
79  if (LIKELY (size != 0))
80    {
81      do
82        {
83          s_next = *src++;      /* src[i+1] */
84          s = sr | ((s_next << lshift) & lshift_mask);
85          x = s - c;
86          c = s < c;
87          sr = s_next >> rshift;
88
89          y = x - h;
90          c += (x < h);
91          q = y * inverse;
92          *dst++ = q;
93          umul_ppmm (h, dummy, q, divisor);
94
95          size--;
96        }
97      while (size != 0);
98    }
99
100  x = sr - c;
101  y = x - h;
102  q = y * inverse;
103  *dst = q;         /* dst[size-1] */
104}
105