1276789Sdim/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2276789Sdim *
3276789Sdim *                     The LLVM Compiler Infrastructure
4276789Sdim *
5276789Sdim * This file is dual licensed under the MIT and the University of Illinois Open
6276789Sdim * Source Licenses. See LICENSE.TXT for details.
7276789Sdim *
8276789Sdim * ===----------------------------------------------------------------------===
9276789Sdim *
10276789Sdim * This file implements __udivmoddi4 for the compiler_rt library.
11276789Sdim *
12276789Sdim * ===----------------------------------------------------------------------===
13276789Sdim */
14276789Sdim
15276789Sdim#include "int_lib.h"
16276789Sdim
17276789Sdim/* Effects: if rem != 0, *rem = a % b
18276789Sdim * Returns: a / b
19276789Sdim */
20276789Sdim
21276789Sdim/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
22276789Sdim
23276789SdimCOMPILER_RT_ABI du_int
24276789Sdim__udivmoddi4(du_int a, du_int b, du_int* rem)
25276789Sdim{
26276789Sdim    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
27276789Sdim    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
28276789Sdim    udwords n;
29276789Sdim    n.all = a;
30276789Sdim    udwords d;
31276789Sdim    d.all = b;
32276789Sdim    udwords q;
33276789Sdim    udwords r;
34276789Sdim    unsigned sr;
35276789Sdim    /* special cases, X is unknown, K != 0 */
36276789Sdim    if (n.s.high == 0)
37276789Sdim    {
38276789Sdim        if (d.s.high == 0)
39276789Sdim        {
40276789Sdim            /* 0 X
41276789Sdim             * ---
42276789Sdim             * 0 X
43276789Sdim             */
44276789Sdim            if (rem)
45276789Sdim                *rem = n.s.low % d.s.low;
46276789Sdim            return n.s.low / d.s.low;
47276789Sdim        }
48276789Sdim        /* 0 X
49276789Sdim         * ---
50276789Sdim         * K X
51276789Sdim         */
52276789Sdim        if (rem)
53276789Sdim            *rem = n.s.low;
54276789Sdim        return 0;
55276789Sdim    }
56276789Sdim    /* n.s.high != 0 */
57276789Sdim    if (d.s.low == 0)
58276789Sdim    {
59276789Sdim        if (d.s.high == 0)
60276789Sdim        {
61276789Sdim            /* K X
62276789Sdim             * ---
63276789Sdim             * 0 0
64276789Sdim             */
65276789Sdim            if (rem)
66276789Sdim                *rem = n.s.high % d.s.low;
67276789Sdim            return n.s.high / d.s.low;
68276789Sdim        }
69276789Sdim        /* d.s.high != 0 */
70276789Sdim        if (n.s.low == 0)
71276789Sdim        {
72276789Sdim            /* K 0
73276789Sdim             * ---
74276789Sdim             * K 0
75276789Sdim             */
76276789Sdim            if (rem)
77276789Sdim            {
78276789Sdim                r.s.high = n.s.high % d.s.high;
79276789Sdim                r.s.low = 0;
80276789Sdim                *rem = r.all;
81276789Sdim            }
82276789Sdim            return n.s.high / d.s.high;
83276789Sdim        }
84276789Sdim        /* K K
85276789Sdim         * ---
86276789Sdim         * K 0
87276789Sdim         */
88276789Sdim        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
89276789Sdim        {
90276789Sdim            if (rem)
91276789Sdim            {
92276789Sdim                r.s.low = n.s.low;
93276789Sdim                r.s.high = n.s.high & (d.s.high - 1);
94276789Sdim                *rem = r.all;
95276789Sdim            }
96276789Sdim            return n.s.high >> __builtin_ctz(d.s.high);
97276789Sdim        }
98276789Sdim        /* K K
99276789Sdim         * ---
100276789Sdim         * K 0
101276789Sdim         */
102276789Sdim        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
103276789Sdim        /* 0 <= sr <= n_uword_bits - 2 or sr large */
104276789Sdim        if (sr > n_uword_bits - 2)
105276789Sdim        {
106276789Sdim           if (rem)
107276789Sdim                *rem = n.all;
108276789Sdim            return 0;
109276789Sdim        }
110276789Sdim        ++sr;
111276789Sdim        /* 1 <= sr <= n_uword_bits - 1 */
112276789Sdim        /* q.all = n.all << (n_udword_bits - sr); */
113276789Sdim        q.s.low = 0;
114276789Sdim        q.s.high = n.s.low << (n_uword_bits - sr);
115276789Sdim        /* r.all = n.all >> sr; */
116276789Sdim        r.s.high = n.s.high >> sr;
117276789Sdim        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118276789Sdim    }
119276789Sdim    else  /* d.s.low != 0 */
120276789Sdim    {
121276789Sdim        if (d.s.high == 0)
122276789Sdim        {
123276789Sdim            /* K X
124276789Sdim             * ---
125276789Sdim             * 0 K
126276789Sdim             */
127276789Sdim            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
128276789Sdim            {
129276789Sdim                if (rem)
130276789Sdim                    *rem = n.s.low & (d.s.low - 1);
131276789Sdim                if (d.s.low == 1)
132276789Sdim                    return n.all;
133276789Sdim                sr = __builtin_ctz(d.s.low);
134276789Sdim                q.s.high = n.s.high >> sr;
135276789Sdim                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
136276789Sdim                return q.all;
137276789Sdim            }
138276789Sdim            /* K X
139276789Sdim             * ---
140276789Sdim             * 0 K
141276789Sdim             */
142276789Sdim            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
143276789Sdim            /* 2 <= sr <= n_udword_bits - 1
144276789Sdim             * q.all = n.all << (n_udword_bits - sr);
145276789Sdim             * r.all = n.all >> sr;
146276789Sdim             */
147276789Sdim            if (sr == n_uword_bits)
148276789Sdim            {
149276789Sdim                q.s.low = 0;
150276789Sdim                q.s.high = n.s.low;
151276789Sdim                r.s.high = 0;
152276789Sdim                r.s.low = n.s.high;
153276789Sdim            }
154276789Sdim            else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
155276789Sdim            {
156276789Sdim                q.s.low = 0;
157276789Sdim                q.s.high = n.s.low << (n_uword_bits - sr);
158276789Sdim                r.s.high = n.s.high >> sr;
159276789Sdim                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
160276789Sdim            }
161276789Sdim            else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
162276789Sdim            {
163276789Sdim                q.s.low = n.s.low << (n_udword_bits - sr);
164276789Sdim                q.s.high = (n.s.high << (n_udword_bits - sr)) |
165276789Sdim                           (n.s.low >> (sr - n_uword_bits));
166276789Sdim                r.s.high = 0;
167276789Sdim                r.s.low = n.s.high >> (sr - n_uword_bits);
168276789Sdim            }
169276789Sdim        }
170276789Sdim        else
171276789Sdim        {
172276789Sdim            /* K X
173276789Sdim             * ---
174276789Sdim             * K K
175276789Sdim             */
176276789Sdim            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
177276789Sdim            /* 0 <= sr <= n_uword_bits - 1 or sr large */
178276789Sdim            if (sr > n_uword_bits - 1)
179276789Sdim            {
180276789Sdim                if (rem)
181276789Sdim                    *rem = n.all;
182276789Sdim                return 0;
183276789Sdim            }
184276789Sdim            ++sr;
185276789Sdim            /* 1 <= sr <= n_uword_bits */
186276789Sdim            /*  q.all = n.all << (n_udword_bits - sr); */
187276789Sdim            q.s.low = 0;
188276789Sdim            if (sr == n_uword_bits)
189276789Sdim            {
190276789Sdim                q.s.high = n.s.low;
191276789Sdim                r.s.high = 0;
192276789Sdim                r.s.low = n.s.high;
193276789Sdim            }
194276789Sdim            else
195276789Sdim            {
196276789Sdim                q.s.high = n.s.low << (n_uword_bits - sr);
197276789Sdim                r.s.high = n.s.high >> sr;
198276789Sdim                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
199276789Sdim            }
200276789Sdim        }
201276789Sdim    }
202276789Sdim    /* Not a special case
203276789Sdim     * q and r are initialized with:
204276789Sdim     * q.all = n.all << (n_udword_bits - sr);
205276789Sdim     * r.all = n.all >> sr;
206276789Sdim     * 1 <= sr <= n_udword_bits - 1
207276789Sdim     */
208276789Sdim    su_int carry = 0;
209276789Sdim    for (; sr > 0; --sr)
210276789Sdim    {
211276789Sdim        /* r:q = ((r:q)  << 1) | carry */
212276789Sdim        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
213276789Sdim        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
214276789Sdim        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
215276789Sdim        q.s.low  = (q.s.low  << 1) | carry;
216276789Sdim        /* carry = 0;
217276789Sdim         * if (r.all >= d.all)
218276789Sdim         * {
219276789Sdim         *      r.all -= d.all;
220276789Sdim         *      carry = 1;
221276789Sdim         * }
222276789Sdim         */
223276789Sdim        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
224276789Sdim        carry = s & 1;
225276789Sdim        r.all -= d.all & s;
226276789Sdim    }
227276789Sdim    q.all = (q.all << 1) | carry;
228276789Sdim    if (rem)
229276789Sdim        *rem = r.all;
230276789Sdim    return q.all;
231276789Sdim}
232