1214152Sed/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2214152Sed *
3214152Sed *                     The LLVM Compiler Infrastructure
4214152Sed *
5222656Sed * This file is dual licensed under the MIT and the University of Illinois Open
6222656Sed * Source Licenses. See LICENSE.TXT for details.
7214152Sed *
8214152Sed * ===----------------------------------------------------------------------===
9214152Sed *
10214152Sed * This file implements __udivmoddi4 for the compiler_rt library.
11214152Sed *
12214152Sed * ===----------------------------------------------------------------------===
13214152Sed */
14214152Sed
15214152Sed#include "int_lib.h"
16214152Sed
17214152Sed/* Effects: if rem != 0, *rem = a % b
18214152Sed * Returns: a / b
19214152Sed */
20214152Sed
21214152Sed/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
22214152Sed
23222656SedCOMPILER_RT_ABI du_int
24214152Sed__udivmoddi4(du_int a, du_int b, du_int* rem)
25214152Sed{
26214152Sed    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
27214152Sed    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
28214152Sed    udwords n;
29214152Sed    n.all = a;
30214152Sed    udwords d;
31214152Sed    d.all = b;
32214152Sed    udwords q;
33214152Sed    udwords r;
34214152Sed    unsigned sr;
35214152Sed    /* special cases, X is unknown, K != 0 */
36214152Sed    if (n.s.high == 0)
37214152Sed    {
38214152Sed        if (d.s.high == 0)
39214152Sed        {
40214152Sed            /* 0 X
41214152Sed             * ---
42214152Sed             * 0 X
43214152Sed             */
44214152Sed            if (rem)
45214152Sed                *rem = n.s.low % d.s.low;
46214152Sed            return n.s.low / d.s.low;
47214152Sed        }
48214152Sed        /* 0 X
49214152Sed         * ---
50214152Sed         * K X
51214152Sed         */
52214152Sed        if (rem)
53214152Sed            *rem = n.s.low;
54214152Sed        return 0;
55214152Sed    }
56214152Sed    /* n.s.high != 0 */
57214152Sed    if (d.s.low == 0)
58214152Sed    {
59214152Sed        if (d.s.high == 0)
60214152Sed        {
61214152Sed            /* K X
62214152Sed             * ---
63214152Sed             * 0 0
64214152Sed             */
65214152Sed            if (rem)
66214152Sed                *rem = n.s.high % d.s.low;
67214152Sed            return n.s.high / d.s.low;
68214152Sed        }
69214152Sed        /* d.s.high != 0 */
70214152Sed        if (n.s.low == 0)
71214152Sed        {
72214152Sed            /* K 0
73214152Sed             * ---
74214152Sed             * K 0
75214152Sed             */
76214152Sed            if (rem)
77214152Sed            {
78214152Sed                r.s.high = n.s.high % d.s.high;
79214152Sed                r.s.low = 0;
80214152Sed                *rem = r.all;
81214152Sed            }
82214152Sed            return n.s.high / d.s.high;
83214152Sed        }
84214152Sed        /* K K
85214152Sed         * ---
86214152Sed         * K 0
87214152Sed         */
88214152Sed        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
89214152Sed        {
90214152Sed            if (rem)
91214152Sed            {
92214152Sed                r.s.low = n.s.low;
93214152Sed                r.s.high = n.s.high & (d.s.high - 1);
94214152Sed                *rem = r.all;
95214152Sed            }
96214152Sed            return n.s.high >> __builtin_ctz(d.s.high);
97214152Sed        }
98214152Sed        /* K K
99214152Sed         * ---
100214152Sed         * K 0
101214152Sed         */
102214152Sed        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
103214152Sed        /* 0 <= sr <= n_uword_bits - 2 or sr large */
104214152Sed        if (sr > n_uword_bits - 2)
105214152Sed        {
106214152Sed           if (rem)
107214152Sed                *rem = n.all;
108214152Sed            return 0;
109214152Sed        }
110214152Sed        ++sr;
111214152Sed        /* 1 <= sr <= n_uword_bits - 1 */
112214152Sed        /* q.all = n.all << (n_udword_bits - sr); */
113214152Sed        q.s.low = 0;
114214152Sed        q.s.high = n.s.low << (n_uword_bits - sr);
115214152Sed        /* r.all = n.all >> sr; */
116214152Sed        r.s.high = n.s.high >> sr;
117214152Sed        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118214152Sed    }
119214152Sed    else  /* d.s.low != 0 */
120214152Sed    {
121214152Sed        if (d.s.high == 0)
122214152Sed        {
123214152Sed            /* K X
124214152Sed             * ---
125214152Sed             * 0 K
126214152Sed             */
127214152Sed            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
128214152Sed            {
129214152Sed                if (rem)
130214152Sed                    *rem = n.s.low & (d.s.low - 1);
131214152Sed                if (d.s.low == 1)
132214152Sed                    return n.all;
133236011Smarius                sr = __builtin_ctz(d.s.low);
134214152Sed                q.s.high = n.s.high >> sr;
135214152Sed                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
136214152Sed                return q.all;
137214152Sed            }
138214152Sed            /* K X
139214152Sed             * ---
140214152Sed             *0 K
141214152Sed             */
142214152Sed            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
143214152Sed            /* 2 <= sr <= n_udword_bits - 1
144214152Sed             * q.all = n.all << (n_udword_bits - sr);
145214152Sed             * r.all = n.all >> sr;
146214152Sed             * if (sr == n_uword_bits)
147214152Sed             * {
148214152Sed             *     q.s.low = 0;
149214152Sed             *     q.s.high = n.s.low;
150214152Sed             *     r.s.high = 0;
151214152Sed             *     r.s.low = n.s.high;
152214152Sed             * }
153214152Sed             * else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
154214152Sed             * {
155214152Sed             *     q.s.low = 0;
156214152Sed             *     q.s.high = n.s.low << (n_uword_bits - sr);
157214152Sed             *     r.s.high = n.s.high >> sr;
158214152Sed             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
159214152Sed             * }
160214152Sed             * else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
161214152Sed             * {
162214152Sed             *     q.s.low = n.s.low << (n_udword_bits - sr);
163214152Sed             *     q.s.high = (n.s.high << (n_udword_bits - sr)) |
164214152Sed             *              (n.s.low >> (sr - n_uword_bits));
165214152Sed             *     r.s.high = 0;
166214152Sed             *     r.s.low = n.s.high >> (sr - n_uword_bits);
167214152Sed             * }
168214152Sed             */
169214152Sed            q.s.low =  (n.s.low << (n_udword_bits - sr)) &
170214152Sed                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1));
171214152Sed            q.s.high = ((n.s.low << ( n_uword_bits - sr))                       &
172214152Sed                     ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) |
173214152Sed                     (((n.s.high << (n_udword_bits - sr))                     |
174214152Sed                     (n.s.low >> (sr - n_uword_bits)))                        &
175214152Sed                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)));
176214152Sed            r.s.high = (n.s.high >> sr) &
177214152Sed                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
178214152Sed            r.s.low =  ((n.s.high >> (sr - n_uword_bits))                       &
179214152Sed                     ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) |
180214152Sed                     (((n.s.high << (n_uword_bits - sr))                      |
181214152Sed                     (n.s.low >> sr))                                         &
182214152Sed                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
183214152Sed        }
184214152Sed        else
185214152Sed        {
186214152Sed            /* K X
187214152Sed             * ---
188214152Sed             * K K
189214152Sed             */
190214152Sed            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
191214152Sed            /* 0 <= sr <= n_uword_bits - 1 or sr large */
192214152Sed            if (sr > n_uword_bits - 1)
193214152Sed            {
194214152Sed               if (rem)
195214152Sed                    *rem = n.all;
196214152Sed                return 0;
197214152Sed            }
198214152Sed            ++sr;
199214152Sed            /* 1 <= sr <= n_uword_bits */
200214152Sed            /*  q.all = n.all << (n_udword_bits - sr); */
201214152Sed            q.s.low = 0;
202214152Sed            q.s.high = n.s.low << (n_uword_bits - sr);
203214152Sed            /* r.all = n.all >> sr;
204214152Sed             * if (sr < n_uword_bits)
205214152Sed             * {
206214152Sed             *     r.s.high = n.s.high >> sr;
207214152Sed             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
208214152Sed             * }
209214152Sed             * else
210214152Sed             * {
211214152Sed             *     r.s.high = 0;
212214152Sed             *     r.s.low = n.s.high;
213214152Sed             * }
214214152Sed             */
215214152Sed            r.s.high = (n.s.high >> sr) &
216214152Sed                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
217214152Sed            r.s.low = (n.s.high << (n_uword_bits - sr)) |
218214152Sed                    ((n.s.low >> sr)                  &
219214152Sed                    ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
220214152Sed        }
221214152Sed    }
222214152Sed    /* Not a special case
223214152Sed     * q and r are initialized with:
224214152Sed     * q.all = n.all << (n_udword_bits - sr);
225214152Sed     * r.all = n.all >> sr;
226214152Sed     * 1 <= sr <= n_udword_bits - 1
227214152Sed     */
228214152Sed    su_int carry = 0;
229214152Sed    for (; sr > 0; --sr)
230214152Sed    {
231214152Sed        /* r:q = ((r:q)  << 1) | carry */
232214152Sed        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
233214152Sed        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
234214152Sed        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
235214152Sed        q.s.low  = (q.s.low  << 1) | carry;
236214152Sed        /* carry = 0;
237214152Sed         * if (r.all >= d.all)
238214152Sed         * {
239214152Sed         *      r.all -= d.all;
240214152Sed         *      carry = 1;
241214152Sed         * }
242214152Sed         */
243214152Sed        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
244214152Sed        carry = s & 1;
245214152Sed        r.all -= d.all & s;
246214152Sed    }
247214152Sed    q.all = (q.all << 1) | carry;
248214152Sed    if (rem)
249214152Sed        *rem = r.all;
250214152Sed    return q.all;
251214152Sed}
252