udivmoddi4.c revision 222656
1/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 * ===----------------------------------------------------------------------===
9 *
10 * This file implements __udivmoddi4 for the compiler_rt library.
11 *
12 * ===----------------------------------------------------------------------===
13 */
14#include "abi.h"
15
16#include "int_lib.h"
17
18/* Effects: if rem != 0, *rem = a % b
19 * Returns: a / b
20 */
21
22/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
23
24ARM_EABI_FNALIAS(uldivmod, udivmoddi4);
25
26COMPILER_RT_ABI du_int
27__udivmoddi4(du_int a, du_int b, du_int* rem)
28{
29    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
30    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
31    udwords n;
32    n.all = a;
33    udwords d;
34    d.all = b;
35    udwords q;
36    udwords r;
37    unsigned sr;
38    /* special cases, X is unknown, K != 0 */
39    if (n.s.high == 0)
40    {
41        if (d.s.high == 0)
42        {
43            /* 0 X
44             * ---
45             * 0 X
46             */
47            if (rem)
48                *rem = n.s.low % d.s.low;
49            return n.s.low / d.s.low;
50        }
51        /* 0 X
52         * ---
53         * K X
54         */
55        if (rem)
56            *rem = n.s.low;
57        return 0;
58    }
59    /* n.s.high != 0 */
60    if (d.s.low == 0)
61    {
62        if (d.s.high == 0)
63        {
64            /* K X
65             * ---
66             * 0 0
67             */
68            if (rem)
69                *rem = n.s.high % d.s.low;
70            return n.s.high / d.s.low;
71        }
72        /* d.s.high != 0 */
73        if (n.s.low == 0)
74        {
75            /* K 0
76             * ---
77             * K 0
78             */
79            if (rem)
80            {
81                r.s.high = n.s.high % d.s.high;
82                r.s.low = 0;
83                *rem = r.all;
84            }
85            return n.s.high / d.s.high;
86        }
87        /* K K
88         * ---
89         * K 0
90         */
91        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
92        {
93            if (rem)
94            {
95                r.s.low = n.s.low;
96                r.s.high = n.s.high & (d.s.high - 1);
97                *rem = r.all;
98            }
99            return n.s.high >> __builtin_ctz(d.s.high);
100        }
101        /* K K
102         * ---
103         * K 0
104         */
105        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
106        /* 0 <= sr <= n_uword_bits - 2 or sr large */
107        if (sr > n_uword_bits - 2)
108        {
109           if (rem)
110                *rem = n.all;
111            return 0;
112        }
113        ++sr;
114        /* 1 <= sr <= n_uword_bits - 1 */
115        /* q.all = n.all << (n_udword_bits - sr); */
116        q.s.low = 0;
117        q.s.high = n.s.low << (n_uword_bits - sr);
118        /* r.all = n.all >> sr; */
119        r.s.high = n.s.high >> sr;
120        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
121    }
122    else  /* d.s.low != 0 */
123    {
124        if (d.s.high == 0)
125        {
126            /* K X
127             * ---
128             * 0 K
129             */
130            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
131            {
132                if (rem)
133                    *rem = n.s.low & (d.s.low - 1);
134                if (d.s.low == 1)
135                    return n.all;
136                unsigned sr = __builtin_ctz(d.s.low);
137                q.s.high = n.s.high >> sr;
138                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
139                return q.all;
140            }
141            /* K X
142             * ---
143             *0 K
144             */
145            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
146            /* 2 <= sr <= n_udword_bits - 1
147             * q.all = n.all << (n_udword_bits - sr);
148             * r.all = n.all >> sr;
149             * if (sr == n_uword_bits)
150             * {
151             *     q.s.low = 0;
152             *     q.s.high = n.s.low;
153             *     r.s.high = 0;
154             *     r.s.low = n.s.high;
155             * }
156             * else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
157             * {
158             *     q.s.low = 0;
159             *     q.s.high = n.s.low << (n_uword_bits - sr);
160             *     r.s.high = n.s.high >> sr;
161             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
162             * }
163             * else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
164             * {
165             *     q.s.low = n.s.low << (n_udword_bits - sr);
166             *     q.s.high = (n.s.high << (n_udword_bits - sr)) |
167             *              (n.s.low >> (sr - n_uword_bits));
168             *     r.s.high = 0;
169             *     r.s.low = n.s.high >> (sr - n_uword_bits);
170             * }
171             */
172            q.s.low =  (n.s.low << (n_udword_bits - sr)) &
173                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1));
174            q.s.high = ((n.s.low << ( n_uword_bits - sr))                       &
175                     ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) |
176                     (((n.s.high << (n_udword_bits - sr))                     |
177                     (n.s.low >> (sr - n_uword_bits)))                        &
178                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)));
179            r.s.high = (n.s.high >> sr) &
180                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
181            r.s.low =  ((n.s.high >> (sr - n_uword_bits))                       &
182                     ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) |
183                     (((n.s.high << (n_uword_bits - sr))                      |
184                     (n.s.low >> sr))                                         &
185                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
186        }
187        else
188        {
189            /* K X
190             * ---
191             * K K
192             */
193            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
194            /* 0 <= sr <= n_uword_bits - 1 or sr large */
195            if (sr > n_uword_bits - 1)
196            {
197               if (rem)
198                    *rem = n.all;
199                return 0;
200            }
201            ++sr;
202            /* 1 <= sr <= n_uword_bits */
203            /*  q.all = n.all << (n_udword_bits - sr); */
204            q.s.low = 0;
205            q.s.high = n.s.low << (n_uword_bits - sr);
206            /* r.all = n.all >> sr;
207             * if (sr < n_uword_bits)
208             * {
209             *     r.s.high = n.s.high >> sr;
210             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
211             * }
212             * else
213             * {
214             *     r.s.high = 0;
215             *     r.s.low = n.s.high;
216             * }
217             */
218            r.s.high = (n.s.high >> sr) &
219                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
220            r.s.low = (n.s.high << (n_uword_bits - sr)) |
221                    ((n.s.low >> sr)                  &
222                    ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
223        }
224    }
225    /* Not a special case
226     * q and r are initialized with:
227     * q.all = n.all << (n_udword_bits - sr);
228     * r.all = n.all >> sr;
229     * 1 <= sr <= n_udword_bits - 1
230     */
231    su_int carry = 0;
232    for (; sr > 0; --sr)
233    {
234        /* r:q = ((r:q)  << 1) | carry */
235        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
236        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
237        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
238        q.s.low  = (q.s.low  << 1) | carry;
239        /* carry = 0;
240         * if (r.all >= d.all)
241         * {
242         *      r.all -= d.all;
243         *      carry = 1;
244         * }
245         */
246        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
247        carry = s & 1;
248        r.all -= d.all & s;
249    }
250    q.all = (q.all << 1) | carry;
251    if (rem)
252        *rem = r.all;
253    return q.all;
254}
255