udivmoddi4.c revision 353358
1//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements __udivmoddi4 for the compiler_rt library.
10//
11//===----------------------------------------------------------------------===//
12
13#include "int_lib.h"
14
15// Effects: if rem != 0, *rem = a % b
16// Returns: a / b
17
18// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
19
20COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
21  const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
22  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
23  udwords n;
24  n.all = a;
25  udwords d;
26  d.all = b;
27  udwords q;
28  udwords r;
29  unsigned sr;
30  // special cases, X is unknown, K != 0
31  if (n.s.high == 0) {
32    if (d.s.high == 0) {
33      // 0 X
34      // ---
35      // 0 X
36      if (rem)
37        *rem = n.s.low % d.s.low;
38      return n.s.low / d.s.low;
39    }
40    // 0 X
41    // ---
42    // K X
43    if (rem)
44      *rem = n.s.low;
45    return 0;
46  }
47  // n.s.high != 0
48  if (d.s.low == 0) {
49    if (d.s.high == 0) {
50      // K X
51      // ---
52      // 0 0
53      if (rem)
54        *rem = n.s.high % d.s.low;
55      return n.s.high / d.s.low;
56    }
57    // d.s.high != 0
58    if (n.s.low == 0) {
59      // K 0
60      // ---
61      // K 0
62      if (rem) {
63        r.s.high = n.s.high % d.s.high;
64        r.s.low = 0;
65        *rem = r.all;
66      }
67      return n.s.high / d.s.high;
68    }
69    // K K
70    // ---
71    // K 0
72    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
73      if (rem) {
74        r.s.low = n.s.low;
75        r.s.high = n.s.high & (d.s.high - 1);
76        *rem = r.all;
77      }
78      return n.s.high >> __builtin_ctz(d.s.high);
79    }
80    // K K
81    // ---
82    // K 0
83    sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
84    // 0 <= sr <= n_uword_bits - 2 or sr large
85    if (sr > n_uword_bits - 2) {
86      if (rem)
87        *rem = n.all;
88      return 0;
89    }
90    ++sr;
91    // 1 <= sr <= n_uword_bits - 1
92    // q.all = n.all << (n_udword_bits - sr);
93    q.s.low = 0;
94    q.s.high = n.s.low << (n_uword_bits - sr);
95    // r.all = n.all >> sr;
96    r.s.high = n.s.high >> sr;
97    r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
98  } else /* d.s.low != 0 */ {
99    if (d.s.high == 0) {
100      // K X
101      // ---
102      // 0 K
103      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
104        if (rem)
105          *rem = n.s.low & (d.s.low - 1);
106        if (d.s.low == 1)
107          return n.all;
108        sr = __builtin_ctz(d.s.low);
109        q.s.high = n.s.high >> sr;
110        q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
111        return q.all;
112      }
113      // K X
114      // ---
115      // 0 K
116      sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
117      // 2 <= sr <= n_udword_bits - 1
118      // q.all = n.all << (n_udword_bits - sr);
119      // r.all = n.all >> sr;
120      if (sr == n_uword_bits) {
121        q.s.low = 0;
122        q.s.high = n.s.low;
123        r.s.high = 0;
124        r.s.low = n.s.high;
125      } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
126        q.s.low = 0;
127        q.s.high = n.s.low << (n_uword_bits - sr);
128        r.s.high = n.s.high >> sr;
129        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
130      } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
131        q.s.low = n.s.low << (n_udword_bits - sr);
132        q.s.high = (n.s.high << (n_udword_bits - sr)) |
133                   (n.s.low >> (sr - n_uword_bits));
134        r.s.high = 0;
135        r.s.low = n.s.high >> (sr - n_uword_bits);
136      }
137    } else {
138      // K X
139      // ---
140      // K K
141      sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
142      // 0 <= sr <= n_uword_bits - 1 or sr large
143      if (sr > n_uword_bits - 1) {
144        if (rem)
145          *rem = n.all;
146        return 0;
147      }
148      ++sr;
149      // 1 <= sr <= n_uword_bits
150      // q.all = n.all << (n_udword_bits - sr);
151      q.s.low = 0;
152      if (sr == n_uword_bits) {
153        q.s.high = n.s.low;
154        r.s.high = 0;
155        r.s.low = n.s.high;
156      } else {
157        q.s.high = n.s.low << (n_uword_bits - sr);
158        r.s.high = n.s.high >> sr;
159        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
160      }
161    }
162  }
163  // Not a special case
164  // q and r are initialized with:
165  // q.all = n.all << (n_udword_bits - sr);
166  // r.all = n.all >> sr;
167  // 1 <= sr <= n_udword_bits - 1
168  su_int carry = 0;
169  for (; sr > 0; --sr) {
170    // r:q = ((r:q)  << 1) | carry
171    r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
172    r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
173    q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
174    q.s.low = (q.s.low << 1) | carry;
175    // carry = 0;
176    // if (r.all >= d.all)
177    // {
178    //      r.all -= d.all;
179    //      carry = 1;
180    // }
181    const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
182    carry = s & 1;
183    r.all -= d.all & s;
184  }
185  q.all = (q.all << 1) | carry;
186  if (rem)
187    *rem = r.all;
188  return q.all;
189}
190