1//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements __udivmoddi4 for the compiler_rt library.
10//
11//===----------------------------------------------------------------------===//
12
13#include "int_lib.h"
14
15// Effects: if rem != 0, *rem = a % b
16// Returns: a / b
17
18// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
19
20#if defined(_MSC_VER) && !defined(__clang__)
21// MSVC throws a warning about mod 0 here, disable it for builds that
22// warn-as-error
23#pragma warning(push)
24#pragma warning(disable : 4724)
25#endif
26
27COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
28  const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
29  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
30  udwords n;
31  n.all = a;
32  udwords d;
33  d.all = b;
34  udwords q;
35  udwords r;
36  unsigned sr;
37  // special cases, X is unknown, K != 0
38  if (n.s.high == 0) {
39    if (d.s.high == 0) {
40      // 0 X
41      // ---
42      // 0 X
43      if (rem)
44        *rem = n.s.low % d.s.low;
45      return n.s.low / d.s.low;
46    }
47    // 0 X
48    // ---
49    // K X
50    if (rem)
51      *rem = n.s.low;
52    return 0;
53  }
54  // n.s.high != 0
55  if (d.s.low == 0) {
56    if (d.s.high == 0) {
57      // K X
58      // ---
59      // 0 0
60      if (rem)
61        *rem = n.s.high % d.s.low;
62      return n.s.high / d.s.low;
63    }
64    // d.s.high != 0
65    if (n.s.low == 0) {
66      // K 0
67      // ---
68      // K 0
69      if (rem) {
70        r.s.high = n.s.high % d.s.high;
71        r.s.low = 0;
72        *rem = r.all;
73      }
74      return n.s.high / d.s.high;
75    }
76    // K K
77    // ---
78    // K 0
79    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
80      if (rem) {
81        r.s.low = n.s.low;
82        r.s.high = n.s.high & (d.s.high - 1);
83        *rem = r.all;
84      }
85      return n.s.high >> __builtin_ctz(d.s.high);
86    }
87    // K K
88    // ---
89    // K 0
90    sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
91    // 0 <= sr <= n_uword_bits - 2 or sr large
92    if (sr > n_uword_bits - 2) {
93      if (rem)
94        *rem = n.all;
95      return 0;
96    }
97    ++sr;
98    // 1 <= sr <= n_uword_bits - 1
99    // q.all = n.all << (n_udword_bits - sr);
100    q.s.low = 0;
101    q.s.high = n.s.low << (n_uword_bits - sr);
102    // r.all = n.all >> sr;
103    r.s.high = n.s.high >> sr;
104    r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
105  } else /* d.s.low != 0 */ {
106    if (d.s.high == 0) {
107      // K X
108      // ---
109      // 0 K
110      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
111        if (rem)
112          *rem = n.s.low & (d.s.low - 1);
113        if (d.s.low == 1)
114          return n.all;
115        sr = __builtin_ctz(d.s.low);
116        q.s.high = n.s.high >> sr;
117        q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118        return q.all;
119      }
120      // K X
121      // ---
122      // 0 K
123      sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
124      // 2 <= sr <= n_udword_bits - 1
125      // q.all = n.all << (n_udword_bits - sr);
126      // r.all = n.all >> sr;
127      if (sr == n_uword_bits) {
128        q.s.low = 0;
129        q.s.high = n.s.low;
130        r.s.high = 0;
131        r.s.low = n.s.high;
132      } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
133        q.s.low = 0;
134        q.s.high = n.s.low << (n_uword_bits - sr);
135        r.s.high = n.s.high >> sr;
136        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
137      } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
138        q.s.low = n.s.low << (n_udword_bits - sr);
139        q.s.high = (n.s.high << (n_udword_bits - sr)) |
140                   (n.s.low >> (sr - n_uword_bits));
141        r.s.high = 0;
142        r.s.low = n.s.high >> (sr - n_uword_bits);
143      }
144    } else {
145      // K X
146      // ---
147      // K K
148      sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
149      // 0 <= sr <= n_uword_bits - 1 or sr large
150      if (sr > n_uword_bits - 1) {
151        if (rem)
152          *rem = n.all;
153        return 0;
154      }
155      ++sr;
156      // 1 <= sr <= n_uword_bits
157      // q.all = n.all << (n_udword_bits - sr);
158      q.s.low = 0;
159      if (sr == n_uword_bits) {
160        q.s.high = n.s.low;
161        r.s.high = 0;
162        r.s.low = n.s.high;
163      } else {
164        q.s.high = n.s.low << (n_uword_bits - sr);
165        r.s.high = n.s.high >> sr;
166        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
167      }
168    }
169  }
170  // Not a special case
171  // q and r are initialized with:
172  // q.all = n.all << (n_udword_bits - sr);
173  // r.all = n.all >> sr;
174  // 1 <= sr <= n_udword_bits - 1
175  su_int carry = 0;
176  for (; sr > 0; --sr) {
177    // r:q = ((r:q)  << 1) | carry
178    r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
179    r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
180    q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
181    q.s.low = (q.s.low << 1) | carry;
182    // carry = 0;
183    // if (r.all >= d.all)
184    // {
185    //      r.all -= d.all;
186    //      carry = 1;
187    // }
188    const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
189    carry = s & 1;
190    r.all -= d.all & s;
191  }
192  q.all = (q.all << 1) | carry;
193  if (rem)
194    *rem = r.all;
195  return q.all;
196}
197
198#if defined(_MSC_VER) && !defined(__clang__)
199#pragma warning(pop)
200#endif
201