1353358Sdim//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
2353358Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6353358Sdim//
7353358Sdim//===----------------------------------------------------------------------===//
8353358Sdim//
9353358Sdim// This file implements __udivmoddi4 for the compiler_rt library.
10353358Sdim//
11353358Sdim//===----------------------------------------------------------------------===//
12276789Sdim
13276789Sdim#include "int_lib.h"
14276789Sdim
15353358Sdim// Effects: if rem != 0, *rem = a % b
16353358Sdim// Returns: a / b
17276789Sdim
18353358Sdim// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
19276789Sdim
20360784Sdim#if defined(_MSC_VER) && !defined(__clang__)
21360784Sdim// MSVC throws a warning about mod 0 here, disable it for builds that
22360784Sdim// warn-as-error
23360784Sdim#pragma warning(push)
24360784Sdim#pragma warning(disable : 4724)
25360784Sdim#endif
26360784Sdim
27353358SdimCOMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
28353358Sdim  const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
29353358Sdim  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
30353358Sdim  udwords n;
31353358Sdim  n.all = a;
32353358Sdim  udwords d;
33353358Sdim  d.all = b;
34353358Sdim  udwords q;
35353358Sdim  udwords r;
36353358Sdim  unsigned sr;
37353358Sdim  // special cases, X is unknown, K != 0
38353358Sdim  if (n.s.high == 0) {
39353358Sdim    if (d.s.high == 0) {
40353358Sdim      // 0 X
41353358Sdim      // ---
42353358Sdim      // 0 X
43353358Sdim      if (rem)
44353358Sdim        *rem = n.s.low % d.s.low;
45353358Sdim      return n.s.low / d.s.low;
46353358Sdim    }
47353358Sdim    // 0 X
48353358Sdim    // ---
49353358Sdim    // K X
50353358Sdim    if (rem)
51353358Sdim      *rem = n.s.low;
52353358Sdim    return 0;
53353358Sdim  }
54353358Sdim  // n.s.high != 0
55353358Sdim  if (d.s.low == 0) {
56353358Sdim    if (d.s.high == 0) {
57353358Sdim      // K X
58353358Sdim      // ---
59353358Sdim      // 0 0
60353358Sdim      if (rem)
61353358Sdim        *rem = n.s.high % d.s.low;
62353358Sdim      return n.s.high / d.s.low;
63353358Sdim    }
64353358Sdim    // d.s.high != 0
65353358Sdim    if (n.s.low == 0) {
66353358Sdim      // K 0
67353358Sdim      // ---
68353358Sdim      // K 0
69353358Sdim      if (rem) {
70353358Sdim        r.s.high = n.s.high % d.s.high;
71353358Sdim        r.s.low = 0;
72353358Sdim        *rem = r.all;
73353358Sdim      }
74353358Sdim      return n.s.high / d.s.high;
75353358Sdim    }
76353358Sdim    // K K
77353358Sdim    // ---
78353358Sdim    // K 0
79353358Sdim    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
80353358Sdim      if (rem) {
81353358Sdim        r.s.low = n.s.low;
82353358Sdim        r.s.high = n.s.high & (d.s.high - 1);
83353358Sdim        *rem = r.all;
84353358Sdim      }
85353358Sdim      return n.s.high >> __builtin_ctz(d.s.high);
86353358Sdim    }
87353358Sdim    // K K
88353358Sdim    // ---
89353358Sdim    // K 0
90353358Sdim    sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
91353358Sdim    // 0 <= sr <= n_uword_bits - 2 or sr large
92353358Sdim    if (sr > n_uword_bits - 2) {
93353358Sdim      if (rem)
94353358Sdim        *rem = n.all;
95353358Sdim      return 0;
96353358Sdim    }
97353358Sdim    ++sr;
98353358Sdim    // 1 <= sr <= n_uword_bits - 1
99353358Sdim    // q.all = n.all << (n_udword_bits - sr);
100353358Sdim    q.s.low = 0;
101353358Sdim    q.s.high = n.s.low << (n_uword_bits - sr);
102353358Sdim    // r.all = n.all >> sr;
103353358Sdim    r.s.high = n.s.high >> sr;
104353358Sdim    r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
105353358Sdim  } else /* d.s.low != 0 */ {
106353358Sdim    if (d.s.high == 0) {
107353358Sdim      // K X
108353358Sdim      // ---
109353358Sdim      // 0 K
110353358Sdim      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
111276789Sdim        if (rem)
112353358Sdim          *rem = n.s.low & (d.s.low - 1);
113353358Sdim        if (d.s.low == 1)
114353358Sdim          return n.all;
115353358Sdim        sr = __builtin_ctz(d.s.low);
116353358Sdim        q.s.high = n.s.high >> sr;
117353358Sdim        q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118353358Sdim        return q.all;
119353358Sdim      }
120353358Sdim      // K X
121353358Sdim      // ---
122353358Sdim      // 0 K
123353358Sdim      sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
124353358Sdim      // 2 <= sr <= n_udword_bits - 1
125353358Sdim      // q.all = n.all << (n_udword_bits - sr);
126353358Sdim      // r.all = n.all >> sr;
127353358Sdim      if (sr == n_uword_bits) {
128276789Sdim        q.s.low = 0;
129353358Sdim        q.s.high = n.s.low;
130353358Sdim        r.s.high = 0;
131353358Sdim        r.s.low = n.s.high;
132353358Sdim      } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
133353358Sdim        q.s.low = 0;
134276789Sdim        q.s.high = n.s.low << (n_uword_bits - sr);
135276789Sdim        r.s.high = n.s.high >> sr;
136276789Sdim        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
137353358Sdim      } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
138353358Sdim        q.s.low = n.s.low << (n_udword_bits - sr);
139353358Sdim        q.s.high = (n.s.high << (n_udword_bits - sr)) |
140353358Sdim                   (n.s.low >> (sr - n_uword_bits));
141353358Sdim        r.s.high = 0;
142353358Sdim        r.s.low = n.s.high >> (sr - n_uword_bits);
143353358Sdim      }
144353358Sdim    } else {
145353358Sdim      // K X
146353358Sdim      // ---
147353358Sdim      // K K
148353358Sdim      sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
149353358Sdim      // 0 <= sr <= n_uword_bits - 1 or sr large
150353358Sdim      if (sr > n_uword_bits - 1) {
151353358Sdim        if (rem)
152353358Sdim          *rem = n.all;
153353358Sdim        return 0;
154353358Sdim      }
155353358Sdim      ++sr;
156353358Sdim      // 1 <= sr <= n_uword_bits
157353358Sdim      // q.all = n.all << (n_udword_bits - sr);
158353358Sdim      q.s.low = 0;
159353358Sdim      if (sr == n_uword_bits) {
160353358Sdim        q.s.high = n.s.low;
161353358Sdim        r.s.high = 0;
162353358Sdim        r.s.low = n.s.high;
163353358Sdim      } else {
164353358Sdim        q.s.high = n.s.low << (n_uword_bits - sr);
165353358Sdim        r.s.high = n.s.high >> sr;
166353358Sdim        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
167353358Sdim      }
168276789Sdim    }
169353358Sdim  }
170353358Sdim  // Not a special case
171353358Sdim  // q and r are initialized with:
172353358Sdim  // q.all = n.all << (n_udword_bits - sr);
173353358Sdim  // r.all = n.all >> sr;
174353358Sdim  // 1 <= sr <= n_udword_bits - 1
175353358Sdim  su_int carry = 0;
176353358Sdim  for (; sr > 0; --sr) {
177353358Sdim    // r:q = ((r:q)  << 1) | carry
178353358Sdim    r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
179353358Sdim    r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
180353358Sdim    q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
181353358Sdim    q.s.low = (q.s.low << 1) | carry;
182353358Sdim    // carry = 0;
183353358Sdim    // if (r.all >= d.all)
184353358Sdim    // {
185353358Sdim    //      r.all -= d.all;
186353358Sdim    //      carry = 1;
187353358Sdim    // }
188353358Sdim    const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
189353358Sdim    carry = s & 1;
190353358Sdim    r.all -= d.all & s;
191353358Sdim  }
192353358Sdim  q.all = (q.all << 1) | carry;
193353358Sdim  if (rem)
194353358Sdim    *rem = r.all;
195353358Sdim  return q.all;
196276789Sdim}
197360784Sdim
198360784Sdim#if defined(_MSC_VER) && !defined(__clang__)
199360784Sdim#pragma warning(pop)
200360784Sdim#endif
201