1353358Sdim//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===// 2353358Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6353358Sdim// 7353358Sdim//===----------------------------------------------------------------------===// 8353358Sdim// 9353358Sdim// This file implements __udivmoddi4 for the compiler_rt library. 10353358Sdim// 11353358Sdim//===----------------------------------------------------------------------===// 12276789Sdim 13276789Sdim#include "int_lib.h" 14276789Sdim 15353358Sdim// Effects: if rem != 0, *rem = a % b 16353358Sdim// Returns: a / b 17276789Sdim 18353358Sdim// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide 19276789Sdim 20360784Sdim#if defined(_MSC_VER) && !defined(__clang__) 21360784Sdim// MSVC throws a warning about mod 0 here, disable it for builds that 22360784Sdim// warn-as-error 23360784Sdim#pragma warning(push) 24360784Sdim#pragma warning(disable : 4724) 25360784Sdim#endif 26360784Sdim 27353358SdimCOMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { 28353358Sdim const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 29353358Sdim const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 30353358Sdim udwords n; 31353358Sdim n.all = a; 32353358Sdim udwords d; 33353358Sdim d.all = b; 34353358Sdim udwords q; 35353358Sdim udwords r; 36353358Sdim unsigned sr; 37353358Sdim // special cases, X is unknown, K != 0 38353358Sdim if (n.s.high == 0) { 39353358Sdim if (d.s.high == 0) { 40353358Sdim // 0 X 41353358Sdim // --- 42353358Sdim // 0 X 43353358Sdim if (rem) 44353358Sdim *rem = n.s.low % d.s.low; 45353358Sdim return n.s.low / d.s.low; 46353358Sdim } 47353358Sdim // 0 X 48353358Sdim // --- 49353358Sdim // K X 50353358Sdim if (rem) 51353358Sdim *rem = n.s.low; 52353358Sdim return 0; 53353358Sdim } 54353358Sdim // n.s.high != 0 55353358Sdim if (d.s.low == 0) { 56353358Sdim if (d.s.high == 0) { 57353358Sdim // K X 58353358Sdim // --- 59353358Sdim // 0 0 60353358Sdim if (rem) 61353358Sdim *rem = n.s.high % d.s.low; 62353358Sdim return n.s.high / d.s.low; 63353358Sdim } 64353358Sdim // d.s.high != 0 65353358Sdim if (n.s.low == 0) { 66353358Sdim // K 0 67353358Sdim // --- 68353358Sdim // K 0 69353358Sdim if (rem) { 70353358Sdim r.s.high = n.s.high % d.s.high; 71353358Sdim r.s.low = 0; 72353358Sdim *rem = r.all; 73353358Sdim } 74353358Sdim return n.s.high / d.s.high; 75353358Sdim } 76353358Sdim // K K 77353358Sdim // --- 78353358Sdim // K 0 79353358Sdim if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { 80353358Sdim if (rem) { 81353358Sdim r.s.low = n.s.low; 82353358Sdim r.s.high = n.s.high & (d.s.high - 1); 83353358Sdim *rem = r.all; 84353358Sdim } 85353358Sdim return n.s.high >> __builtin_ctz(d.s.high); 86353358Sdim } 87353358Sdim // K K 88353358Sdim // --- 89353358Sdim // K 0 90353358Sdim sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 91353358Sdim // 0 <= sr <= n_uword_bits - 2 or sr large 92353358Sdim if (sr > n_uword_bits - 2) { 93353358Sdim if (rem) 94353358Sdim *rem = n.all; 95353358Sdim return 0; 96353358Sdim } 97353358Sdim ++sr; 98353358Sdim // 1 <= sr <= n_uword_bits - 1 99353358Sdim // q.all = n.all << (n_udword_bits - sr); 100353358Sdim q.s.low = 0; 101353358Sdim q.s.high = n.s.low << (n_uword_bits - sr); 102353358Sdim // r.all = n.all >> sr; 103353358Sdim r.s.high = n.s.high >> sr; 104353358Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 105353358Sdim } else /* d.s.low != 0 */ { 106353358Sdim if (d.s.high == 0) { 107353358Sdim // K X 108353358Sdim // --- 109353358Sdim // 0 K 110353358Sdim if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ { 111276789Sdim if (rem) 112353358Sdim *rem = n.s.low & (d.s.low - 1); 113353358Sdim if (d.s.low == 1) 114353358Sdim return n.all; 115353358Sdim sr = __builtin_ctz(d.s.low); 116353358Sdim q.s.high = n.s.high >> sr; 117353358Sdim q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118353358Sdim return q.all; 119353358Sdim } 120353358Sdim // K X 121353358Sdim // --- 122353358Sdim // 0 K 123353358Sdim sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 124353358Sdim // 2 <= sr <= n_udword_bits - 1 125353358Sdim // q.all = n.all << (n_udword_bits - sr); 126353358Sdim // r.all = n.all >> sr; 127353358Sdim if (sr == n_uword_bits) { 128276789Sdim q.s.low = 0; 129353358Sdim q.s.high = n.s.low; 130353358Sdim r.s.high = 0; 131353358Sdim r.s.low = n.s.high; 132353358Sdim } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ { 133353358Sdim q.s.low = 0; 134276789Sdim q.s.high = n.s.low << (n_uword_bits - sr); 135276789Sdim r.s.high = n.s.high >> sr; 136276789Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 137353358Sdim } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ { 138353358Sdim q.s.low = n.s.low << (n_udword_bits - sr); 139353358Sdim q.s.high = (n.s.high << (n_udword_bits - sr)) | 140353358Sdim (n.s.low >> (sr - n_uword_bits)); 141353358Sdim r.s.high = 0; 142353358Sdim r.s.low = n.s.high >> (sr - n_uword_bits); 143353358Sdim } 144353358Sdim } else { 145353358Sdim // K X 146353358Sdim // --- 147353358Sdim // K K 148353358Sdim sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 149353358Sdim // 0 <= sr <= n_uword_bits - 1 or sr large 150353358Sdim if (sr > n_uword_bits - 1) { 151353358Sdim if (rem) 152353358Sdim *rem = n.all; 153353358Sdim return 0; 154353358Sdim } 155353358Sdim ++sr; 156353358Sdim // 1 <= sr <= n_uword_bits 157353358Sdim // q.all = n.all << (n_udword_bits - sr); 158353358Sdim q.s.low = 0; 159353358Sdim if (sr == n_uword_bits) { 160353358Sdim q.s.high = n.s.low; 161353358Sdim r.s.high = 0; 162353358Sdim r.s.low = n.s.high; 163353358Sdim } else { 164353358Sdim q.s.high = n.s.low << (n_uword_bits - sr); 165353358Sdim r.s.high = n.s.high >> sr; 166353358Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 167353358Sdim } 168276789Sdim } 169353358Sdim } 170353358Sdim // Not a special case 171353358Sdim // q and r are initialized with: 172353358Sdim // q.all = n.all << (n_udword_bits - sr); 173353358Sdim // r.all = n.all >> sr; 174353358Sdim // 1 <= sr <= n_udword_bits - 1 175353358Sdim su_int carry = 0; 176353358Sdim for (; sr > 0; --sr) { 177353358Sdim // r:q = ((r:q) << 1) | carry 178353358Sdim r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 179353358Sdim r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 180353358Sdim q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 181353358Sdim q.s.low = (q.s.low << 1) | carry; 182353358Sdim // carry = 0; 183353358Sdim // if (r.all >= d.all) 184353358Sdim // { 185353358Sdim // r.all -= d.all; 186353358Sdim // carry = 1; 187353358Sdim // } 188353358Sdim const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 189353358Sdim carry = s & 1; 190353358Sdim r.all -= d.all & s; 191353358Sdim } 192353358Sdim q.all = (q.all << 1) | carry; 193353358Sdim if (rem) 194353358Sdim *rem = r.all; 195353358Sdim return q.all; 196276789Sdim} 197360784Sdim 198360784Sdim#if defined(_MSC_VER) && !defined(__clang__) 199360784Sdim#pragma warning(pop) 200360784Sdim#endif 201