1276789Sdim/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2276789Sdim * 3276789Sdim * The LLVM Compiler Infrastructure 4276789Sdim * 5276789Sdim * This file is dual licensed under the MIT and the University of Illinois Open 6276789Sdim * Source Licenses. See LICENSE.TXT for details. 7276789Sdim * 8276789Sdim * ===----------------------------------------------------------------------=== 9276789Sdim * 10276789Sdim * This file implements __udivmoddi4 for the compiler_rt library. 11276789Sdim * 12276789Sdim * ===----------------------------------------------------------------------=== 13276789Sdim */ 14276789Sdim 15276789Sdim#include "int_lib.h" 16276789Sdim 17276789Sdim/* Effects: if rem != 0, *rem = a % b 18276789Sdim * Returns: a / b 19276789Sdim */ 20276789Sdim 21276789Sdim/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 22276789Sdim 23276789SdimCOMPILER_RT_ABI du_int 24276789Sdim__udivmoddi4(du_int a, du_int b, du_int* rem) 25276789Sdim{ 26276789Sdim const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 27276789Sdim const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 28276789Sdim udwords n; 29276789Sdim n.all = a; 30276789Sdim udwords d; 31276789Sdim d.all = b; 32276789Sdim udwords q; 33276789Sdim udwords r; 34276789Sdim unsigned sr; 35276789Sdim /* special cases, X is unknown, K != 0 */ 36276789Sdim if (n.s.high == 0) 37276789Sdim { 38276789Sdim if (d.s.high == 0) 39276789Sdim { 40276789Sdim /* 0 X 41276789Sdim * --- 42276789Sdim * 0 X 43276789Sdim */ 44276789Sdim if (rem) 45276789Sdim *rem = n.s.low % d.s.low; 46276789Sdim return n.s.low / d.s.low; 47276789Sdim } 48276789Sdim /* 0 X 49276789Sdim * --- 50276789Sdim * K X 51276789Sdim */ 52276789Sdim if (rem) 53276789Sdim *rem = n.s.low; 54276789Sdim return 0; 55276789Sdim } 56276789Sdim /* n.s.high != 0 */ 57276789Sdim if (d.s.low == 0) 58276789Sdim { 59276789Sdim if (d.s.high == 0) 60276789Sdim { 61276789Sdim /* K X 62276789Sdim * --- 63276789Sdim * 0 0 64276789Sdim */ 65276789Sdim if (rem) 66276789Sdim *rem = n.s.high % d.s.low; 67276789Sdim return n.s.high / d.s.low; 68276789Sdim } 69276789Sdim /* d.s.high != 0 */ 70276789Sdim if (n.s.low == 0) 71276789Sdim { 72276789Sdim /* K 0 73276789Sdim * --- 74276789Sdim * K 0 75276789Sdim */ 76276789Sdim if (rem) 77276789Sdim { 78276789Sdim r.s.high = n.s.high % d.s.high; 79276789Sdim r.s.low = 0; 80276789Sdim *rem = r.all; 81276789Sdim } 82276789Sdim return n.s.high / d.s.high; 83276789Sdim } 84276789Sdim /* K K 85276789Sdim * --- 86276789Sdim * K 0 87276789Sdim */ 88276789Sdim if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 89276789Sdim { 90276789Sdim if (rem) 91276789Sdim { 92276789Sdim r.s.low = n.s.low; 93276789Sdim r.s.high = n.s.high & (d.s.high - 1); 94276789Sdim *rem = r.all; 95276789Sdim } 96276789Sdim return n.s.high >> __builtin_ctz(d.s.high); 97276789Sdim } 98276789Sdim /* K K 99276789Sdim * --- 100276789Sdim * K 0 101276789Sdim */ 102276789Sdim sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 103276789Sdim /* 0 <= sr <= n_uword_bits - 2 or sr large */ 104276789Sdim if (sr > n_uword_bits - 2) 105276789Sdim { 106276789Sdim if (rem) 107276789Sdim *rem = n.all; 108276789Sdim return 0; 109276789Sdim } 110276789Sdim ++sr; 111276789Sdim /* 1 <= sr <= n_uword_bits - 1 */ 112276789Sdim /* q.all = n.all << (n_udword_bits - sr); */ 113276789Sdim q.s.low = 0; 114276789Sdim q.s.high = n.s.low << (n_uword_bits - sr); 115276789Sdim /* r.all = n.all >> sr; */ 116276789Sdim r.s.high = n.s.high >> sr; 117276789Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118276789Sdim } 119276789Sdim else /* d.s.low != 0 */ 120276789Sdim { 121276789Sdim if (d.s.high == 0) 122276789Sdim { 123276789Sdim /* K X 124276789Sdim * --- 125276789Sdim * 0 K 126276789Sdim */ 127276789Sdim if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 128276789Sdim { 129276789Sdim if (rem) 130276789Sdim *rem = n.s.low & (d.s.low - 1); 131276789Sdim if (d.s.low == 1) 132276789Sdim return n.all; 133276789Sdim sr = __builtin_ctz(d.s.low); 134276789Sdim q.s.high = n.s.high >> sr; 135276789Sdim q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 136276789Sdim return q.all; 137276789Sdim } 138276789Sdim /* K X 139276789Sdim * --- 140276789Sdim * 0 K 141276789Sdim */ 142276789Sdim sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 143276789Sdim /* 2 <= sr <= n_udword_bits - 1 144276789Sdim * q.all = n.all << (n_udword_bits - sr); 145276789Sdim * r.all = n.all >> sr; 146276789Sdim */ 147276789Sdim if (sr == n_uword_bits) 148276789Sdim { 149276789Sdim q.s.low = 0; 150276789Sdim q.s.high = n.s.low; 151276789Sdim r.s.high = 0; 152276789Sdim r.s.low = n.s.high; 153276789Sdim } 154276789Sdim else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 155276789Sdim { 156276789Sdim q.s.low = 0; 157276789Sdim q.s.high = n.s.low << (n_uword_bits - sr); 158276789Sdim r.s.high = n.s.high >> sr; 159276789Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 160276789Sdim } 161276789Sdim else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 162276789Sdim { 163276789Sdim q.s.low = n.s.low << (n_udword_bits - sr); 164276789Sdim q.s.high = (n.s.high << (n_udword_bits - sr)) | 165276789Sdim (n.s.low >> (sr - n_uword_bits)); 166276789Sdim r.s.high = 0; 167276789Sdim r.s.low = n.s.high >> (sr - n_uword_bits); 168276789Sdim } 169276789Sdim } 170276789Sdim else 171276789Sdim { 172276789Sdim /* K X 173276789Sdim * --- 174276789Sdim * K K 175276789Sdim */ 176276789Sdim sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 177276789Sdim /* 0 <= sr <= n_uword_bits - 1 or sr large */ 178276789Sdim if (sr > n_uword_bits - 1) 179276789Sdim { 180276789Sdim if (rem) 181276789Sdim *rem = n.all; 182276789Sdim return 0; 183276789Sdim } 184276789Sdim ++sr; 185276789Sdim /* 1 <= sr <= n_uword_bits */ 186276789Sdim /* q.all = n.all << (n_udword_bits - sr); */ 187276789Sdim q.s.low = 0; 188276789Sdim if (sr == n_uword_bits) 189276789Sdim { 190276789Sdim q.s.high = n.s.low; 191276789Sdim r.s.high = 0; 192276789Sdim r.s.low = n.s.high; 193276789Sdim } 194276789Sdim else 195276789Sdim { 196276789Sdim q.s.high = n.s.low << (n_uword_bits - sr); 197276789Sdim r.s.high = n.s.high >> sr; 198276789Sdim r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 199276789Sdim } 200276789Sdim } 201276789Sdim } 202276789Sdim /* Not a special case 203276789Sdim * q and r are initialized with: 204276789Sdim * q.all = n.all << (n_udword_bits - sr); 205276789Sdim * r.all = n.all >> sr; 206276789Sdim * 1 <= sr <= n_udword_bits - 1 207276789Sdim */ 208276789Sdim su_int carry = 0; 209276789Sdim for (; sr > 0; --sr) 210276789Sdim { 211276789Sdim /* r:q = ((r:q) << 1) | carry */ 212276789Sdim r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 213276789Sdim r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 214276789Sdim q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 215276789Sdim q.s.low = (q.s.low << 1) | carry; 216276789Sdim /* carry = 0; 217276789Sdim * if (r.all >= d.all) 218276789Sdim * { 219276789Sdim * r.all -= d.all; 220276789Sdim * carry = 1; 221276789Sdim * } 222276789Sdim */ 223276789Sdim const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 224276789Sdim carry = s & 1; 225276789Sdim r.all -= d.all & s; 226276789Sdim } 227276789Sdim q.all = (q.all << 1) | carry; 228276789Sdim if (rem) 229276789Sdim *rem = r.all; 230276789Sdim return q.all; 231276789Sdim} 232