1214152Sed/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2214152Sed * 3214152Sed * The LLVM Compiler Infrastructure 4214152Sed * 5222656Sed * This file is dual licensed under the MIT and the University of Illinois Open 6222656Sed * Source Licenses. See LICENSE.TXT for details. 7214152Sed * 8214152Sed * ===----------------------------------------------------------------------=== 9214152Sed * 10214152Sed * This file implements __udivmoddi4 for the compiler_rt library. 11214152Sed * 12214152Sed * ===----------------------------------------------------------------------=== 13214152Sed */ 14214152Sed 15214152Sed#include "int_lib.h" 16214152Sed 17214152Sed/* Effects: if rem != 0, *rem = a % b 18214152Sed * Returns: a / b 19214152Sed */ 20214152Sed 21214152Sed/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 22214152Sed 23222656SedCOMPILER_RT_ABI du_int 24214152Sed__udivmoddi4(du_int a, du_int b, du_int* rem) 25214152Sed{ 26214152Sed const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 27214152Sed const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 28214152Sed udwords n; 29214152Sed n.all = a; 30214152Sed udwords d; 31214152Sed d.all = b; 32214152Sed udwords q; 33214152Sed udwords r; 34214152Sed unsigned sr; 35214152Sed /* special cases, X is unknown, K != 0 */ 36214152Sed if (n.s.high == 0) 37214152Sed { 38214152Sed if (d.s.high == 0) 39214152Sed { 40214152Sed /* 0 X 41214152Sed * --- 42214152Sed * 0 X 43214152Sed */ 44214152Sed if (rem) 45214152Sed *rem = n.s.low % d.s.low; 46214152Sed return n.s.low / d.s.low; 47214152Sed } 48214152Sed /* 0 X 49214152Sed * --- 50214152Sed * K X 51214152Sed */ 52214152Sed if (rem) 53214152Sed *rem = n.s.low; 54214152Sed return 0; 55214152Sed } 56214152Sed /* n.s.high != 0 */ 57214152Sed if (d.s.low == 0) 58214152Sed { 59214152Sed if (d.s.high == 0) 60214152Sed { 61214152Sed /* K X 62214152Sed * --- 63214152Sed * 0 0 64214152Sed */ 65214152Sed if (rem) 66214152Sed *rem = n.s.high % d.s.low; 67214152Sed return n.s.high / d.s.low; 68214152Sed } 69214152Sed /* d.s.high != 0 */ 70214152Sed if (n.s.low == 0) 71214152Sed { 72214152Sed /* K 0 73214152Sed * --- 74214152Sed * K 0 75214152Sed */ 76214152Sed if (rem) 77214152Sed { 78214152Sed r.s.high = n.s.high % d.s.high; 79214152Sed r.s.low = 0; 80214152Sed *rem = r.all; 81214152Sed } 82214152Sed return n.s.high / d.s.high; 83214152Sed } 84214152Sed /* K K 85214152Sed * --- 86214152Sed * K 0 87214152Sed */ 88214152Sed if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 89214152Sed { 90214152Sed if (rem) 91214152Sed { 92214152Sed r.s.low = n.s.low; 93214152Sed r.s.high = n.s.high & (d.s.high - 1); 94214152Sed *rem = r.all; 95214152Sed } 96214152Sed return n.s.high >> __builtin_ctz(d.s.high); 97214152Sed } 98214152Sed /* K K 99214152Sed * --- 100214152Sed * K 0 101214152Sed */ 102214152Sed sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 103214152Sed /* 0 <= sr <= n_uword_bits - 2 or sr large */ 104214152Sed if (sr > n_uword_bits - 2) 105214152Sed { 106214152Sed if (rem) 107214152Sed *rem = n.all; 108214152Sed return 0; 109214152Sed } 110214152Sed ++sr; 111214152Sed /* 1 <= sr <= n_uword_bits - 1 */ 112214152Sed /* q.all = n.all << (n_udword_bits - sr); */ 113214152Sed q.s.low = 0; 114214152Sed q.s.high = n.s.low << (n_uword_bits - sr); 115214152Sed /* r.all = n.all >> sr; */ 116214152Sed r.s.high = n.s.high >> sr; 117214152Sed r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118214152Sed } 119214152Sed else /* d.s.low != 0 */ 120214152Sed { 121214152Sed if (d.s.high == 0) 122214152Sed { 123214152Sed /* K X 124214152Sed * --- 125214152Sed * 0 K 126214152Sed */ 127214152Sed if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 128214152Sed { 129214152Sed if (rem) 130214152Sed *rem = n.s.low & (d.s.low - 1); 131214152Sed if (d.s.low == 1) 132214152Sed return n.all; 133229135Sed sr = __builtin_ctz(d.s.low); 134214152Sed q.s.high = n.s.high >> sr; 135214152Sed q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 136214152Sed return q.all; 137214152Sed } 138214152Sed /* K X 139214152Sed * --- 140214152Sed *0 K 141214152Sed */ 142214152Sed sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 143214152Sed /* 2 <= sr <= n_udword_bits - 1 144214152Sed * q.all = n.all << (n_udword_bits - sr); 145214152Sed * r.all = n.all >> sr; 146214152Sed * if (sr == n_uword_bits) 147214152Sed * { 148214152Sed * q.s.low = 0; 149214152Sed * q.s.high = n.s.low; 150214152Sed * r.s.high = 0; 151214152Sed * r.s.low = n.s.high; 152214152Sed * } 153214152Sed * else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 154214152Sed * { 155214152Sed * q.s.low = 0; 156214152Sed * q.s.high = n.s.low << (n_uword_bits - sr); 157214152Sed * r.s.high = n.s.high >> sr; 158214152Sed * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 159214152Sed * } 160214152Sed * else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 161214152Sed * { 162214152Sed * q.s.low = n.s.low << (n_udword_bits - sr); 163214152Sed * q.s.high = (n.s.high << (n_udword_bits - sr)) | 164214152Sed * (n.s.low >> (sr - n_uword_bits)); 165214152Sed * r.s.high = 0; 166214152Sed * r.s.low = n.s.high >> (sr - n_uword_bits); 167214152Sed * } 168214152Sed */ 169214152Sed q.s.low = (n.s.low << (n_udword_bits - sr)) & 170214152Sed ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)); 171214152Sed q.s.high = ((n.s.low << ( n_uword_bits - sr)) & 172214152Sed ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) | 173214152Sed (((n.s.high << (n_udword_bits - sr)) | 174214152Sed (n.s.low >> (sr - n_uword_bits))) & 175214152Sed ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1))); 176214152Sed r.s.high = (n.s.high >> sr) & 177214152Sed ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 178214152Sed r.s.low = ((n.s.high >> (sr - n_uword_bits)) & 179214152Sed ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) | 180214152Sed (((n.s.high << (n_uword_bits - sr)) | 181214152Sed (n.s.low >> sr)) & 182214152Sed ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 183214152Sed } 184214152Sed else 185214152Sed { 186214152Sed /* K X 187214152Sed * --- 188214152Sed * K K 189214152Sed */ 190214152Sed sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 191214152Sed /* 0 <= sr <= n_uword_bits - 1 or sr large */ 192214152Sed if (sr > n_uword_bits - 1) 193214152Sed { 194214152Sed if (rem) 195214152Sed *rem = n.all; 196214152Sed return 0; 197214152Sed } 198214152Sed ++sr; 199214152Sed /* 1 <= sr <= n_uword_bits */ 200214152Sed /* q.all = n.all << (n_udword_bits - sr); */ 201214152Sed q.s.low = 0; 202214152Sed q.s.high = n.s.low << (n_uword_bits - sr); 203214152Sed /* r.all = n.all >> sr; 204214152Sed * if (sr < n_uword_bits) 205214152Sed * { 206214152Sed * r.s.high = n.s.high >> sr; 207214152Sed * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 208214152Sed * } 209214152Sed * else 210214152Sed * { 211214152Sed * r.s.high = 0; 212214152Sed * r.s.low = n.s.high; 213214152Sed * } 214214152Sed */ 215214152Sed r.s.high = (n.s.high >> sr) & 216214152Sed ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 217214152Sed r.s.low = (n.s.high << (n_uword_bits - sr)) | 218214152Sed ((n.s.low >> sr) & 219214152Sed ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 220214152Sed } 221214152Sed } 222214152Sed /* Not a special case 223214152Sed * q and r are initialized with: 224214152Sed * q.all = n.all << (n_udword_bits - sr); 225214152Sed * r.all = n.all >> sr; 226214152Sed * 1 <= sr <= n_udword_bits - 1 227214152Sed */ 228214152Sed su_int carry = 0; 229214152Sed for (; sr > 0; --sr) 230214152Sed { 231214152Sed /* r:q = ((r:q) << 1) | carry */ 232214152Sed r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 233214152Sed r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 234214152Sed q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 235214152Sed q.s.low = (q.s.low << 1) | carry; 236214152Sed /* carry = 0; 237214152Sed * if (r.all >= d.all) 238214152Sed * { 239214152Sed * r.all -= d.all; 240214152Sed * carry = 1; 241214152Sed * } 242214152Sed */ 243214152Sed const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 244214152Sed carry = s & 1; 245214152Sed r.all -= d.all & s; 246214152Sed } 247214152Sed q.all = (q.all << 1) | carry; 248214152Sed if (rem) 249214152Sed *rem = r.all; 250214152Sed return q.all; 251214152Sed} 252