udivmodti4.c revision 263763
1214152Sed/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== 2214152Sed * 3214152Sed * The LLVM Compiler Infrastructure 4214152Sed * 5222656Sed * This file is dual licensed under the MIT and the University of Illinois Open 6222656Sed * Source Licenses. See LICENSE.TXT for details. 7214152Sed * 8214152Sed * ===----------------------------------------------------------------------=== 9214152Sed * 10214152Sed * This file implements __udivmodti4 for the compiler_rt library. 11214152Sed * 12214152Sed * ===----------------------------------------------------------------------=== 13214152Sed */ 14214152Sed 15239138Sandrew#include "int_lib.h" 16239138Sandrew 17263763Sdim#ifdef CRT_HAS_128BIT 18214152Sed 19214152Sed/* Effects: if rem != 0, *rem = a % b 20214152Sed * Returns: a / b 21214152Sed */ 22214152Sed 23214152Sed/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 24214152Sed 25214152Sedtu_int 26214152Sed__udivmodti4(tu_int a, tu_int b, tu_int* rem) 27214152Sed{ 28214152Sed const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 29214152Sed const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; 30214152Sed utwords n; 31214152Sed n.all = a; 32214152Sed utwords d; 33214152Sed d.all = b; 34214152Sed utwords q; 35214152Sed utwords r; 36214152Sed unsigned sr; 37214152Sed /* special cases, X is unknown, K != 0 */ 38214152Sed if (n.s.high == 0) 39214152Sed { 40214152Sed if (d.s.high == 0) 41214152Sed { 42214152Sed /* 0 X 43214152Sed * --- 44214152Sed * 0 X 45214152Sed */ 46214152Sed if (rem) 47214152Sed *rem = n.s.low % d.s.low; 48214152Sed return n.s.low / d.s.low; 49214152Sed } 50214152Sed /* 0 X 51214152Sed * --- 52214152Sed * K X 53214152Sed */ 54214152Sed if (rem) 55214152Sed *rem = n.s.low; 56214152Sed return 0; 57214152Sed } 58214152Sed /* n.s.high != 0 */ 59214152Sed if (d.s.low == 0) 60214152Sed { 61214152Sed if (d.s.high == 0) 62214152Sed { 63214152Sed /* K X 64214152Sed * --- 65214152Sed * 0 0 66214152Sed */ 67214152Sed if (rem) 68214152Sed *rem = n.s.high % d.s.low; 69214152Sed return n.s.high / d.s.low; 70214152Sed } 71214152Sed /* d.s.high != 0 */ 72214152Sed if (n.s.low == 0) 73214152Sed { 74214152Sed /* K 0 75214152Sed * --- 76214152Sed * K 0 77214152Sed */ 78214152Sed if (rem) 79214152Sed { 80214152Sed r.s.high = n.s.high % d.s.high; 81214152Sed r.s.low = 0; 82214152Sed *rem = r.all; 83214152Sed } 84214152Sed return n.s.high / d.s.high; 85214152Sed } 86214152Sed /* K K 87214152Sed * --- 88214152Sed * K 0 89214152Sed */ 90214152Sed if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 91214152Sed { 92214152Sed if (rem) 93214152Sed { 94214152Sed r.s.low = n.s.low; 95214152Sed r.s.high = n.s.high & (d.s.high - 1); 96214152Sed *rem = r.all; 97214152Sed } 98214152Sed return n.s.high >> __builtin_ctzll(d.s.high); 99214152Sed } 100214152Sed /* K K 101214152Sed * --- 102214152Sed * K 0 103214152Sed */ 104214152Sed sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 105214152Sed /* 0 <= sr <= n_udword_bits - 2 or sr large */ 106214152Sed if (sr > n_udword_bits - 2) 107214152Sed { 108214152Sed if (rem) 109214152Sed *rem = n.all; 110214152Sed return 0; 111214152Sed } 112214152Sed ++sr; 113214152Sed /* 1 <= sr <= n_udword_bits - 1 */ 114214152Sed /* q.all = n.all << (n_utword_bits - sr); */ 115214152Sed q.s.low = 0; 116214152Sed q.s.high = n.s.low << (n_udword_bits - sr); 117214152Sed /* r.all = n.all >> sr; */ 118214152Sed r.s.high = n.s.high >> sr; 119214152Sed r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 120214152Sed } 121214152Sed else /* d.s.low != 0 */ 122214152Sed { 123214152Sed if (d.s.high == 0) 124214152Sed { 125214152Sed /* K X 126214152Sed * --- 127214152Sed * 0 K 128214152Sed */ 129214152Sed if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 130214152Sed { 131214152Sed if (rem) 132214152Sed *rem = n.s.low & (d.s.low - 1); 133214152Sed if (d.s.low == 1) 134214152Sed return n.all; 135229135Sed sr = __builtin_ctzll(d.s.low); 136214152Sed q.s.high = n.s.high >> sr; 137214152Sed q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 138214152Sed return q.all; 139214152Sed } 140214152Sed /* K X 141214152Sed * --- 142214152Sed * 0 K 143214152Sed */ 144214152Sed sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) 145214152Sed - __builtin_clzll(n.s.high); 146214152Sed /* 2 <= sr <= n_utword_bits - 1 147214152Sed * q.all = n.all << (n_utword_bits - sr); 148214152Sed * r.all = n.all >> sr; 149214152Sed * if (sr == n_udword_bits) 150214152Sed * { 151214152Sed * q.s.low = 0; 152214152Sed * q.s.high = n.s.low; 153214152Sed * r.s.high = 0; 154214152Sed * r.s.low = n.s.high; 155214152Sed * } 156214152Sed * else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 157214152Sed * { 158214152Sed * q.s.low = 0; 159214152Sed * q.s.high = n.s.low << (n_udword_bits - sr); 160214152Sed * r.s.high = n.s.high >> sr; 161214152Sed * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 162214152Sed * } 163214152Sed * else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 164214152Sed * { 165214152Sed * q.s.low = n.s.low << (n_utword_bits - sr); 166214152Sed * q.s.high = (n.s.high << (n_utword_bits - sr)) | 167214152Sed * (n.s.low >> (sr - n_udword_bits)); 168214152Sed * r.s.high = 0; 169214152Sed * r.s.low = n.s.high >> (sr - n_udword_bits); 170214152Sed * } 171214152Sed */ 172214152Sed q.s.low = (n.s.low << (n_utword_bits - sr)) & 173214152Sed ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)); 174214152Sed q.s.high = ((n.s.low << ( n_udword_bits - sr)) & 175214152Sed ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) | 176214152Sed (((n.s.high << (n_utword_bits - sr)) | 177214152Sed (n.s.low >> (sr - n_udword_bits))) & 178214152Sed ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1))); 179214152Sed r.s.high = (n.s.high >> sr) & 180214152Sed ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 181214152Sed r.s.low = ((n.s.high >> (sr - n_udword_bits)) & 182214152Sed ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) | 183214152Sed (((n.s.high << (n_udword_bits - sr)) | 184214152Sed (n.s.low >> sr)) & 185214152Sed ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 186214152Sed } 187214152Sed else 188214152Sed { 189214152Sed /* K X 190214152Sed * --- 191214152Sed * K K 192214152Sed */ 193214152Sed sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 194214152Sed /*0 <= sr <= n_udword_bits - 1 or sr large */ 195214152Sed if (sr > n_udword_bits - 1) 196214152Sed { 197214152Sed if (rem) 198214152Sed *rem = n.all; 199214152Sed return 0; 200214152Sed } 201214152Sed ++sr; 202214152Sed /* 1 <= sr <= n_udword_bits */ 203214152Sed /* q.all = n.all << (n_utword_bits - sr); */ 204214152Sed q.s.low = 0; 205214152Sed q.s.high = n.s.low << (n_udword_bits - sr); 206214152Sed /* r.all = n.all >> sr; 207214152Sed * if (sr < n_udword_bits) 208214152Sed * { 209214152Sed * r.s.high = n.s.high >> sr; 210214152Sed * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 211214152Sed * } 212214152Sed * else 213214152Sed * { 214214152Sed * r.s.high = 0; 215214152Sed * r.s.low = n.s.high; 216214152Sed * } 217214152Sed */ 218214152Sed r.s.high = (n.s.high >> sr) & 219214152Sed ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 220214152Sed r.s.low = (n.s.high << (n_udword_bits - sr)) | 221214152Sed ((n.s.low >> sr) & 222214152Sed ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 223214152Sed } 224214152Sed } 225214152Sed /* Not a special case 226214152Sed * q and r are initialized with: 227214152Sed * q.all = n.all << (n_utword_bits - sr); 228214152Sed * r.all = n.all >> sr; 229214152Sed * 1 <= sr <= n_utword_bits - 1 230214152Sed */ 231214152Sed su_int carry = 0; 232214152Sed for (; sr > 0; --sr) 233214152Sed { 234214152Sed /* r:q = ((r:q) << 1) | carry */ 235214152Sed r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); 236214152Sed r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); 237214152Sed q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); 238214152Sed q.s.low = (q.s.low << 1) | carry; 239214152Sed /* carry = 0; 240214152Sed * if (r.all >= d.all) 241214152Sed * { 242214152Sed * r.all -= d.all; 243214152Sed * carry = 1; 244214152Sed * } 245214152Sed */ 246214152Sed const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); 247214152Sed carry = s & 1; 248214152Sed r.all -= d.all & s; 249214152Sed } 250214152Sed q.all = (q.all << 1) | carry; 251214152Sed if (rem) 252214152Sed *rem = r.all; 253214152Sed return q.all; 254214152Sed} 255214152Sed 256263763Sdim#endif /* CRT_HAS_128BIT */ 257