udivmoddi4.c revision 222656
1/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is dual licensed under the MIT and the University of Illinois Open 6 * Source Licenses. See LICENSE.TXT for details. 7 * 8 * ===----------------------------------------------------------------------=== 9 * 10 * This file implements __udivmoddi4 for the compiler_rt library. 11 * 12 * ===----------------------------------------------------------------------=== 13 */ 14#include "abi.h" 15 16#include "int_lib.h" 17 18/* Effects: if rem != 0, *rem = a % b 19 * Returns: a / b 20 */ 21 22/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 23 24ARM_EABI_FNALIAS(uldivmod, udivmoddi4); 25 26COMPILER_RT_ABI du_int 27__udivmoddi4(du_int a, du_int b, du_int* rem) 28{ 29 const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 30 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 31 udwords n; 32 n.all = a; 33 udwords d; 34 d.all = b; 35 udwords q; 36 udwords r; 37 unsigned sr; 38 /* special cases, X is unknown, K != 0 */ 39 if (n.s.high == 0) 40 { 41 if (d.s.high == 0) 42 { 43 /* 0 X 44 * --- 45 * 0 X 46 */ 47 if (rem) 48 *rem = n.s.low % d.s.low; 49 return n.s.low / d.s.low; 50 } 51 /* 0 X 52 * --- 53 * K X 54 */ 55 if (rem) 56 *rem = n.s.low; 57 return 0; 58 } 59 /* n.s.high != 0 */ 60 if (d.s.low == 0) 61 { 62 if (d.s.high == 0) 63 { 64 /* K X 65 * --- 66 * 0 0 67 */ 68 if (rem) 69 *rem = n.s.high % d.s.low; 70 return n.s.high / d.s.low; 71 } 72 /* d.s.high != 0 */ 73 if (n.s.low == 0) 74 { 75 /* K 0 76 * --- 77 * K 0 78 */ 79 if (rem) 80 { 81 r.s.high = n.s.high % d.s.high; 82 r.s.low = 0; 83 *rem = r.all; 84 } 85 return n.s.high / d.s.high; 86 } 87 /* K K 88 * --- 89 * K 0 90 */ 91 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 92 { 93 if (rem) 94 { 95 r.s.low = n.s.low; 96 r.s.high = n.s.high & (d.s.high - 1); 97 *rem = r.all; 98 } 99 return n.s.high >> __builtin_ctz(d.s.high); 100 } 101 /* K K 102 * --- 103 * K 0 104 */ 105 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 106 /* 0 <= sr <= n_uword_bits - 2 or sr large */ 107 if (sr > n_uword_bits - 2) 108 { 109 if (rem) 110 *rem = n.all; 111 return 0; 112 } 113 ++sr; 114 /* 1 <= sr <= n_uword_bits - 1 */ 115 /* q.all = n.all << (n_udword_bits - sr); */ 116 q.s.low = 0; 117 q.s.high = n.s.low << (n_uword_bits - sr); 118 /* r.all = n.all >> sr; */ 119 r.s.high = n.s.high >> sr; 120 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 121 } 122 else /* d.s.low != 0 */ 123 { 124 if (d.s.high == 0) 125 { 126 /* K X 127 * --- 128 * 0 K 129 */ 130 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 131 { 132 if (rem) 133 *rem = n.s.low & (d.s.low - 1); 134 if (d.s.low == 1) 135 return n.all; 136 unsigned sr = __builtin_ctz(d.s.low); 137 q.s.high = n.s.high >> sr; 138 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 139 return q.all; 140 } 141 /* K X 142 * --- 143 *0 K 144 */ 145 sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 146 /* 2 <= sr <= n_udword_bits - 1 147 * q.all = n.all << (n_udword_bits - sr); 148 * r.all = n.all >> sr; 149 * if (sr == n_uword_bits) 150 * { 151 * q.s.low = 0; 152 * q.s.high = n.s.low; 153 * r.s.high = 0; 154 * r.s.low = n.s.high; 155 * } 156 * else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 157 * { 158 * q.s.low = 0; 159 * q.s.high = n.s.low << (n_uword_bits - sr); 160 * r.s.high = n.s.high >> sr; 161 * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 162 * } 163 * else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 164 * { 165 * q.s.low = n.s.low << (n_udword_bits - sr); 166 * q.s.high = (n.s.high << (n_udword_bits - sr)) | 167 * (n.s.low >> (sr - n_uword_bits)); 168 * r.s.high = 0; 169 * r.s.low = n.s.high >> (sr - n_uword_bits); 170 * } 171 */ 172 q.s.low = (n.s.low << (n_udword_bits - sr)) & 173 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)); 174 q.s.high = ((n.s.low << ( n_uword_bits - sr)) & 175 ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) | 176 (((n.s.high << (n_udword_bits - sr)) | 177 (n.s.low >> (sr - n_uword_bits))) & 178 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1))); 179 r.s.high = (n.s.high >> sr) & 180 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 181 r.s.low = ((n.s.high >> (sr - n_uword_bits)) & 182 ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) | 183 (((n.s.high << (n_uword_bits - sr)) | 184 (n.s.low >> sr)) & 185 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 186 } 187 else 188 { 189 /* K X 190 * --- 191 * K K 192 */ 193 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 194 /* 0 <= sr <= n_uword_bits - 1 or sr large */ 195 if (sr > n_uword_bits - 1) 196 { 197 if (rem) 198 *rem = n.all; 199 return 0; 200 } 201 ++sr; 202 /* 1 <= sr <= n_uword_bits */ 203 /* q.all = n.all << (n_udword_bits - sr); */ 204 q.s.low = 0; 205 q.s.high = n.s.low << (n_uword_bits - sr); 206 /* r.all = n.all >> sr; 207 * if (sr < n_uword_bits) 208 * { 209 * r.s.high = n.s.high >> sr; 210 * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 211 * } 212 * else 213 * { 214 * r.s.high = 0; 215 * r.s.low = n.s.high; 216 * } 217 */ 218 r.s.high = (n.s.high >> sr) & 219 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 220 r.s.low = (n.s.high << (n_uword_bits - sr)) | 221 ((n.s.low >> sr) & 222 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 223 } 224 } 225 /* Not a special case 226 * q and r are initialized with: 227 * q.all = n.all << (n_udword_bits - sr); 228 * r.all = n.all >> sr; 229 * 1 <= sr <= n_udword_bits - 1 230 */ 231 su_int carry = 0; 232 for (; sr > 0; --sr) 233 { 234 /* r:q = ((r:q) << 1) | carry */ 235 r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 236 r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 237 q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 238 q.s.low = (q.s.low << 1) | carry; 239 /* carry = 0; 240 * if (r.all >= d.all) 241 * { 242 * r.all -= d.all; 243 * carry = 1; 244 * } 245 */ 246 const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 247 carry = s & 1; 248 r.all -= d.all & s; 249 } 250 q.all = (q.all << 1) | carry; 251 if (rem) 252 *rem = r.all; 253 return q.all; 254} 255