/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"


/*                   64-bit divisor        32-bit divisor
                       cycles/limb           cycles/limb
                        (approx)              (approx)
                    integer  fraction     integer  fraction
   Ultrasparc 2i:     160      160          122       96
*/


/* 32-bit divisors are treated in special case code.  This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable.  Doing that measures about 4 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
   still use a variable since that ought to work out best.
*/ 46 47mp_limb_t 48mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs, 49 mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb) 50{ 51 mp_size_t total_size_limbs; 52 mp_size_t i; 53 54 ASSERT (xsize_limbs >= 0); 55 ASSERT (size_limbs >= 0); 56 ASSERT (d_limb != 0); 57 /* FIXME: What's the correct overlap rule when xsize!=0? */ 58 ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs, 59 ap_limbptr, size_limbs)); 60 61 total_size_limbs = size_limbs + xsize_limbs; 62 if (UNLIKELY (total_size_limbs == 0)) 63 return 0; 64 65 /* udivx is good for total_size==1, and no need to bother checking 66 limb<divisor, since if that's likely the caller should check */ 67 if (UNLIKELY (total_size_limbs == 1)) 68 { 69 mp_limb_t a, q; 70 a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0); 71 q = a / d_limb; 72 qp_limbptr[0] = q; 73 return a - q*d_limb; 74 } 75 76 if (d_limb <= CNST_LIMB(0xFFFFFFFF)) 77 { 78 mp_size_t size, xsize, total_size, adj; 79 unsigned *qp, n1, n0, q, r, nshift, norm_rmask; 80 mp_limb_t dinv_limb; 81 const unsigned *ap; 82 int norm, norm_rshift; 83 84 size = 2 * size_limbs; 85 xsize = 2 * xsize_limbs; 86 total_size = size + xsize; 87 88 ap = (unsigned *) ap_limbptr; 89 qp = (unsigned *) qp_limbptr; 90 91 qp += xsize; 92 r = 0; /* initial remainder */ 93 94 if (LIKELY (size != 0)) 95 { 96 n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)]; 97 98 /* If the length of the source is uniformly distributed, then 99 there's a 50% chance of the high 32-bits being zero, which we 100 can skip. */ 101 if (n1 == 0) 102 { 103 n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)]; 104 total_size--; 105 size--; 106 ASSERT (size > 0); /* because always even */ 107 qp[size + HALF_ENDIAN_ADJ(1)] = 0; 108 } 109 110 /* Skip a division if high < divisor (high quotient 0). Testing 111 here before before normalizing will still skip as often as 112 possible. 
*/ 113 if (n1 < d_limb) 114 { 115 r = n1; 116 size--; 117 qp[size + HALF_ENDIAN_ADJ(size)] = 0; 118 total_size--; 119 if (total_size == 0) 120 return r; 121 } 122 } 123 124 count_leading_zeros_32 (norm, d_limb); 125 norm -= 32; 126 d_limb <<= norm; 127 r <<= norm; 128 129 norm_rshift = 32 - norm; 130 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF); 131 132 invert_half_limb (dinv_limb, d_limb); 133 134 if (LIKELY (size != 0)) 135 { 136 i = size - 1; 137 adj = HALF_ENDIAN_ADJ (i); 138 n1 = ap[i + adj]; 139 adj = -adj; 140 r |= ((n1 >> norm_rshift) & norm_rmask); 141 for ( ; i > 0; i--) 142 { 143 n0 = ap[i-1 + adj]; 144 adj = -adj; 145 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask); 146 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb); 147 qp[i + adj] = q; 148 n1 = n0; 149 } 150 nshift = n1 << norm; 151 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb); 152 qp[0 + HALF_ENDIAN_ADJ(0)] = q; 153 } 154 qp -= xsize; 155 adj = HALF_ENDIAN_ADJ (0); 156 for (i = xsize-1; i >= 0; i--) 157 { 158 udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb); 159 adj = -adj; 160 qp[i + adj] = q; 161 } 162 163 return r >> norm; 164 } 165 else 166 { 167 mp_srcptr ap; 168 mp_ptr qp; 169 mp_size_t size, xsize, total_size; 170 mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask; 171 int norm, norm_rshift; 172 173 ap = ap_limbptr; 174 qp = qp_limbptr; 175 size = size_limbs; 176 xsize = xsize_limbs; 177 total_size = total_size_limbs; 178 d = d_limb; 179 180 qp += total_size; /* above high limb */ 181 r = 0; /* initial remainder */ 182 183 if (LIKELY (size != 0)) 184 { 185 /* Skip a division if high < divisor (high quotient 0). Testing 186 here before before normalizing will still skip as often as 187 possible. 
*/ 188 n1 = ap[size-1]; 189 if (n1 < d) 190 { 191 r = n1; 192 *--qp = 0; 193 total_size--; 194 if (total_size == 0) 195 return r; 196 size--; 197 } 198 } 199 200 count_leading_zeros (norm, d); 201 d <<= norm; 202 r <<= norm; 203 204 norm_rshift = GMP_LIMB_BITS - norm; 205 norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0)); 206 207 invert_limb (dinv, d); 208 209 if (LIKELY (size != 0)) 210 { 211 n1 = ap[size-1]; 212 r |= ((n1 >> norm_rshift) & norm_rmask); 213 for (i = size-2; i >= 0; i--) 214 { 215 n0 = ap[i]; 216 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask); 217 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv); 218 *--qp = q; 219 n1 = n0; 220 } 221 nshift = n1 << norm; 222 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv); 223 *--qp = q; 224 } 225 for (i = 0; i < xsize; i++) 226 { 227 udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv); 228 *--qp = q; 229 } 230 return r >> norm; 231 } 232} 233