qdivrem.c revision 8876
1181905Sed/*- 2181905Sed * Copyright (c) 1992, 1993 3181905Sed * The Regents of the University of California. All rights reserved. 4181905Sed * 5181905Sed * This software was developed by the Computer Systems Engineering group 6181905Sed * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7181905Sed * contributed to Berkeley. 8181905Sed * 9181905Sed * Redistribution and use in source and binary forms, with or without 10181905Sed * modification, are permitted provided that the following conditions 11181905Sed * are met: 12181905Sed * 1. Redistributions of source code must retain the above copyright 13181905Sed * notice, this list of conditions and the following disclaimer. 14181905Sed * 2. Redistributions in binary form must reproduce the above copyright 15181905Sed * notice, this list of conditions and the following disclaimer in the 16181905Sed * documentation and/or other materials provided with the distribution. 17181905Sed * 3. All advertising materials mentioning features or use of this software 18181905Sed * must display the following acknowledgement: 19181905Sed * This product includes software developed by the University of 20181905Sed * California, Berkeley and its contributors. 21181905Sed * 4. Neither the name of the University nor the names of its contributors 22181905Sed * may be used to endorse or promote products derived from this software 23181905Sed * without specific prior written permission. 24181905Sed * 25181905Sed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26181905Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27181905Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28181905Sed * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29181905Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30181905Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31181905Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32181905Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33181905Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34181905Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35181905Sed * SUCH DAMAGE. 36181905Sed * 37181905Sed * $Id: qdivrem.c,v 1.2 1994/08/02 07:44:19 davidg Exp $ 38181905Sed */ 39181905Sed 40181905Sed/* 41181905Sed * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 42181905Sed * section 4.3.1, pp. 257--259. 43181905Sed */ 44181905Sed 45181905Sed#include "quad.h" 46181905Sed 47181905Sed#define B (1 << HALF_BITS) /* digit base */ 48181905Sed 49181905Sed/* Combine two `digits' to make a single two-digit number. */ 50181905Sed#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) 51189061Sed 52189061Sed/* select a type for digits in base B: use unsigned short if they fit */ 53181905Sed#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff 54189061Sedtypedef unsigned short digit; 55189061Sed#else 56181905Sedtypedef u_long digit; 57181905Sed#endif 58181905Sed 59181905Sed/* 60181905Sed * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 61181905Sed * `fall out' the left (there never will be any such anyway). 62181905Sed * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 63181905Sed */ 64181905Sedstatic void 65181905Sedshl(register digit *p, register int len, register int sh) 66181905Sed{ 67181905Sed register int i; 68181905Sed 69181905Sed for (i = 0; i < len; i++) 70181905Sed p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); 71181905Sed p[i] = LHALF(p[i] << sh); 72181905Sed} 73181905Sed 74181905Sed/* 75181905Sed * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 76181905Sed * 77181905Sed * We do this in base 2-sup-HALF_BITS, so that all intermediate products 78181905Sed * fit within u_long. As a consequence, the maximum length dividend and 79181905Sed * divisor are 4 `digits' in this base (they are shorter if they have 80181905Sed * leading zeros). 81181905Sed */ 82183276Sedu_quad_t 83183276Sed__qdivrem(uq, vq, arq) 84181905Sed u_quad_t uq, vq, *arq; 85181905Sed{ 86181905Sed union uu tmp; 87181905Sed digit *u, *v, *q; 88181905Sed register digit v1, v2; 89181905Sed u_long qhat, rhat, t; 90181905Sed int m, n, d, j, i; 91181905Sed digit uspace[5], vspace[5], qspace[5]; 92181905Sed 93181905Sed /* 94181905Sed * Take care of special cases: divide by zero, and u < v. 95181905Sed */ 96181905Sed if (vq == 0) { 97181905Sed /* divide by zero. */ 98181905Sed static volatile const unsigned int zero = 0; 99181905Sed 100181905Sed tmp.ul[H] = tmp.ul[L] = 1 / zero; 101181905Sed if (arq) 102181905Sed *arq = uq; 103181905Sed return (tmp.q); 104181905Sed } 105183276Sed if (uq < vq) { 106183276Sed if (arq) 107183276Sed *arq = uq; 108181905Sed return (0); 109181905Sed } 110181905Sed u = &uspace[0]; 111181905Sed v = &vspace[0]; 112181905Sed q = &qspace[0]; 113181905Sed 114181905Sed /* 115181905Sed * Break dividend and divisor into digits in base B, then 116181905Sed * count leading zeros to determine m and n. When done, we 117181905Sed * will have: 118181905Sed * u = (u[1]u[2]...u[m+n]) sub B 119181905Sed * v = (v[1]v[2]...v[n]) sub B 120181905Sed * v[1] != 0 121181905Sed * 1 < n <= 4 (if n = 1, we use a different division algorithm) 122181905Sed * m >= 0 (otherwise u < v, which we already checked) 123181905Sed * m + n = 4 124181905Sed * and thus 125181905Sed * m = 4 - n <= 2 126181905Sed */ 127181905Sed tmp.uq = uq; 128181905Sed u[0] = 0; 129242078Sed u[1] = HHALF(tmp.ul[H]); 130242078Sed u[2] = LHALF(tmp.ul[H]); 131242078Sed u[3] = HHALF(tmp.ul[L]); 132242078Sed u[4] = LHALF(tmp.ul[L]); 133181905Sed tmp.uq = vq; 134181905Sed v[1] = HHALF(tmp.ul[H]); 135181905Sed v[2] = LHALF(tmp.ul[H]); 136181905Sed v[3] = HHALF(tmp.ul[L]); 137181905Sed v[4] = LHALF(tmp.ul[L]); 138181905Sed for (n = 4; v[1] == 0; v++) { 139181905Sed if (--n == 1) { 140181905Sed u_long rbj; /* r*B+u[j] (not root boy jim) */ 141181905Sed digit q1, q2, q3, q4; 142181905Sed 143181905Sed /* 144181905Sed * Change of plan, per exercise 16. 145181905Sed * r = 0; 146181905Sed * for j = 1..4: 147181905Sed * q[j] = floor((r*B + u[j]) / v), 148181905Sed * r = (r*B + u[j]) % v; 149181905Sed * We unroll this completely here. 150181905Sed */ 151181905Sed t = v[2]; /* nonzero, by definition */ 152181905Sed q1 = u[1] / t; 153181905Sed rbj = COMBINE(u[1] % t, u[2]); 154181905Sed q2 = rbj / t; 155181905Sed rbj = COMBINE(rbj % t, u[3]); 156241161Sed q3 = rbj / t; 157241161Sed rbj = COMBINE(rbj % t, u[4]); 158241161Sed q4 = rbj / t; 159181905Sed if (arq) 160181905Sed *arq = rbj % t; 161181905Sed tmp.ul[H] = COMBINE(q1, q2); 162181905Sed tmp.ul[L] = COMBINE(q3, q4); 163181905Sed return (tmp.q); 164181905Sed } 165181905Sed } 166181905Sed 167181905Sed /* 168181905Sed * By adjusting q once we determine m, we can guarantee that 169181905Sed * there is a complete four-digit quotient at &qspace[1] when 170181905Sed * we finally stop. 171181905Sed */ 172181905Sed for (m = 4 - n; u[1] == 0; u++) 173181905Sed m--; 174181905Sed for (i = 4 - m; --i >= 0;) 175181905Sed q[i] = 0; 176181905Sed q += 4 - m; 177181905Sed 178181905Sed /* 179181905Sed * Here we run Program D, translated from MIX to C and acquiring 180181905Sed * a few minor changes. 181181905Sed * 182181905Sed * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 183181905Sed */ 184181905Sed d = 0; 185181905Sed for (t = v[1]; t < B / 2; t <<= 1) 186181905Sed d++; 187231949Skib if (d > 0) { 188181905Sed shl(&u[0], m + n, d); /* u <<= d */ 189181905Sed shl(&v[1], n - 1, d); /* v <<= d */ 190181905Sed } 191181905Sed /* 192181905Sed * D2: j = 0. 193181905Sed */ 194181905Sed j = 0; 195181905Sed v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 196181905Sed v2 = v[2]; /* for D3 */ 197181905Sed do { 198181905Sed register digit uj0, uj1, uj2; 199242078Sed 200242078Sed /* 201242078Sed * D3: Calculate qhat (\^q, in TeX notation). 202242078Sed * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 203181905Sed * let rhat = (u[j]*B + u[j+1]) mod v[1]. 204181905Sed * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 205181905Sed * decrement qhat and increase rhat correspondingly. 206181905Sed * Note that if rhat >= B, v[2]*qhat < rhat*B. 207181905Sed */ 208181905Sed uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 209181905Sed uj1 = u[j + 1]; /* for D3 only */ 210181905Sed uj2 = u[j + 2]; /* for D3 only */ 211241161Sed if (uj0 == v1) { 212241161Sed qhat = B; 213241161Sed rhat = uj1; 214181905Sed goto qhat_too_big; 215181905Sed } else { 216181905Sed u_long n = COMBINE(uj0, uj1); 217181905Sed qhat = n / v1; 218181905Sed rhat = n % v1; 219181905Sed } 220181905Sed while (v2 * qhat > COMBINE(rhat, uj2)) { 221181905Sed qhat_too_big: 222181905Sed qhat--; 223181905Sed if ((rhat += v1) >= B) 224181905Sed break; 225181905Sed } 226181905Sed /* 227181905Sed * D4: Multiply and subtract. 228181905Sed * The variable `t' holds any borrows across the loop. 229181905Sed * We split this up so that we do not require v[0] = 0, 230181905Sed * and to eliminate a final special case. 231181905Sed */ 232181905Sed for (t = 0, i = n; i > 0; i--) { 233181905Sed t = u[i + j] - v[i] * qhat - t; 234181905Sed u[i + j] = LHALF(t); 235181905Sed t = (B - HHALF(t)) & (B - 1); 236181905Sed } 237181905Sed t = u[j] - t; 238181905Sed u[j] = LHALF(t); 239181905Sed /* 240242078Sed * D5: test remainder. 241242078Sed * There is a borrow if and only if HHALF(t) is nonzero; 242242078Sed * in that (rare) case, qhat was too large (by exactly 1). 243242078Sed * Fix it by adding v[1..n] to u[j..j+n]. 244181905Sed */ 245181905Sed if (HHALF(t)) { 246181905Sed qhat--; 247181905Sed for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 248181905Sed t += u[i + j] + v[i]; 249181905Sed u[i + j] = LHALF(t); 250181905Sed t = HHALF(t); 251181905Sed } 252181905Sed u[j] = LHALF(u[j] + t); 253181905Sed } 254181905Sed q[j] = qhat; 255181905Sed } while (++j <= m); /* D7: loop on j. */ 256181905Sed 257181905Sed /* 258181905Sed * If caller wants the remainder, we have to calculate it as 259181905Sed * u[m..m+n] >> d (this is at most n digits and thus fits in 260181905Sed * u[m+1..m+n], but we may need more source digits). 261181905Sed */ 262181905Sed if (arq) { 263241161Sed if (d) { 264241161Sed for (i = m + n; i > m; --i) 265241161Sed u[i] = (u[i] >> d) | 266181905Sed LHALF(u[i - 1] << (HALF_BITS - d)); 267181905Sed u[i] = 0; 268181905Sed } 269181905Sed tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 270181905Sed tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 271181905Sed *arq = tmp.q; 272181905Sed } 273181905Sed 274181905Sed tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 275181905Sed tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 276181905Sed return (tmp.q); 277181905Sed} 278181905Sed