11541Srgrimes/*- 21541Srgrimes * Copyright (c) 1992, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This software was developed by the Computer Systems Engineering group 61541Srgrimes * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 71541Srgrimes * contributed to Berkeley. 81541Srgrimes * 91541Srgrimes * Redistribution and use in source and binary forms, with or without 101541Srgrimes * modification, are permitted provided that the following conditions 111541Srgrimes * are met: 121541Srgrimes * 1. Redistributions of source code must retain the above copyright 131541Srgrimes * notice, this list of conditions and the following disclaimer. 141541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151541Srgrimes * notice, this list of conditions and the following disclaimer in the 161541Srgrimes * documentation and/or other materials provided with the distribution. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes */ 331541Srgrimes 34116189Sobrien#include <sys/cdefs.h> 35116189Sobrien__FBSDID("$FreeBSD: stable/11/sys/libkern/qdivrem.c 319284 2017-05-31 05:45:06Z delphij $"); 36116189Sobrien 371541Srgrimes/* 381541Srgrimes * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 391541Srgrimes * section 4.3.1, pp. 257--259. 401541Srgrimes */ 411541Srgrimes 4218207Sbde#include <libkern/quad.h> 431541Srgrimes 441541Srgrimes#define B (1 << HALF_BITS) /* digit base */ 451541Srgrimes 461541Srgrimes/* Combine two `digits' to make a single two-digit number. */ 471541Srgrimes#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) 481541Srgrimes 491541Srgrimes/* select a type for digits in base B: use unsigned short if they fit */ 501541Srgrimes#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff 511541Srgrimestypedef unsigned short digit; 521541Srgrimes#else 531541Srgrimestypedef u_long digit; 541541Srgrimes#endif 551541Srgrimes 561541Srgrimes/* 571541Srgrimes * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 581541Srgrimes * `fall out' the left (there never will be any such anyway). 591541Srgrimes * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 601541Srgrimes */ 611541Srgrimesstatic void 62319284Sdelphij__shl(digit *p, int len, int sh) 631541Srgrimes{ 64319284Sdelphij int i; 651541Srgrimes 661541Srgrimes for (i = 0; i < len; i++) 671541Srgrimes p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); 681541Srgrimes p[i] = LHALF(p[i] << sh); 691541Srgrimes} 701541Srgrimes 711541Srgrimes/* 721541Srgrimes * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 731541Srgrimes * 741541Srgrimes * We do this in base 2-sup-HALF_BITS, so that all intermediate products 751541Srgrimes * fit within u_long. As a consequence, the maximum length dividend and 761541Srgrimes * divisor are 4 `digits' in this base (they are shorter if they have 771541Srgrimes * leading zeros). 781541Srgrimes */ 791541Srgrimesu_quad_t 801541Srgrimes__qdivrem(uq, vq, arq) 811541Srgrimes u_quad_t uq, vq, *arq; 821541Srgrimes{ 831541Srgrimes union uu tmp; 841541Srgrimes digit *u, *v, *q; 85319284Sdelphij digit v1, v2; 861541Srgrimes u_long qhat, rhat, t; 871541Srgrimes int m, n, d, j, i; 881541Srgrimes digit uspace[5], vspace[5], qspace[5]; 891541Srgrimes 901541Srgrimes /* 911541Srgrimes * Take care of special cases: divide by zero, and u < v. 921541Srgrimes */ 931541Srgrimes if (vq == 0) { 941541Srgrimes /* divide by zero. */ 951541Srgrimes static volatile const unsigned int zero = 0; 961541Srgrimes 971541Srgrimes tmp.ul[H] = tmp.ul[L] = 1 / zero; 981541Srgrimes if (arq) 991541Srgrimes *arq = uq; 1001541Srgrimes return (tmp.q); 1011541Srgrimes } 1021541Srgrimes if (uq < vq) { 1031541Srgrimes if (arq) 1041541Srgrimes *arq = uq; 1051541Srgrimes return (0); 1061541Srgrimes } 1071541Srgrimes u = &uspace[0]; 1081541Srgrimes v = &vspace[0]; 1091541Srgrimes q = &qspace[0]; 1101541Srgrimes 1111541Srgrimes /* 1121541Srgrimes * Break dividend and divisor into digits in base B, then 1131541Srgrimes * count leading zeros to determine m and n. When done, we 1141541Srgrimes * will have: 1151541Srgrimes * u = (u[1]u[2]...u[m+n]) sub B 1161541Srgrimes * v = (v[1]v[2]...v[n]) sub B 1171541Srgrimes * v[1] != 0 1181541Srgrimes * 1 < n <= 4 (if n = 1, we use a different division algorithm) 1191541Srgrimes * m >= 0 (otherwise u < v, which we already checked) 1201541Srgrimes * m + n = 4 1211541Srgrimes * and thus 1221541Srgrimes * m = 4 - n <= 2 1231541Srgrimes */ 1241541Srgrimes tmp.uq = uq; 1251541Srgrimes u[0] = 0; 1261541Srgrimes u[1] = HHALF(tmp.ul[H]); 1271541Srgrimes u[2] = LHALF(tmp.ul[H]); 1281541Srgrimes u[3] = HHALF(tmp.ul[L]); 1291541Srgrimes u[4] = LHALF(tmp.ul[L]); 1301541Srgrimes tmp.uq = vq; 1311541Srgrimes v[1] = HHALF(tmp.ul[H]); 1321541Srgrimes v[2] = LHALF(tmp.ul[H]); 1331541Srgrimes v[3] = HHALF(tmp.ul[L]); 1341541Srgrimes v[4] = LHALF(tmp.ul[L]); 1351541Srgrimes for (n = 4; v[1] == 0; v++) { 1361541Srgrimes if (--n == 1) { 1371541Srgrimes u_long rbj; /* r*B+u[j] (not root boy jim) */ 1381541Srgrimes digit q1, q2, q3, q4; 1391541Srgrimes 1401541Srgrimes /* 1411541Srgrimes * Change of plan, per exercise 16. 1421541Srgrimes * r = 0; 1431541Srgrimes * for j = 1..4: 1441541Srgrimes * q[j] = floor((r*B + u[j]) / v), 1451541Srgrimes * r = (r*B + u[j]) % v; 1461541Srgrimes * We unroll this completely here. 1471541Srgrimes */ 1481541Srgrimes t = v[2]; /* nonzero, by definition */ 1491541Srgrimes q1 = u[1] / t; 1501541Srgrimes rbj = COMBINE(u[1] % t, u[2]); 1511541Srgrimes q2 = rbj / t; 1521541Srgrimes rbj = COMBINE(rbj % t, u[3]); 1531541Srgrimes q3 = rbj / t; 1541541Srgrimes rbj = COMBINE(rbj % t, u[4]); 1551541Srgrimes q4 = rbj / t; 1561541Srgrimes if (arq) 1571541Srgrimes *arq = rbj % t; 1581541Srgrimes tmp.ul[H] = COMBINE(q1, q2); 1591541Srgrimes tmp.ul[L] = COMBINE(q3, q4); 1601541Srgrimes return (tmp.q); 1611541Srgrimes } 1621541Srgrimes } 1631541Srgrimes 1641541Srgrimes /* 1651541Srgrimes * By adjusting q once we determine m, we can guarantee that 1661541Srgrimes * there is a complete four-digit quotient at &qspace[1] when 1671541Srgrimes * we finally stop. 1681541Srgrimes */ 1691541Srgrimes for (m = 4 - n; u[1] == 0; u++) 1701541Srgrimes m--; 1711541Srgrimes for (i = 4 - m; --i >= 0;) 1721541Srgrimes q[i] = 0; 1731541Srgrimes q += 4 - m; 1741541Srgrimes 1751541Srgrimes /* 1761541Srgrimes * Here we run Program D, translated from MIX to C and acquiring 1771541Srgrimes * a few minor changes. 1781541Srgrimes * 1791541Srgrimes * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 1801541Srgrimes */ 1811541Srgrimes d = 0; 1821541Srgrimes for (t = v[1]; t < B / 2; t <<= 1) 1831541Srgrimes d++; 1841541Srgrimes if (d > 0) { 185183733Sthompsa __shl(&u[0], m + n, d); /* u <<= d */ 186183733Sthompsa __shl(&v[1], n - 1, d); /* v <<= d */ 1871541Srgrimes } 1881541Srgrimes /* 1891541Srgrimes * D2: j = 0. 1901541Srgrimes */ 1911541Srgrimes j = 0; 1921541Srgrimes v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 1931541Srgrimes v2 = v[2]; /* for D3 */ 1941541Srgrimes do { 195319284Sdelphij digit uj0, uj1, uj2; 1968876Srgrimes 1971541Srgrimes /* 1981541Srgrimes * D3: Calculate qhat (\^q, in TeX notation). 1991541Srgrimes * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 2001541Srgrimes * let rhat = (u[j]*B + u[j+1]) mod v[1]. 2011541Srgrimes * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 2021541Srgrimes * decrement qhat and increase rhat correspondingly. 2031541Srgrimes * Note that if rhat >= B, v[2]*qhat < rhat*B. 2041541Srgrimes */ 2051541Srgrimes uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 2061541Srgrimes uj1 = u[j + 1]; /* for D3 only */ 2071541Srgrimes uj2 = u[j + 2]; /* for D3 only */ 2081541Srgrimes if (uj0 == v1) { 2091541Srgrimes qhat = B; 2101541Srgrimes rhat = uj1; 2111541Srgrimes goto qhat_too_big; 2121541Srgrimes } else { 21331017Sphk u_long nn = COMBINE(uj0, uj1); 21431017Sphk qhat = nn / v1; 21531017Sphk rhat = nn % v1; 2161541Srgrimes } 2171541Srgrimes while (v2 * qhat > COMBINE(rhat, uj2)) { 2181541Srgrimes qhat_too_big: 2191541Srgrimes qhat--; 2201541Srgrimes if ((rhat += v1) >= B) 2211541Srgrimes break; 2221541Srgrimes } 2231541Srgrimes /* 2241541Srgrimes * D4: Multiply and subtract. 2251541Srgrimes * The variable `t' holds any borrows across the loop. 2261541Srgrimes * We split this up so that we do not require v[0] = 0, 2271541Srgrimes * and to eliminate a final special case. 2281541Srgrimes */ 2291541Srgrimes for (t = 0, i = n; i > 0; i--) { 2301541Srgrimes t = u[i + j] - v[i] * qhat - t; 2311541Srgrimes u[i + j] = LHALF(t); 2321541Srgrimes t = (B - HHALF(t)) & (B - 1); 2331541Srgrimes } 2341541Srgrimes t = u[j] - t; 2351541Srgrimes u[j] = LHALF(t); 2361541Srgrimes /* 2371541Srgrimes * D5: test remainder. 2381541Srgrimes * There is a borrow if and only if HHALF(t) is nonzero; 2391541Srgrimes * in that (rare) case, qhat was too large (by exactly 1). 2401541Srgrimes * Fix it by adding v[1..n] to u[j..j+n]. 2411541Srgrimes */ 2421541Srgrimes if (HHALF(t)) { 2431541Srgrimes qhat--; 2441541Srgrimes for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 2451541Srgrimes t += u[i + j] + v[i]; 2461541Srgrimes u[i + j] = LHALF(t); 2471541Srgrimes t = HHALF(t); 2481541Srgrimes } 2491541Srgrimes u[j] = LHALF(u[j] + t); 2501541Srgrimes } 2511541Srgrimes q[j] = qhat; 2521541Srgrimes } while (++j <= m); /* D7: loop on j. */ 2531541Srgrimes 2541541Srgrimes /* 2551541Srgrimes * If caller wants the remainder, we have to calculate it as 2561541Srgrimes * u[m..m+n] >> d (this is at most n digits and thus fits in 2571541Srgrimes * u[m+1..m+n], but we may need more source digits). 2581541Srgrimes */ 2591541Srgrimes if (arq) { 2601541Srgrimes if (d) { 2611541Srgrimes for (i = m + n; i > m; --i) 2621541Srgrimes u[i] = (u[i] >> d) | 2631541Srgrimes LHALF(u[i - 1] << (HALF_BITS - d)); 2641541Srgrimes u[i] = 0; 2651541Srgrimes } 2661541Srgrimes tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 2671541Srgrimes tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 2681541Srgrimes *arq = tmp.q; 2691541Srgrimes } 2701541Srgrimes 2711541Srgrimes tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 2721541Srgrimes tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 2731541Srgrimes return (tmp.q); 2741541Srgrimes} 275