fpu_div.c revision 92889
1139743Simp/* 265577Sdes * Copyright (c) 1992, 1993 365577Sdes * The Regents of the University of California. All rights reserved. 459412Smsmith * 559412Smsmith * This software was developed by the Computer Systems Engineering group 659412Smsmith * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 759412Smsmith * contributed to Berkeley. 859412Smsmith * 959412Smsmith * All advertising materials mentioning features or use of this software 1059412Smsmith * must display the following acknowledgement: 1159412Smsmith * This product includes software developed by the University of 1259412Smsmith * California, Lawrence Berkeley Laboratory. 1359412Smsmith * 1459412Smsmith * Redistribution and use in source and binary forms, with or without 1559412Smsmith * modification, are permitted provided that the following conditions 1659412Smsmith * are met: 1759412Smsmith * 1. Redistributions of source code must retain the above copyright 1859412Smsmith * notice, this list of conditions and the following disclaimer. 1959412Smsmith * 2. Redistributions in binary form must reproduce the above copyright 2059412Smsmith * notice, this list of conditions and the following disclaimer in the 2159412Smsmith * documentation and/or other materials provided with the distribution. 2259412Smsmith * 3. All advertising materials mentioning features or use of this software 2359412Smsmith * must display the following acknowledgement: 2459412Smsmith * This product includes software developed by the University of 2559412Smsmith * California, Berkeley and its contributors. 2659412Smsmith * 4. Neither the name of the University nor the names of its contributors 2759412Smsmith * may be used to endorse or promote products derived from this software 2859412Smsmith * without specific prior written permission. 2959412Smsmith * 3059412Smsmith * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 3159412Smsmith * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3259412Smsmith * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3359412Smsmith * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 3459412Smsmith * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3559412Smsmith * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3659412Smsmith * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3759412Smsmith * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3859412Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3959412Smsmith * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 4059412Smsmith * SUCH DAMAGE. 4159412Smsmith * 42116173Sobrien * @(#)fpu_div.c 8.1 (Berkeley) 6/11/93 43116173Sobrien * from: NetBSD: fpu_div.c,v 1.2 1994/11/20 20:52:38 deraadt Exp 44116173Sobrien * 4559412Smsmith * $FreeBSD: head/lib/libc/sparc64/fpu/fpu_div.c 92889 2002-03-21 18:49:23Z obrien $ 4683926Sdes */ 4776166Smarkm 4874135Sjlemon/* 4983926Sdes * Perform an FPU divide (return x / y). 50119911Sdes */ 5176166Smarkm 5265633Sdes#include <sys/types.h> 5383926Sdes 5476166Smarkm#include <machine/frame.h> 5574135Sjlemon#include <machine/fp.h> 5678025Sdes#include <machine/fsr.h> 5776827Salfred 5885289Sdes#include "fpu_arith.h" 5965633Sdes#include "fpu_emu.h" 6065633Sdes#include "fpu_extern.h" 6169995Sdes 62123246Sdes/* 6383926Sdes * Division of normal numbers is done as follows: 6476839Sjlemon * 6583926Sdes * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e. 66159995Snetchild * If X and Y are the mantissas (1.bbbb's), the quotient is then: 6765633Sdes * 6883926Sdes * q = (X / Y) * 2^((x exponent) - (y exponent)) 6983926Sdes * 7059412Smsmith * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y) 7159412Smsmith * will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only 7283926Sdes * if X < Y. In that case, it will have to be shifted left one bit to 7383926Sdes * become a normal number, and the exponent decremented. Thus, the 7459412Smsmith * desired exponent is: 7559412Smsmith * 7667588Sdes * left_shift = x->fp_mant < y->fp_mant; 7759412Smsmith * result_exp = x->fp_exp - y->fp_exp - left_shift; 7860860Sdes * 7959412Smsmith * The quotient mantissa X/Y can then be computed one bit at a time 8069799Sdes * using the following algorithm: 8167589Sdes * 8278113Sdes * Q = 0; -- Initial quotient. 83133822Stjr * R = X; -- Initial remainder, 8467589Sdes * if (left_shift) -- but fixed up in advance. 8559412Smsmith * R *= 2; 86133822Stjr * for (bit = FP_NMANT; --bit >= 0; R *= 2) { 8759412Smsmith * if (R >= Y) { 88133822Stjr * Q |= 1 << bit; 89140214Sobrien * R -= Y; 90140214Sobrien * } 91140214Sobrien * } 9287275Srwatson * 93133822Stjr * The subtraction R -= Y always removes the uppermost bit from R (and 9485129Sdes * can sometimes remove additional lower-order 1 bits); this proof is 9569995Sdes * left to the reader. 9685289Sdes * 9778025Sdes * This loop correctly calculates the guard and round bits since they are 9884248Sdes * included in the expanded internal representation. The sticky bit 9959412Smsmith * is to be set if and only if any other bits beyond guard and round 10067588Sdes * would be set. From the above it is obvious that this is true if and 10167588Sdes * only if the remainder R is nonzero when the loop terminates. 10267588Sdes * 10376405Sdes * Examining the loop above, we can see that the quotient Q is built 10467588Sdes * one bit at a time ``from the top down''. This means that we can 10567588Sdes * dispense with the multi-word arithmetic and just build it one word 10669799Sdes * at a time, writing each result word when it is done. 10767588Sdes * 10867588Sdes * Furthermore, since X and Y are both in [1.0,2.0), we know that, 10974135Sjlemon * initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and 110159995Snetchild * is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1 111159995Snetchild * set, and R can be set initially to either X - Y (when X >= Y) or 112159995Snetchild * 2X - Y (when X < Y). In addition, comparing R and Y is difficult, 113159995Snetchild * so we will simply calculate R - Y and see if that underflows. 114159995Snetchild * This leads to the following revised version of the algorithm: 115159995Snetchild * 116159995Snetchild * R = X; 117159995Snetchild * bit = FP_1; 118159995Snetchild * D = R - Y; 119159995Snetchild * if (D >= 0) { 120159995Snetchild * result_exp = x->fp_exp - y->fp_exp; 121159995Snetchild * R = D; 122159995Snetchild * q = bit; 123159995Snetchild * bit >>= 1; 124159995Snetchild * } else { 125159995Snetchild * result_exp = x->fp_exp - y->fp_exp - 1; 12678113Sdes * q = 0; 12778113Sdes * } 12878113Sdes * R <<= 1; 12978025Sdes * do { 13078025Sdes * D = R - Y; 13159412Smsmith * if (D >= 0) { 13259412Smsmith * q |= bit; 13359412Smsmith * R = D; 13459412Smsmith * } 13559412Smsmith * R <<= 1; 13659412Smsmith * } while ((bit >>= 1) != 0); 137113574Sjhb * Q[0] = q; 138113574Sjhb * for (i = 1; i < 4; i++) { 139113574Sjhb * q = 0, bit = 1 << 31; 14060860Sdes * do { 141117723Sphk * D = R - Y; 14259412Smsmith * if (D >= 0) { 14359412Smsmith * q |= bit; 14459412Smsmith * R = D; 14559412Smsmith * } 14659412Smsmith * R <<= 1; 14759412Smsmith * } while ((bit >>= 1) != 0); 14859412Smsmith * Q[i] = q; 14959412Smsmith * } 15059412Smsmith * 15159412Smsmith * This can be refined just a bit further by moving the `R <<= 1' 15259412Smsmith * calculations to the front of the do-loops and eliding the first one. 15359412Smsmith * The process can be terminated immediately whenever R becomes 0, but 15459412Smsmith * this is relatively rare, and we do not bother. 15559412Smsmith */ 156117723Sphk 157153310Smlaierstruct fpn * 158153310Smlaier__fpu_div(fe) 159117723Sphk struct fpemu *fe; 16060860Sdes{ 161124082Salc struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; 16271471Sjhb u_int q, bit; 16360860Sdes u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; 16460860Sdes FPU_DECL_CARRY 165124082Salc 16660860Sdes /* 16759412Smsmith * Since divide is not commutative, we cannot just use ORDER. 16859412Smsmith * Check either operand for NaN first; if there is at least one, 16959412Smsmith * order the signalling one (if only one) onto the right, then 17059412Smsmith * return it. Otherwise we have the following cases: 17159412Smsmith * 17259412Smsmith * Inf / Inf = NaN, plus NV exception 17359412Smsmith * Inf / num = Inf [i.e., return x] 17459412Smsmith * Inf / 0 = Inf [i.e., return x] 17559412Smsmith * 0 / Inf = 0 [i.e., return x] 17659412Smsmith * 0 / num = 0 [i.e., return x] 17759412Smsmith * 0 / 0 = NaN, plus NV exception 17878025Sdes * num / Inf = 0 17978031Sdes * num / num = num (do the divide) 18069799Sdes * num / 0 = Inf, plus DZ exception 18176839Sjlemon */ 18269799Sdes if (ISNAN(x) || ISNAN(y)) { 18369799Sdes ORDER(x, y); 18469799Sdes return (y); 18569799Sdes } 18669799Sdes if (ISINF(x) || ISZERO(x)) { 18776839Sjlemon if (x->fp_class == y->fp_class) 18876839Sjlemon return (__fpu_newnan(fe)); 18969799Sdes return (x); 19069799Sdes } 19169799Sdes 19269799Sdes /* all results at this point use XOR of operand signs */ 19369799Sdes x->fp_sign ^= y->fp_sign; 19459412Smsmith if (ISINF(y)) { 19578025Sdes x->fp_class = FPC_ZERO; 19659412Smsmith return (x); 19759412Smsmith } 198133822Stjr if (ISZERO(y)) { 19978113Sdes fe->fe_cx = FSR_DZ; 200133822Stjr x->fp_class = FPC_INF; 20178113Sdes return (x); 20278113Sdes } 20378113Sdes 20478113Sdes /* 205159544Sdes * Macros for the divide. See comments at top for algorithm. 206159544Sdes * Note that we expand R, D, and Y here. 207159544Sdes */ 208123246Sdes 209118421Sdes#define SUBTRACT /* D = R - Y */ \ 21059412Smsmith FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ 21169799Sdes FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) 21278031Sdes 21378031Sdes#define NONNEGATIVE /* D >= 0 */ \ 21469799Sdes ((int)d0 >= 0) 21578031Sdes 21678031Sdes#ifdef FPU_SHL1_BY_ADD 21778031Sdes#define SHL1 /* R <<= 1 */ \ 21878031Sdes FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ 21978031Sdes FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) 22078031Sdes#else 22178031Sdes#define SHL1 \ 22267589Sdes r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ 22367589Sdes r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 22467589Sdes#endif 22559412Smsmith 226133822Stjr#define LOOP /* do ... while (bit >>= 1) */ \ 22759412Smsmith do { \ 22867589Sdes SHL1; \ 22959412Smsmith SUBTRACT; \ 23059412Smsmith if (NONNEGATIVE) { \ 23167589Sdes q |= bit; \ 23259412Smsmith r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ 23359412Smsmith } \ 23467589Sdes } while ((bit >>= 1) != 0) 23559412Smsmith 23659412Smsmith#define WORD(r, i) /* calculate r->fp_mant[i] */ \ 23767589Sdes q = 0; \ 23859412Smsmith bit = 1 << 31; \ 23959412Smsmith LOOP; \ 24067589Sdes (x)->fp_mant[i] = q 24159412Smsmith 24259412Smsmith /* Setup. Note that we put our result in x. */ 24378031Sdes r0 = x->fp_mant[0]; 24459412Smsmith r1 = x->fp_mant[1]; 245159170Sdes r2 = x->fp_mant[2]; 246133822Stjr r3 = x->fp_mant[3]; 247159170Sdes y0 = y->fp_mant[0]; 248133822Stjr y1 = y->fp_mant[1]; 249133822Stjr y2 = y->fp_mant[2]; 25059412Smsmith y3 = y->fp_mant[3]; 25159412Smsmith 252159544Sdes bit = FP_1; 253159544Sdes SUBTRACT; 254159544Sdes if (NONNEGATIVE) { 255159544Sdes x->fp_exp -= y->fp_exp; 256159544Sdes r0 = d0, r1 = d1, r2 = d2, r3 = d3; 257159544Sdes q = bit; 258123246Sdes bit >>= 1; 259118421Sdes } else { 260118421Sdes x->fp_exp -= y->fp_exp + 1; 261118421Sdes q = 0; 262118421Sdes } 263118421Sdes LOOP; 264159544Sdes x->fp_mant[0] = q; 265118421Sdes WORD(x, 1); 266159544Sdes WORD(x, 2); 267159544Sdes WORD(x, 3); 268118421Sdes x->fp_sticky = r0 | r1 | r2 | r3; 26959412Smsmith 27078031Sdes return (x); 27178031Sdes} 27267589Sdes