fpu_div.c revision 91174
10SN/A/*
214812Sserb * Copyright (c) 1992, 1993
30SN/A *	The Regents of the University of California.  All rights reserved.
40SN/A *
50SN/A * This software was developed by the Computer Systems Engineering group
60SN/A * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
72362SN/A * contributed to Berkeley.
80SN/A *
92362SN/A * All advertising materials mentioning features or use of this software
100SN/A * must display the following acknowledgement:
110SN/A *	This product includes software developed by the University of
120SN/A *	California, Lawrence Berkeley Laboratory.
130SN/A *
140SN/A * Redistribution and use in source and binary forms, with or without
150SN/A * modification, are permitted provided that the following conditions
160SN/A * are met:
170SN/A * 1. Redistributions of source code must retain the above copyright
180SN/A *    notice, this list of conditions and the following disclaimer.
190SN/A * 2. Redistributions in binary form must reproduce the above copyright
200SN/A *    notice, this list of conditions and the following disclaimer in the
212362SN/A *    documentation and/or other materials provided with the distribution.
222362SN/A * 3. All advertising materials mentioning features or use of this software
232362SN/A *    must display the following acknowledgement:
240SN/A *	This product includes software developed by the University of
250SN/A *	California, Berkeley and its contributors.
260SN/A * 4. Neither the name of the University nor the names of its contributors
270SN/A *    may be used to endorse or promote products derived from this software
280SN/A *    without specific prior written permission.
290SN/A *
300SN/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
310SN/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
325768SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
330SN/A * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3414812Sserb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3514812Sserb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
360SN/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
370SN/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
380SN/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
390SN/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
400SN/A * SUCH DAMAGE.
410SN/A *
420SN/A *	@(#)fpu_div.c	8.1 (Berkeley) 6/11/93
430SN/A *	from: NetBSD: fpu_div.c,v 1.2 1994/11/20 20:52:38 deraadt Exp
440SN/A *
450SN/A * $FreeBSD: head/lib/libc/sparc64/fpu/fpu_div.c 91174 2002-02-23 21:37:18Z tmm $
460SN/A */
470SN/A
480SN/A/*
490SN/A * Perform an FPU divide (return x / y).
500SN/A */
510SN/A
520SN/A#include <sys/types.h>
5314812Sserb
540SN/A#include <machine/frame.h>
550SN/A#include <machine/fp.h>
560SN/A#include <machine/fsr.h>
570SN/A
580SN/A#include "fpu_arith.h"
590SN/A#include "fpu_emu.h"
600SN/A#include "fpu_extern.h"
610SN/A
620SN/A/*
630SN/A * Division of normal numbers is done as follows:
645768SN/A *
655768SN/A * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e.
660SN/A * If X and Y are the mantissas (1.bbbb's), the quotient is then:
670SN/A *
680SN/A *	q = (X / Y) * 2^((x exponent) - (y exponent))
690SN/A *
700SN/A * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y)
710SN/A * will be in [0.5,2.0).  Moreover, it will be less than 1.0 if and only
720SN/A * if X < Y.  In that case, it will have to be shifted left one bit to
730SN/A * become a normal number, and the exponent decremented.  Thus, the
740SN/A * desired exponent is:
750SN/A *
760SN/A *	left_shift = x->fp_mant < y->fp_mant;
7714812Sserb *	result_exp = x->fp_exp - y->fp_exp - left_shift;
7814812Sserb *
790SN/A * The quotient mantissa X/Y can then be computed one bit at a time
8014812Sserb * using the following algorithm:
810SN/A *
820SN/A *	Q = 0;			-- Initial quotient.
830SN/A *	R = X;			-- Initial remainder,
840SN/A *	if (left_shift)		--   but fixed up in advance.
850SN/A *		R *= 2;
860SN/A *	for (bit = FP_NMANT; --bit >= 0; R *= 2) {
870SN/A *		if (R >= Y) {
880SN/A *			Q |= 1 << bit;
890SN/A *			R -= Y;
900SN/A *		}
910SN/A *	}
920SN/A *
930SN/A * The subtraction R -= Y always removes the uppermost bit from R (and
940SN/A * can sometimes remove additional lower-order 1 bits); this proof is
950SN/A * left to the reader.
960SN/A *
970SN/A * This loop correctly calculates the guard and round bits since they are
980SN/A * included in the expanded internal representation.  The sticky bit
990SN/A * is to be set if and only if any other bits beyond guard and round
1000SN/A * would be set.  From the above it is obvious that this is true if and
1010SN/A * only if the remainder R is nonzero when the loop terminates.
1020SN/A *
1030SN/A * Examining the loop above, we can see that the quotient Q is built
1040SN/A * one bit at a time ``from the top down''.  This means that we can
1050SN/A * dispense with the multi-word arithmetic and just build it one word
1060SN/A * at a time, writing each result word when it is done.
1070SN/A *
1080SN/A * Furthermore, since X and Y are both in [1.0,2.0), we know that,
1090SN/A * initially, R >= Y.  (Recall that, if X < Y, R is set to X * 2 and
1100SN/A * is therefore at in [2.0,4.0).)  Thus Q is sure to have bit FP_NMANT-1
1110SN/A * set, and R can be set initially to either X - Y (when X >= Y) or
112 * 2X - Y (when X < Y).  In addition, comparing R and Y is difficult,
113 * so we will simply calculate R - Y and see if that underflows.
114 * This leads to the following revised version of the algorithm:
115 *
116 *	R = X;
117 *	bit = FP_1;
118 *	D = R - Y;
119 *	if (D >= 0) {
120 *		result_exp = x->fp_exp - y->fp_exp;
121 *		R = D;
122 *		q = bit;
123 *		bit >>= 1;
124 *	} else {
125 *		result_exp = x->fp_exp - y->fp_exp - 1;
126 *		q = 0;
127 *	}
128 *	R <<= 1;
129 *	do  {
130 *		D = R - Y;
131 *		if (D >= 0) {
132 *			q |= bit;
133 *			R = D;
134 *		}
135 *		R <<= 1;
136 *	} while ((bit >>= 1) != 0);
137 *	Q[0] = q;
138 *	for (i = 1; i < 4; i++) {
139 *		q = 0, bit = 1 << 31;
140 *		do {
141 *			D = R - Y;
142 *			if (D >= 0) {
143 *				q |= bit;
144 *				R = D;
145 *			}
146 *			R <<= 1;
147 *		} while ((bit >>= 1) != 0);
148 *		Q[i] = q;
149 *	}
150 *
151 * This can be refined just a bit further by moving the `R <<= 1'
152 * calculations to the front of the do-loops and eliding the first one.
153 * The process can be terminated immediately whenever R becomes 0, but
154 * this is relatively rare, and we do not bother.
155 */
156
157struct fpn *
158__fpu_div(fe)
159	register struct fpemu *fe;
160{
161	register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
162	register u_int q, bit;
163	register u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3;
164	FPU_DECL_CARRY
165
166	/*
167	 * Since divide is not commutative, we cannot just use ORDER.
168	 * Check either operand for NaN first; if there is at least one,
169	 * order the signalling one (if only one) onto the right, then
170	 * return it.  Otherwise we have the following cases:
171	 *
172	 *	Inf / Inf = NaN, plus NV exception
173	 *	Inf / num = Inf [i.e., return x]
174	 *	Inf / 0   = Inf [i.e., return x]
175	 *	0 / Inf = 0 [i.e., return x]
176	 *	0 / num = 0 [i.e., return x]
177	 *	0 / 0   = NaN, plus NV exception
178	 *	num / Inf = 0
179	 *	num / num = num (do the divide)
180	 *	num / 0   = Inf, plus DZ exception
181	 */
182	if (ISNAN(x) || ISNAN(y)) {
183		ORDER(x, y);
184		return (y);
185	}
186	if (ISINF(x) || ISZERO(x)) {
187		if (x->fp_class == y->fp_class)
188			return (__fpu_newnan(fe));
189		return (x);
190	}
191
192	/* all results at this point use XOR of operand signs */
193	x->fp_sign ^= y->fp_sign;
194	if (ISINF(y)) {
195		x->fp_class = FPC_ZERO;
196		return (x);
197	}
198	if (ISZERO(y)) {
199		fe->fe_cx = FSR_DZ;
200		x->fp_class = FPC_INF;
201		return (x);
202	}
203
204	/*
205	 * Macros for the divide.  See comments at top for algorithm.
206	 * Note that we expand R, D, and Y here.
207	 */
208
209#define	SUBTRACT		/* D = R - Y */ \
210	FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \
211	FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0)
212
213#define	NONNEGATIVE		/* D >= 0 */ \
214	((int)d0 >= 0)
215
216#ifdef FPU_SHL1_BY_ADD
217#define	SHL1			/* R <<= 1 */ \
218	FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \
219	FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0)
220#else
221#define	SHL1 \
222	r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \
223	r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1
224#endif
225
226#define	LOOP			/* do ... while (bit >>= 1) */ \
227	do { \
228		SHL1; \
229		SUBTRACT; \
230		if (NONNEGATIVE) { \
231			q |= bit; \
232			r0 = d0, r1 = d1, r2 = d2, r3 = d3; \
233		} \
234	} while ((bit >>= 1) != 0)
235
236#define	WORD(r, i)			/* calculate r->fp_mant[i] */ \
237	q = 0; \
238	bit = 1 << 31; \
239	LOOP; \
240	(x)->fp_mant[i] = q
241
242	/* Setup.  Note that we put our result in x. */
243	r0 = x->fp_mant[0];
244	r1 = x->fp_mant[1];
245	r2 = x->fp_mant[2];
246	r3 = x->fp_mant[3];
247	y0 = y->fp_mant[0];
248	y1 = y->fp_mant[1];
249	y2 = y->fp_mant[2];
250	y3 = y->fp_mant[3];
251
252	bit = FP_1;
253	SUBTRACT;
254	if (NONNEGATIVE) {
255		x->fp_exp -= y->fp_exp;
256		r0 = d0, r1 = d1, r2 = d2, r3 = d3;
257		q = bit;
258		bit >>= 1;
259	} else {
260		x->fp_exp -= y->fp_exp + 1;
261		q = 0;
262	}
263	LOOP;
264	x->fp_mant[0] = q;
265	WORD(x, 1);
266	WORD(x, 2);
267	WORD(x, 3);
268	x->fp_sticky = r0 | r1 | r2 | r3;
269
270	return (x);
271}
272