1/*	$OpenBSD: divrem.m4,v 1.3 1996/10/31 00:43:17 niklas Exp $	*/
2/*	$NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $	*/
3
4/*
5 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: Chris G. Demetriou
9 * 
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 * 
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
17 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 * 
20 * Carnegie Mellon requests users of this software to return to
21 *
22 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23 *  School of Computer Science
24 *  Carnegie Mellon University
25 *  Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31/*
32 * Division and remainder.
33 *
34 * The use of m4 is modeled after the sparc code, but the algorithm is
35 * simple binary long division.
36 *
37 * Note that the loops could probably benefit from unrolling.
38 */
39
40/*
41 * M4 Parameters
42 * NAME		name of function to generate
43 * OP		OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
44 * S		S=true: signed; S=false: unsigned
45 * WORDSIZE	total number of bits
46 */
47
/*
 * Register assignments.  The two operands arrive in t10 and t11 and
 * the answer is handed back in t12.  The other names are scratch
 * registers; the sign-tracking register is only defined for the
 * signed variants.
 */
define(`A', `t10')
define(`B', `t11')
define(`RESULT', `t12')

define(`BIT', `t0')
define(`I', `t1')
define(`CC', `t2')
define(`T_0', `t3')
ifelse(S, `true', `define(`NEG', `t4')')
57
58#include <machine/asm.h>
59
/*
 * Generated divide/remainder entry point.
 *
 * Inputs:	t10 = dividend, t11 = divisor.
 * Output:	t12 = quotient or remainder, depending on how this file
 *		was run through m4.
 * Return:	through t9.
 *
 * Every scratch register, plus both operand registers, is spilled to
 * a 64-byte stack frame on entry and reloaded before returning, so the
 * routine itself leaves everything but t12 as it found it.
 *
 * Dividing by zero raises a gentrap with code -2.  If execution
 * resumes afterwards, zero is returned.  The trap code has to be
 * passed in a0, so a0 is parked in the spare frame slot at 56(sp)
 * around the trap and reloaded afterwards.
 */
LEAF(NAME, 0)					/* XXX */
	lda	sp, -64(sp)			/* 56(sp) is used only by Ldotrap */
	stq	BIT, 0(sp)
	stq	I, 8(sp)
	stq	CC, 16(sp)
	stq	T_0, 24(sp)
ifelse(S, `true',
`	stq	NEG, 32(sp)')
	stq	A, 40(sp)
	stq	B, 48(sp)
	mov	zero, RESULT			/* Initialize result to zero */

ifelse(S, `true',
`
	/* Compute sign of result.  If either is negative, this is easy.  */
	or	A, B, NEG			/* not the sign, but... */
	srl	NEG, WORDSIZE - 1, NEG		/* rather, or of high bits */
	blbc	NEG, Ldoit			/* neither negative? do it! */

ifelse(OP, `div',
`	xor	A, B, NEG			/* THIS is the sign! */
', `	mov	A, NEG				/* sign follows A. */
')
	srl	NEG, WORDSIZE - 1, NEG		/* make negation the low bit. */

	srl	A, WORDSIZE - 1, I		/* is A negative? */
	blbc	I, LnegB			/* no. */
	/* A is negative; flip it. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	A, 0xf0, A
')
	subq	zero, A, A
	srl	B, WORDSIZE - 1, I		/* is B negative? */
	blbc	I, Ldoit			/* no. */
LnegB:
	/* B is definitely negative, no matter how we got here. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	B, 0xf0, B
')
	subq	zero, B, B
Ldoit:
')
ifelse(WORDSIZE, `32', `
	/*
	 * Clear the top 32 bits of each operand, as they may hold
	 * sign extension (if negated above), or random junk.
	 */
	zap	A, 0xf0, A
	zap	B, 0xf0, B
')

	/* kill the special cases. */
	beq	B, Ldotrap			/* division by zero! */

	cmpult	A, B, CC			/* A < B? */
	/* RESULT is already zero, from above.  A is untouched. */
	bne	CC, Lret_result

	cmpeq	A, B, CC			/* A == B? */
	cmovne	CC, 1, RESULT			/* quotient is one... */
	cmovne	CC, zero, A			/* ...and nothing is left over */
	bne	CC, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	T_0, 1				/* I = 0; BIT = 1<<WORDSIZE-1 */
	mov	zero, I
	sll	T_0, WORDSIZE-1, BIT
LBloop:
	and	B, BIT, CC			/* if bit in B is set, done. */
	bne	CC, LAbits
	addq	I, 1, I				/* increment I, shift bit */
	srl	BIT, 1, BIT
	cmplt	I, WORDSIZE-1, CC		/* if I leaves one bit, done. */
	bne	CC, LBloop

	/*
	 * Take one off the count for every zero bit at the beginning of
	 * the dividend, stopping at its first set bit or when the count
	 * runs out.  What remains is how far the divisor must be moved
	 * left to line up with the dividend.
	 */
LAbits:
	beq	I, Ldodiv			/* If I = 0, divide now.  */
	ldiq	T_0, 1				/* BIT = 1<<WORDSIZE-1 */
	sll	T_0, WORDSIZE-1, BIT

LAloop:
	and	A, BIT, CC			/* if bit in A is set, done. */
	bne	CC, Ldodiv
	subq	I, 1, I				/* decrement I, shift bit */
	srl	BIT, 1, BIT
	bne	I, LAloop			/* If I != 0, loop again */

Ldodiv:
	sll	B, I, B				/* B <<= i */
	ldiq	T_0, 1
	sll	T_0, I, BIT			/* quotient bit that goes with it */

	/*
	 * One quotient bit per pass: when the aligned divisor fits in
	 * what is left of the dividend, subtract it and record the bit.
	 * Both candidate values are computed first and then committed
	 * with conditional moves, so the loop body has no branches.
	 */
Ldivloop:
	cmpult	A, B, CC
	or	RESULT, BIT, T_0
	cmoveq	CC, T_0, RESULT
	subq	A, B, T_0
	cmoveq	CC, T_0, A
	srl	BIT, 1, BIT
	srl	B, 1, B
	beq	A, Lret_result			/* nothing left: remaining bits are zero */
	bne	BIT, Ldivloop

Lret_result:
ifelse(OP, `div',
`', `	mov	A, RESULT
')
ifelse(S, `true',
`
	/*
	 * Check to see if we should negate it.  Plain subq is used on
	 * purpose: the magnitude 1<<63 is what the most negative 64-bit
	 * dividend over 1 produces, and negating it has to wrap back to
	 * the same bit pattern.  The trapping form would raise an
	 * overflow trap for that representable answer, and would do so
	 * even when the negated value ends up unused.
	 */
	subq	zero, RESULT, T_0
	cmovlbs	NEG, T_0, RESULT
')

	ldq	BIT, 0(sp)
	ldq	I, 8(sp)
	ldq	CC, 16(sp)
	ldq	T_0, 24(sp)
ifelse(S, `true',
`	ldq	NEG, 32(sp)')
	ldq	A, 40(sp)
	ldq	B, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	/*
	 * The trap code travels in a0, which this routine must not
	 * disturb any more than the registers spilled on entry.  Park
	 * it in the spare frame slot so it is intact if execution
	 * resumes after the trap.
	 */
	stq	a0, 56(sp)
	ldiq	a0, -2			/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)
ifelse(OP, `div',
`', `	mov	zero, A			/* so that zero will be returned */
')
	br	zero, Lret_result

END(NAME)
199