1230025Sed/*
2230025Sed * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3230025Sed */
4230025Sed/*
5230025Sed * Division/Remainder
6230025Sed *
7230025Sed * Input is:
8230025Sed *   dividend -- the thing being divided
9230025Sed *   divisor -- how many ways to divide it
10230025Sed * Important parameters:
11230025Sed *   N -- how many bits per iteration we try to get
12230025Sed *        as our current guess:
13230025Sed *   WORDSIZE -- how many bits altogether we're talking about:
14230025Sed *               obviously:
15230025Sed * A derived constant:
16230025Sed *   TOPBITS -- how many bits are in the top "decade" of a number:
17230025Sed *
18230025Sed * Important variables are:
19230025Sed *   Q -- the partial quotient under development -- initially 0
20230025Sed *   R -- the remainder so far -- initially == the dividend
21230025Sed *   ITER -- number of iterations of the main division loop which will
22230025Sed *           be required. Equal to CEIL( lg2(quotient)/4 )
23230025Sed *           Note that this is log_base_(2��4) of the quotient.
24230025Sed *   V -- the current comparand -- initially divisor*2��(ITER*4-1)
25230025Sed * Cost:
26230025Sed *   current estimate for non-large dividend is
27230025Sed *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
28230025Sed *   a large dividend is one greater than 2��(31-4 ) and takes a
29230025Sed *   different path, as the upper bits of the quotient must be developed
30230025Sed *   one bit at a time.
31230025Sed *   This uses the m4 and cpp macro preprocessors.
32230025Sed */
33230025Sed/*
34230025Sed * This is the recursive definition of how we develop quotient digits.
35230025Sed * It takes three important parameters:
36230025Sed *   $1 -- the current depth, 1<=$1<=4
37230025Sed *   $2 -- the current accumulation of quotient bits
38230025Sed *   4 -- max depth
39230025Sed * We add a new bit to $2 and either recurse or insert the bits in the quotient.
40230025Sed * Dynamic input:
41230025Sed *   %o3 -- current remainder
42230025Sed *   %o2 -- current quotient
43230025Sed *   %o5 -- current comparand
44230025Sed *   cc -- set on current value of %o3
45230025Sed * Dynamic output:
46230025Sed *   %o3', %o2', %o5', cc'
47230025Sed */
48230025Sed#include "../assembly.h"
49230025Sed.text
50235389Smarius	.align 32
51230025SedDEFINE_COMPILERRT_FUNCTION(__udivsi3)
52230025Sed	b	divide
53230025Sed	mov	0,%g3			! result always nonnegative
54235389Smarius.text
55235389Smarius	.align 32
56230025SedDEFINE_COMPILERRT_FUNCTION(__divsi3)
57230025Sed	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
58230025Sed	bge	divide			! if not, skip this junk
59230025Sed	xor	%o1,%o0,%g3	! record sign of result in sign of %g3
60230025Sed	tst	%o1
61230025Sed	bge	2f
62230025Sed	tst	%o0
63230025Sed	! %o1 < 0
64230025Sed	bge	divide
65230025Sed	neg	%o1
66230025Sed	2:
67230025Sed	! %o0 < 0
68230025Sed	neg	%o0
69230025Sed	! FALL THROUGH
70230025Seddivide:
71230025Sed	! Compute size of quotient, scale comparand.
72230025Sed	orcc	%o1,%g0,%o5		! movcc %o1,%o5
73230025Sed	te	2			! if %o1 = 0
74230025Sed	mov	%o0,%o3
75230025Sed	mov	0,%o2
76230025Sed	sethi	%hi(1<<(32-4 -1)),%g1
77230025Sed	cmp	%o3,%g1
78230025Sed	blu	not_really_big
79230025Sed	mov	0,%o4
80230025Sed	!
81230025Sed	! Here, the %o0 is >= 2��(31-4) or so. We must be careful here,
82230025Sed	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
83230025Sed	! The total number of bits in the result here is 4*%o4+%g2, where
84230025Sed	! %g2 <= 4.
85230025Sed	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
86230025Sed! the top decade: so don't even bother to compare to %o3.
87230025Sed1:
88230025Sed	cmp	%o5,%g1
89230025Sed	bgeu	3f
90230025Sed	mov	1,%g2
91230025Sed	sll	%o5,4,%o5
92230025Sed	b	1b
93230025Sed	inc	%o4
94230025Sed! Now compute %g2
95230025Sed2:	addcc	%o5,%o5,%o5
96230025Sed	bcc	not_too_big
97230025Sed	add	%g2,1,%g2
98230025Sed		! We're here if the %o1 overflowed when Shifting.
99230025Sed		! This means that %o3 has the high-order bit set.
100230025Sed		! Restore %o5 and subtract from %o3.
101230025Sed		sll	%g1,4 ,%g1	! high order bit
102230025Sed		srl	%o5,1,%o5		! rest of %o5
103230025Sed		add	%o5,%g1,%o5
104230025Sed		b	do_single_div
105230025Sed		dec	%g2
106230025Sednot_too_big:
107230025Sed3:	cmp	%o5,%o3
108230025Sed	blu	2b
109230025Sed	nop
110230025Sed	be	do_single_div
111230025Sed	nop
112230025Sed! %o5 > %o3: went too far: back up 1 step
113230025Sed!     srl %o5,1,%o5
114230025Sed!      dec %g2
115230025Sed! do single-bit divide steps
116230025Sed!
117230025Sed! We have to be careful here. We know that %o3 >= %o5, so we can do the
118230025Sed! first divide step without thinking. BUT, the others are conditional,
119230025Sed! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
120230025Sed! order bit set in the first step, just falling into the regular
121230025Sed! division loop will mess up the first time around.
122230025Sed! So we unroll slightly...
123230025Seddo_single_div:
124230025Sed	deccc	%g2
125230025Sed	bl	end_regular_divide
126230025Sed	nop
127230025Sed	sub	%o3,%o5,%o3
128230025Sed	mov	1,%o2
129235389Smarius	b,a	end_single_divloop
130235389Smarius	! EMPTY
131230025Sedsingle_divloop:
132230025Sed	sll	%o2,1,%o2
133230025Sed	bl	1f
134230025Sed	srl	%o5,1,%o5
135230025Sed	! %o3 >= 0
136230025Sed		sub	%o3,%o5,%o3
137230025Sed		b	2f
138230025Sed		inc	%o2
139230025Sed	1:	! %o3 < 0
140230025Sed		add	%o3,%o5,%o3
141230025Sed		dec	%o2
142230025Sed	2:
143230025Sed	end_single_divloop:
144230025Sed		deccc	%g2
145230025Sed		bge	single_divloop
146230025Sed		tst	%o3
147235389Smarius		b,a	end_regular_divide
148235389Smarius		! EMPTY
149230025Sednot_really_big:
150230025Sed1:
151230025Sed	sll	%o5,4,%o5
152230025Sed	cmp	%o5,%o3
153230025Sed	bleu	1b
154230025Sed	inccc	%o4
155230025Sed	be	got_result
156230025Sed	dec	%o4
157230025Seddo_regular_divide:
158230025Sed	! Do the main division iteration
159230025Sed	tst	%o3
160230025Sed	! Fall through into divide loop
161230025Seddivloop:
162230025Sed	sll	%o2,4,%o2
163230025Sed		!depth 1, accumulated bits 0
164230025Sed	bl	L.1.16
165230025Sed	srl	%o5,1,%o5
166230025Sed	! remainder is nonnegative
167230025Sed	subcc	%o3,%o5,%o3
168230025Sed	 	!depth 2, accumulated bits 1
169230025Sed	bl	L.2.17
170230025Sed	srl	%o5,1,%o5
171230025Sed	! remainder is nonnegative
172230025Sed	subcc	%o3,%o5,%o3
173230025Sed	 	!depth 3, accumulated bits 3
174230025Sed	bl	L.3.19
175230025Sed	srl	%o5,1,%o5
176230025Sed	! remainder is nonnegative
177230025Sed	subcc	%o3,%o5,%o3
178230025Sed	 	!depth 4, accumulated bits 7
179230025Sed	bl	L.4.23
180230025Sed	srl	%o5,1,%o5
181230025Sed	! remainder is nonnegative
182230025Sed	subcc	%o3,%o5,%o3
183230025Sed		b	9f
184230025Sed		add	%o2, (7*2+1), %o2
185230025SedL.4.23:
186230025Sed	! remainder is negative
187230025Sed	addcc	%o3,%o5,%o3
188230025Sed		b	9f
189230025Sed		add	%o2, (7*2-1), %o2
190230025SedL.3.19:
191230025Sed	! remainder is negative
192230025Sed	addcc	%o3,%o5,%o3
193230025Sed	 	!depth 4, accumulated bits 5
194230025Sed	bl	L.4.21
195230025Sed	srl	%o5,1,%o5
196230025Sed	! remainder is nonnegative
197230025Sed	subcc	%o3,%o5,%o3
198230025Sed		b	9f
199230025Sed		add	%o2, (5*2+1), %o2
200230025SedL.4.21:
201230025Sed	! remainder is negative
202230025Sed	addcc	%o3,%o5,%o3
203230025Sed		b	9f
204230025Sed		add	%o2, (5*2-1), %o2
205230025SedL.2.17:
206230025Sed	! remainder is negative
207230025Sed	addcc	%o3,%o5,%o3
208230025Sed	 	!depth 3, accumulated bits 1
209230025Sed	bl	L.3.17
210230025Sed	srl	%o5,1,%o5
211230025Sed	! remainder is nonnegative
212230025Sed	subcc	%o3,%o5,%o3
213230025Sed	 	!depth 4, accumulated bits 3
214230025Sed	bl	L.4.19
215230025Sed	srl	%o5,1,%o5
216230025Sed	! remainder is nonnegative
217230025Sed	subcc	%o3,%o5,%o3
218230025Sed		b	9f
219230025Sed		add	%o2, (3*2+1), %o2
220230025SedL.4.19:
221230025Sed	! remainder is negative
222230025Sed	addcc	%o3,%o5,%o3
223230025Sed		b	9f
224230025Sed		add	%o2, (3*2-1), %o2
225230025SedL.3.17:
226230025Sed	! remainder is negative
227230025Sed	addcc	%o3,%o5,%o3
228230025Sed	 	!depth 4, accumulated bits 1
229230025Sed	bl	L.4.17
230230025Sed	srl	%o5,1,%o5
231230025Sed	! remainder is nonnegative
232230025Sed	subcc	%o3,%o5,%o3
233230025Sed		b	9f
234230025Sed		add	%o2, (1*2+1), %o2
235230025SedL.4.17:
236230025Sed	! remainder is negative
237230025Sed	addcc	%o3,%o5,%o3
238230025Sed		b	9f
239230025Sed		add	%o2, (1*2-1), %o2
240230025SedL.1.16:
241230025Sed	! remainder is negative
242230025Sed	addcc	%o3,%o5,%o3
243230025Sed	 	!depth 2, accumulated bits -1
244230025Sed	bl	L.2.15
245230025Sed	srl	%o5,1,%o5
246230025Sed	! remainder is nonnegative
247230025Sed	subcc	%o3,%o5,%o3
248230025Sed	 	!depth 3, accumulated bits -1
249230025Sed	bl	L.3.15
250230025Sed	srl	%o5,1,%o5
251230025Sed	! remainder is nonnegative
252230025Sed	subcc	%o3,%o5,%o3
253230025Sed	 	!depth 4, accumulated bits -1
254230025Sed	bl	L.4.15
255230025Sed	srl	%o5,1,%o5
256230025Sed	! remainder is nonnegative
257230025Sed	subcc	%o3,%o5,%o3
258230025Sed		b	9f
259230025Sed		add	%o2, (-1*2+1), %o2
260230025SedL.4.15:
261230025Sed	! remainder is negative
262230025Sed	addcc	%o3,%o5,%o3
263230025Sed		b	9f
264230025Sed		add	%o2, (-1*2-1), %o2
265230025SedL.3.15:
266230025Sed	! remainder is negative
267230025Sed	addcc	%o3,%o5,%o3
268230025Sed	 	!depth 4, accumulated bits -3
269230025Sed	bl	L.4.13
270230025Sed	srl	%o5,1,%o5
271230025Sed	! remainder is nonnegative
272230025Sed	subcc	%o3,%o5,%o3
273230025Sed		b	9f
274230025Sed		add	%o2, (-3*2+1), %o2
275230025SedL.4.13:
276230025Sed	! remainder is negative
277230025Sed	addcc	%o3,%o5,%o3
278230025Sed		b	9f
279230025Sed		add	%o2, (-3*2-1), %o2
280230025SedL.2.15:
281230025Sed	! remainder is negative
282230025Sed	addcc	%o3,%o5,%o3
283230025Sed	 	!depth 3, accumulated bits -3
284230025Sed	bl	L.3.13
285230025Sed	srl	%o5,1,%o5
286230025Sed	! remainder is nonnegative
287230025Sed	subcc	%o3,%o5,%o3
288230025Sed	 	!depth 4, accumulated bits -5
289230025Sed	bl	L.4.11
290230025Sed	srl	%o5,1,%o5
291230025Sed	! remainder is nonnegative
292230025Sed	subcc	%o3,%o5,%o3
293230025Sed		b	9f
294230025Sed		add	%o2, (-5*2+1), %o2
295230025SedL.4.11:
296230025Sed	! remainder is negative
297230025Sed	addcc	%o3,%o5,%o3
298230025Sed		b	9f
299230025Sed		add	%o2, (-5*2-1), %o2
300230025SedL.3.13:
301230025Sed	! remainder is negative
302230025Sed	addcc	%o3,%o5,%o3
303230025Sed	 	!depth 4, accumulated bits -7
304230025Sed	bl	L.4.9
305230025Sed	srl	%o5,1,%o5
306230025Sed	! remainder is nonnegative
307230025Sed	subcc	%o3,%o5,%o3
308230025Sed		b	9f
309230025Sed		add	%o2, (-7*2+1), %o2
310230025SedL.4.9:
311230025Sed	! remainder is negative
312230025Sed	addcc	%o3,%o5,%o3
313230025Sed		b	9f
314230025Sed		add	%o2, (-7*2-1), %o2
315230025Sed	9:
316230025Sedend_regular_divide:
317230025Sed	deccc	%o4
318230025Sed	bge	divloop
319230025Sed	tst	%o3
320235389Smarius	bl,a	got_result
321235389Smarius	! non-restoring fixup if remainder < 0, otherwise annulled
322230025Sed	dec	%o2
323230025Sedgot_result:
324230025Sed	tst	%g3
325235389Smarius	bl,a	1f
326235389Smarius	! negate for answer < 0, otherwise annulled
327235389Smarius	neg	%o2,%o2			! %o2 <- -%o2
328230025Sed1:
329230025Sed	retl				! leaf-routine return
330230025Sed	mov	%o2,%o0			! quotient <- %o2
331