1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23!	.seg	"data"
24!	.asciz	"Copyr 1986 Sun Micro"
25	.seg	"text"
26
27#ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Copyright 1986 Sun Microsystems, Inc.  All rights reserved.
31 * Use is subject to license terms.
32 */
33
34/*
35 * divison/remainder
36 *
37 * Input is:
38 *	dividend -- the thing being divided
39 * divisor  -- how many ways to divide
40 * Important parameters:
41 *	N -- how many bits per iteration we try to get
42 *		as our current guess:
43 *	WORDSIZE -- how many bits altogether we're talking about:
44 *		obviously:
45 * A derived constant:
46 *	TOPBITS -- how many bits are in the top "decade" of a number:
47 *
48 * Important variables are:
49 *	Q -- the partial quotient under development -- initally 0
50 *	R -- the remainder so far -- initially == the dividend
51 *	ITER -- number of iterations of the main division loop will
52 *		be required. Equal to CEIL( lg2(quotient)/4 )
53 *		Note that this is log_base_(2^4) of the quotient.
54 *	V -- the current comparand -- initially divisor*2^(ITER*4-1)
55 * Cost:
56 *	current estimate for non-large dividend is
57 *		CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
58 *	a large dividend is one greater than 2^(31-4 ) and takes a
59 *	different path, as the upper bits of the quotient must be developed
60 *	one bit at a time.
61 */
62
63#include <sys/trap.h>
64#include <sys/asm_linkage.h>
65
66
67
68
69
70
71
72
73	! working variable
74
75
76/*
77 * this is the recursive definition of how we develop quotient digits.
78 * it takes three important parameters:
79 *	$1 -- the current depth, 1<=$1<=4
80 *	$2 -- the current accumulation of quotient bits
81 *	4  -- max depth
82 * We add a new bit to $2 and either recurse or
83 * insert the bits in the quotient.
84 * Dynamic input:
85 *	%o3 -- current remainder
86 *	%o2 -- current quotient
87 *	%o5 -- current comparand
88 *	cc -- set on current value of %o3
89 * Dynamic output:
90 * %o3', %o2', %o5', cc'
91 */
92
93
94
95
96
97!	RTENTRY(.udiv)		! unsigned divide
98	.global .udiv
99.udiv:
100	b	divide
101	mov	0,%g1		! result always positive
102
103!	RTENTRY(.div)		! SIGNED DIVIDE
104	.global	.div
105.div:
106	orcc	%o1,%o0,%g0 ! are either %o0 or %o1 negative
107	bge	divide		! if not, skip this junk
108	xor	%o1,%o0,%g1 ! record sign of result in sign of %g1
109		tst	%o1
110		bge	2f
111		tst	%o0
112	!	%o1 < 0
113		bge	divide
114		neg	%o1
115	2:
116	!	%o0 < 0
117		neg	%o0
118	!	FALL THROUGH
119
120
121divide:
122!	compute size of quotient, scale comparand
123	orcc	%o1,%g0,%o5	! movcc	%o1,%o5
124	bnz	0f		! if %o1 != 0
125	mov	%o0,%o3
126	ba	zero_divide
127	nop
1280:
129	cmp     %o3,%o5
130	blu     got_result ! if %o3<%o5 already, there's no point in continuing
131	mov	0,%o2
132	sethi	%hi(1<<(32-4 -1)),%g2
133	cmp	%o3,%g2
134	blu	not_really_big
135	mov	0,%o4
136	!
137	! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
138	! our usual 4-at-a-shot divide step will cause overflow and havoc. The
139	! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
140	! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
141	!	the top decade: so don't even bother to compare to %o3.
142	1:
143		cmp	%o5,%g2
144		bgeu	3f
145		mov	1,%g3
146		sll	%o5,4,%o5
147		b	1b
148		inc	%o4
149	! now compute %g3
150	2:	addcc	%o5,%o5,%o5
151		bcc	not_too_big ! bcc	not_too_big
152		add	%g3,1,%g3
153			!
154			! here if the %o1 overflowed when Shifting
155			! this means that %o3 has the high-order bit set
156			! restore %o5 and subtract from %o3
157			sll	%g2,4 ,%g2 ! high order bit
158			srl	%o5,1,%o5 ! rest of %o5
159			add	%o5,%g2,%o5
160			b	do_single_div
161			sub	%g3,1,%g3
162	not_too_big:
163	3:	cmp	%o5,%o3
164		blu	2b
165		nop
166		be	do_single_div
167		nop
168	! %o5 > %o3: went too far: back up 1 step
169	!	srl	%o5,1,%o5
170	!	dec	%g3
171	! do single-bit divide steps
172	!
173	! we have to be careful here. We know that %o3 >= %o5, so we can do the
174	! first divide step without thinking. BUT, the others are conditional,
175	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
176	! order bit set in the first step, just falling into the regular
177	! division loop will mess up the first time around.
178	! So we unroll slightly...
179	do_single_div:
180		deccc	%g3
181		bl	end_regular_divide
182		nop
183		sub	%o3,%o5,%o3
184		mov	1,%o2
185		b,a	end_single_divloop
186	single_divloop:
187		sll	%o2,1,%o2
188		bl	1f
189		srl	%o5,1,%o5
190		! %o3 >= 0
191		sub	%o3,%o5,%o3
192		b	2f
193		inc	%o2
194	1:	! %o3 < 0
195		add	%o3,%o5,%o3
196		dec	%o2
197	2:
198	end_single_divloop:
199		deccc	%g3
200		bge	single_divloop
201		tst	%o3
202		b,a	end_regular_divide
203
204not_really_big:
2051:
206	sll	%o5,4,%o5
207	cmp	%o5,%o3
208	bleu	1b
209	inccc	%o4
210	be	got_result
211	dec	%o4
212do_regular_divide:
213
214!	do the main division iteration
215	tst	%o3
216!	fall through into divide loop
217divloop:
218	sll	%o2,4,%o2
219		!depth 1, accumulated bits 0
220	bl	L.1.16
221	srl	%o5,1,%o5
222	! remainder is positive
223	subcc	%o3,%o5,%o3
224			!depth 2, accumulated bits 1
225	bl	L.2.17
226	srl	%o5,1,%o5
227	! remainder is positive
228	subcc	%o3,%o5,%o3
229			!depth 3, accumulated bits 3
230	bl	L.3.19
231	srl	%o5,1,%o5
232	! remainder is positive
233	subcc	%o3,%o5,%o3
234			!depth 4, accumulated bits 7
235	bl	L.4.23
236	srl	%o5,1,%o5
237	! remainder is positive
238	subcc	%o3,%o5,%o3
239		b	9f
240		add	%o2, (7*2+1), %o2
241
242L.4.23:	! remainder is negative
243	addcc	%o3,%o5,%o3
244		b	9f
245		add	%o2, (7*2-1), %o2
246
247
248
249
250L.3.19:	! remainder is negative
251	addcc	%o3,%o5,%o3
252			!depth 4, accumulated bits 5
253	bl	L.4.21
254	srl	%o5,1,%o5
255	! remainder is positive
256	subcc	%o3,%o5,%o3
257		b	9f
258		add	%o2, (5*2+1), %o2
259
260L.4.21:	! remainder is negative
261	addcc	%o3,%o5,%o3
262		b	9f
263		add	%o2, (5*2-1), %o2
264
265
266
267
268
269
270
271L.2.17:	! remainder is negative
272	addcc	%o3,%o5,%o3
273			!depth 3, accumulated bits 1
274	bl	L.3.17
275	srl	%o5,1,%o5
276	! remainder is positive
277	subcc	%o3,%o5,%o3
278			!depth 4, accumulated bits 3
279	bl	L.4.19
280	srl	%o5,1,%o5
281	! remainder is positive
282	subcc	%o3,%o5,%o3
283		b	9f
284		add	%o2, (3*2+1), %o2
285
286L.4.19:	! remainder is negative
287	addcc	%o3,%o5,%o3
288		b	9f
289		add	%o2, (3*2-1), %o2
290
291
292
293
294L.3.17:	! remainder is negative
295	addcc	%o3,%o5,%o3
296			!depth 4, accumulated bits 1
297	bl	L.4.17
298	srl	%o5,1,%o5
299	! remainder is positive
300	subcc	%o3,%o5,%o3
301		b	9f
302		add	%o2, (1*2+1), %o2
303
304L.4.17:	! remainder is negative
305	addcc	%o3,%o5,%o3
306		b	9f
307		add	%o2, (1*2-1), %o2
308
309
310
311
312
313
314
315
316
317
318L.1.16:	! remainder is negative
319	addcc	%o3,%o5,%o3
320			!depth 2, accumulated bits -1
321	bl	L.2.15
322	srl	%o5,1,%o5
323	! remainder is positive
324	subcc	%o3,%o5,%o3
325			!depth 3, accumulated bits -1
326	bl	L.3.15
327	srl	%o5,1,%o5
328	! remainder is positive
329	subcc	%o3,%o5,%o3
330			!depth 4, accumulated bits -1
331	bl	L.4.15
332	srl	%o5,1,%o5
333	! remainder is positive
334	subcc	%o3,%o5,%o3
335		b	9f
336		add	%o2, (-1*2+1), %o2
337
338L.4.15:	! remainder is negative
339	addcc	%o3,%o5,%o3
340		b	9f
341		add	%o2, (-1*2-1), %o2
342
343
344
345
346L.3.15:	! remainder is negative
347	addcc	%o3,%o5,%o3
348			!depth 4, accumulated bits -3
349	bl	L.4.13
350	srl	%o5,1,%o5
351	! remainder is positive
352	subcc	%o3,%o5,%o3
353		b	9f
354		add	%o2, (-3*2+1), %o2
355
356L.4.13:	! remainder is negative
357	addcc	%o3,%o5,%o3
358		b	9f
359		add	%o2, (-3*2-1), %o2
360
361
362
363
364
365
366
367L.2.15:	! remainder is negative
368	addcc	%o3,%o5,%o3
369			!depth 3, accumulated bits -3
370	bl	L.3.13
371	srl	%o5,1,%o5
372	! remainder is positive
373	subcc	%o3,%o5,%o3
374			!depth 4, accumulated bits -5
375	bl	L.4.11
376	srl	%o5,1,%o5
377	! remainder is positive
378	subcc	%o3,%o5,%o3
379		b	9f
380		add	%o2, (-5*2+1), %o2
381
382L.4.11:	! remainder is negative
383	addcc	%o3,%o5,%o3
384		b	9f
385		add	%o2, (-5*2-1), %o2
386
387
388
389
390L.3.13:	! remainder is negative
391	addcc	%o3,%o5,%o3
392			!depth 4, accumulated bits -7
393	bl	L.4.9
394	srl	%o5,1,%o5
395	! remainder is positive
396	subcc	%o3,%o5,%o3
397		b	9f
398		add	%o2, (-7*2+1), %o2
399
400L.4.9:	! remainder is negative
401	addcc	%o3,%o5,%o3
402		b	9f
403		add	%o2, (-7*2-1), %o2
404
405
406
407
408
409
410
411
412
413
414	9:
415
416end_regular_divide:
417	deccc	%o4
418	bge	divloop
419	tst	%o3
420	bl,a	got_result
421	dec	%o2
422
423
424got_result:
425	tst	%g1
426	bl,a	1f
427	neg	%o2	! quotient  <- -%o2
428
4291:
430	retl
431	mov	%o2,%o0	! quotient  <-  %o2
432
433
434zero_divide:
435	ta	ST_DIV0		! divide by zero trap
436	retl			! if handled, ignored, return
437	mov	0, %o0
438