/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the sparc processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.  */

#ifdef L_mulsi3
/*
 * .umul -- 32-bit multiply built from mulscc multiply-step instructions.
 *
 * In:   %o0 = multiplier (copied to %y), %o1 = multiplicand
 * Out:  %o0 = low 32 bits of the product
 * Uses: %o4 (partial product), %o5 (scratch), %y, integer condition codes
 *
 * Leaf routine: it returns with retl and touches only %o registers and %y.
 * When neither operand has a bit set above bit 11 the 13-step short path
 * (mul_shortway) is taken; otherwise the full 33-step path is used.
 */
.text
	.align 4
	.global .umul
	.proc 4
.umul:
	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
	mov	%o0, %y		! multiplier to Y register
	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
	be	mul_shortway	! can do it the short way
	! delay slot of a non-annulled branch: runs on both paths
	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
	!
	! long multiply
	!
	mulscc	%o4, %o1, %o4	! first iteration of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 32nd iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! the upper 32 bits of product are wrong, but we do not care
	retl
	rd	%y, %o0		! delay slot: low word of the product is in Y
	!
	! short multiply
	!
mul_shortway:
	mulscc	%o4, %o1, %o4	! first iteration of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 12th iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! After only 13 steps the result is split between %o4 and Y;
	! the two pieces are realigned and merged below.
	rd	%y, %o5
	sll	%o4, 12, %o4	! left shift partial product by 12 bits
	srl	%o5, 20, %o5	! right shift partial product by 20 bits
	retl
	or	%o5, %o4, %o0	! merge for true product
#endif

#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 *
 * m4 parameters:
 *  .div	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  true		true=true => signed; true=false => unsigned
 * (NOTE: the names in the left column appear to have been substituted by
 *  m4 together with the code, so they show the values used for this
 *  expansion rather than the template parameter names -- TODO confirm
 *  against the m4 template.)
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

/*
 * .udiv -- unsigned entry point.  It clears the sign word %g3 so that the
 * result is never negated, then joins the common code at ready_to_divide.
 */
	.global .udiv
	.align 4
	.proc 4
	.text
.udiv:
	b	ready_to_divide
	mov	0, %g3		! delay slot: result is always positive

/*
 * .div -- signed 32-bit division, %o0 / %o1.
 *
 * In:   %o0 = dividend, %o1 = divisor
 * Out:  %o0 = quotient.  A zero divisor raises trap 0x2 (ST_DIV0); if the
 *       trap returns, 0 is returned.
 *
 * Both operands are made nonnegative here and the sign of the result is
 * remembered in %g3; the body from ready_to_divide onwards is shared with
 * .udiv and works on unsigned values.
 *
 * Registers used by the shared body:
 *   %g3  sign word: the quotient is negated at the end if %g3 is negative
 *   %o3  R, the remainder so far (starts as the dividend)
 *   %o5  V, the current comparand (starts as the divisor, then scaled)
 *   %o2  Q, the partial quotient
 *   %o4  number of 4-bit divloop passes still to run
 *   %g2  number of single-bit steps on the large-dividend path
 *   %g1  the constant 1 << (32 - 4 - 1)
 *
 * Leaf routine: returns with retl.
 */
	.global .div
	.align 4
	.proc 4
	.text
.div:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	ready_to_divide	! no, go do the divide
	xor	%o1, %o0, %g3	! compute sign in any case
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	ready_to_divide	! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative


ready_to_divide:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5
	bne	1f
	mov	%o0, %o3

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2    		! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if %o5 (the scaled copy of %o1) overflowed
		! while shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5	! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide
		nop
		sub	%o3, %o5, %o3
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		sll	%o2, 1, %o2
		bl	1f
		srl	%o5, 1, %o5
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3		! delay slot: sets cc for the bl in single_divloop
		! annulled unconditional branch: the delay slot is not executed
		b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
	! Each pass of divloop develops 4 quotient bits with a fully
	! unrolled non-restoring decision tree.  Label Ld.k is the node
	! at depth d; each leaf adds its signed digit to %o2.
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2


L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3		! delay slot: sets cc for divloop and for the bl,a below
	! annulled conditional branch: the delay slot runs only when taken,
	! i.e. only when the final remainder is negative
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2


got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub %g0, %o2, %o2	! delay slot (annulled unless %g3 < 0): negate Q
1:
	retl
	mov %o2, %o0		! delay slot: return the quotient
#endif

#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * Input: dividend and divisor in %o0 and %o1 respectively.
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

/*
 * .urem -- unsigned entry point.  It clears the sign word %g3 so that the
 * result is never negated, then joins the common code at divide.
 */
.text
	.align 4
	.global	.urem
	.proc 4
.urem:
	b	divide
	mov	0, %g3		! delay slot: result always positive

/*
 * .rem -- signed 32-bit remainder, %o0 % %o1.
 *
 * In:   %o0 = dividend, %o1 = divisor
 * Out:  %o0 = remainder, negated at the end when the original dividend
 *       was negative.  A zero divisor raises trap 0x2 (ST_DIV0); if the
 *       trap returns, 0 is returned.
 *
 * Both operands are made nonnegative here; the body from divide onwards
 * is shared with .urem and works on unsigned values.
 *
 * Registers used by the shared body:
 *   %g3  sign word: the remainder is negated at the end if %g3 is negative
 *   %o3  R, the remainder so far (starts as the dividend); the result
 *   %o5  V, the current comparand (starts as the divisor, then scaled)
 *   %o2  Q, the partial quotient (developed but not returned)
 *   %o4  number of 4-bit divloop passes still to run
 *   %g2  number of single-bit steps on the large-dividend path
 *   %g1  the constant 1 << (32 - 4 - 1)
 *   %o1  must still hold the nonnegative divisor at the final fixup
 *
 * Leaf routine: returns with retl.
 */
	.align 4
	.global .rem
	.proc 4
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	2f			! no, go do the divide
	mov	%o0, %g3		! sign of remainder matches %o0
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
divide:
	orcc	%o1, %g0, %o5
	bne	1f
	mov	%o0, %o3

		! Divide by zero trap.  If it returns, return 0 (about as
		! wrong as possible, but that is what SunOS does...).
		ta	0x2   !ST_DIV0
		retl
		clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if %o5 (the scaled copy of %o1) overflowed
		! while shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5		! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide
		nop
		sub	%o3, %o5, %o3
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		sll	%o2, 1, %o2
		bl	1f
		srl	%o5, 1, %o5
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3		! delay slot: sets cc for the bl in single_divloop
		! annulled unconditional branch: the delay slot is not executed
		b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
	! Each pass of divloop develops 4 quotient bits with a fully
	! unrolled non-restoring decision tree.  Label Ld.k is the node
	! at depth d; each leaf adds its signed digit to %o2.
divloop:
	sll	%o2, 4, %o2
		! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2
L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	tst	%o3		! delay slot: sets cc for divloop and for the bl,a below
	! annulled conditional branch: the delay slot runs only when taken,
	! i.e. only when the final remainder is negative
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	add	%o3, %o1, %o3

got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub %g0, %o3, %o3	! delay slot (annulled unless %g3 < 0): negate R
1:
	retl
	mov %o3, %o0		! delay slot: return the remainder

#endif