config/sparc/lb1spc.asm

90075Sobrien/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
90075Sobrien   for the sparc processor.
50397Sobrien
117395Skan   These routines are derived from the SPARC Architecture Manual, version 8,
50397Sobrien   slightly edited to match the desired calling convention, and also to
50397Sobrien   optimize them for our purposes.  */
50397Sobrien
50397Sobrien#ifdef L_mulsi3
50397Sobrien.text
50397Sobrien	.align 4
50397Sobrien	.global .umul
50397Sobrien	.proc 4
	! .umul: 32-bit multiply built from mulscc multiply-step instructions.
	!   In:   %o0 = multiplier (copied to %y), %o1 = multiplicand
	!   Out:  %o0 = low 32 bits of the product
	!   Uses: %o4 (partial product), %o5, %y and the condition codes
	! There is no save/restore; both exits return with retl.
	! The short path is taken when (%o0 | %o1) has no bit set above the
	! low 12 bits; it runs 12 multiply steps instead of 32.
50397Sobrien.umul:
50397Sobrien	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
50397Sobrien	mov	%o0, %y		! multiplier to Y register
50397Sobrien	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
50397Sobrien	be	mul_shortway	! can do it the short way
	! The andcc below sits in the delay slot of the be, so the partial
	! product is zeroed on both the long and the short path.
50397Sobrien	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
50397Sobrien	!
50397Sobrien	! long multiply
50397Sobrien	!
50397Sobrien	mulscc	%o4, %o1, %o4	! first iteration of 33
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4	! 32nd iteration
50397Sobrien	mulscc	%o4, %g0, %o4	! last iteration only shifts
50397Sobrien	! the upper 32 bits of product are wrong, but we do not care
	! The low word of the product is read from %y in the retl delay slot.
50397Sobrien	retl
50397Sobrien	rd	%y, %o0
50397Sobrien	!
50397Sobrien	! short multiply
50397Sobrien	!
50397Sobrienmul_shortway:
50397Sobrien	mulscc	%o4, %o1, %o4	! first iteration of 13
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4
50397Sobrien	mulscc	%o4, %o1, %o4	! 12th iteration
50397Sobrien	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! After only 12 steps the product is split across two registers:
	! %o4 supplies the bits from position 12 upward and the top 12 bits
	! of %y supply the low 12 bits.  The shifts below line the two parts
	! up and the or in the retl delay slot merges them into %o0.
50397Sobrien	rd	%y, %o5
50397Sobrien	sll	%o4, 12, %o4	! left shift partial product by 12 bits
50397Sobrien	srl	%o5, 20, %o5	! right shift partial product by 20 bits
50397Sobrien	retl
50397Sobrien	or	%o5, %o4, %o0	! merge for true product
50397Sobrien#endif
50397Sobrien
50397Sobrien#ifdef L_divsi3
50397Sobrien/*
117395Skan * Division and remainder, from Appendix E of the SPARC Version 8
50397Sobrien * Architecture Manual, with fixes from Gordon Irlam.
50397Sobrien */
50397Sobrien
50397Sobrien/*
50397Sobrien * Input: dividend and divisor in %o0 and %o1 respectively.
50397Sobrien *
50397Sobrien * m4 parameters (names already replaced by their expanded values):
50397Sobrien *  .div	name of function to generate
50397Sobrien *  div		operation: div => %o0 / %o1; rem => %o0 % %o1
50397Sobrien *  true		signedness: true => signed; false => unsigned
50397Sobrien *
50397Sobrien * Algorithm parameters:
50397Sobrien *  N		how many bits per iteration we try to get (4)
50397Sobrien *  WORDSIZE	total number of bits (32)
50397Sobrien *
50397Sobrien * Derived constants:
50397Sobrien *  TOPBITS	number of bits in the top decade of a number
50397Sobrien *
50397Sobrien * Important variables:
50397Sobrien *  Q		the partial quotient under development (initially 0)
50397Sobrien *  R		the remainder so far, initially the dividend
50397Sobrien *  ITER	number of main division loop iterations required;
50397Sobrien *		equal to ceil(log2(quotient) / N).  Note that this
50397Sobrien *		is the log base (2^N) of the quotient.
50397Sobrien *  V		the current comparand, initially divisor*2^(ITER*N-1)
50397Sobrien *
50397Sobrien * Cost:
50397Sobrien *  Current estimate for non-large dividend is
50397Sobrien *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
50397Sobrien *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
50397Sobrien *  different path, as the upper bits of the quotient must be developed
50397Sobrien *  one bit at a time.
50397Sobrien */
50397Sobrien        .global .udiv
50397Sobrien        .align 4
50397Sobrien        .proc 4
50397Sobrien        .text
	! .udiv: unsigned divide entry point.
	!   In:  %o0 = dividend, %o1 = divisor
	! Branches straight to the shared code at ready_to_divide.  The mov in
	! the branch delay slot clears the sign word %g3, so the sign test at
	! got_result never negates the quotient.
50397Sobrien.udiv:
50397Sobrien         b ready_to_divide
50397Sobrien         mov 0, %g3             ! result is always positive
50397Sobrien
50397Sobrien        .global .div
50397Sobrien        .align 4
50397Sobrien        .proc 4
50397Sobrien        .text
	! .div: signed divide entry point.
	!   In:  %o0 = dividend, %o1 = divisor
	! %g3 receives %o1 xor %o0, so its sign bit is set exactly when the
	! operand signs differ; got_result tests it to decide whether to
	! negate the quotient.  Any negative operand is negated here, then
	! control falls through (or branches) to ready_to_divide.
50397Sobrien.div:
50397Sobrien	! compute sign of result; if neither is negative, no problem
50397Sobrien	orcc	%o1, %o0, %g0	! either negative?
50397Sobrien	bge	ready_to_divide	! no, go do the divide
50397Sobrien	xor	%o1, %o0, %g3	! compute sign in any case
50397Sobrien	tst	%o1
50397Sobrien	bge	1f
	! tst %o0 is the delay slot of the bge above, so it always executes;
	! the condition codes it sets drive the next bge.
50397Sobrien	tst	%o0
50397Sobrien	! %o1 is definitely negative; %o0 might also be negative
50397Sobrien	bge	ready_to_divide	! if %o0 not negative...
50397Sobrien	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
50397Sobrien1:	! %o0 is negative, %o1 is nonnegative
50397Sobrien	sub	%g0, %o0, %o0	! make %o0 nonnegative
50397Sobrien
50397Sobrien
50397Sobrienready_to_divide:
	! Shared divide core, reached from both .udiv and .div.
	! Register use from here on:
	!   %o3 = R, running remainder (starts as the dividend %o0)
	!   %o5 = V, comparand (starts as the divisor %o1)
	!   %o2 = Q, quotient under development
	!   %o4 = ITER, count of 4-bit divloop passes
	!   %g1 = 1 << (32 - 4 - 1), threshold for the large-dividend path
	!   %g2 = count of single-bit steps on the large-dividend path
	!   %g3 = sign word set by the entry point, tested at got_result
50397Sobrien
50397Sobrien	! Ready to divide.  Compute size of quotient; scale comparand.
50397Sobrien	orcc	%o1, %g0, %o5
50397Sobrien	bne	1f
50397Sobrien	mov	%o0, %o3
50397Sobrien
50397Sobrien	! Divide by zero trap.  If it returns, return 0 (about as
50397Sobrien	! wrong as possible, but that is what SunOS does...).
50397Sobrien	ta	0x2    		! ST_DIV0
50397Sobrien	retl
50397Sobrien	clr	%o0
50397Sobrien
50397Sobrien1:
	! If the dividend is below the divisor the quotient is 0: the clr in
	! the delay slot zeroes Q before got_result returns it.
50397Sobrien	cmp	%o3, %o5		! if %o1 exceeds %o0, done
50397Sobrien	blu	got_result		! (and algorithm fails otherwise)
50397Sobrien	clr	%o2
50397Sobrien	sethi	%hi(1 << (32 - 4 - 1)), %g1
50397Sobrien	cmp	%o3, %g1
50397Sobrien	blu	not_really_big
50397Sobrien	clr	%o4
50397Sobrien
50397Sobrien	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
50397Sobrien	! as our usual N-at-a-shot divide step will cause overflow and havoc.
50397Sobrien	! The number of bits in the result here is N*ITER+SC, where SC <= N.
50397Sobrien	! Compute ITER in an unorthodox manner: know we need to shift V into
50397Sobrien	! the top decade: so do not even bother to compare to R.
	! Loop 1 shifts V left 4 bits at a time until it reaches %g1, counting
	! the shifts in %o4.  %g2 is reset to 1 in the bgeu delay slot on
	! every pass.
50397Sobrien	1:
50397Sobrien		cmp	%o5, %g1
50397Sobrien		bgeu	3f
50397Sobrien		mov	1, %g2
50397Sobrien		sll	%o5, 4, %o5
50397Sobrien		b	1b
50397Sobrien		add	%o4, 1, %o4
50397Sobrien
50397Sobrien	! Now compute %g2.
	! Loop 2/3 doubles V one bit at a time while V is below R, counting
	! each doubling in %g2 (the add runs in the bcc delay slot).
50397Sobrien	2:	addcc	%o5, %o5, %o5
50397Sobrien		bcc	not_too_big
50397Sobrien		add	%g2, 1, %g2
50397Sobrien
50397Sobrien		! We get here if the %o1 overflowed while shifting.
50397Sobrien		! This means that %o3 has the high-order bit set.
50397Sobrien		! Restore %o5 and subtract from %o3.
50397Sobrien		sll	%g1, 4, %g1	! high order bit
50397Sobrien		srl	%o5, 1, %o5	! rest of %o5
50397Sobrien		add	%o5, %g1, %o5
50397Sobrien		b	do_single_div
50397Sobrien		sub	%g2, 1, %g2
50397Sobrien
50397Sobrien	not_too_big:
50397Sobrien	3:	cmp	%o5, %o3
50397Sobrien		blu	2b
50397Sobrien		nop
50397Sobrien		be	do_single_div
50397Sobrien		nop
117395Skan	/* NB: these are commented out in the V8-SPARC manual as well */
50397Sobrien	/* (I do not understand this) */
50397Sobrien	! %o5 > %o3: went too far: back up 1 step
50397Sobrien	!	srl	%o5, 1, %o5
50397Sobrien	!	dec	%g2
50397Sobrien	! do single-bit divide steps
50397Sobrien	!
50397Sobrien	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
50397Sobrien	! first divide step without thinking.  BUT, the others are conditional,
50397Sobrien	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
50397Sobrien	! order bit set in the first step, just falling into the regular
50397Sobrien	! division loop will mess up the first time around.
50397Sobrien	! So we unroll slightly...
50397Sobrien	do_single_div:
50397Sobrien		subcc	%g2, 1, %g2
50397Sobrien		bl	end_regular_divide
50397Sobrien		nop
50397Sobrien		sub	%o3, %o5, %o3
50397Sobrien		mov	1, %o2
50397Sobrien		b	end_single_divloop
50397Sobrien		nop
	! One quotient bit per pass: Q is doubled, V is halved in the bl delay
	! slot, then V is subtracted from or added to R depending on the sign
	! of R tested by the tst at the bottom of the loop.
50397Sobrien	single_divloop:
50397Sobrien		sll	%o2, 1, %o2
50397Sobrien		bl	1f
50397Sobrien		srl	%o5, 1, %o5
50397Sobrien		! %o3 >= 0
50397Sobrien		sub	%o3, %o5, %o3
50397Sobrien		b	2f
50397Sobrien		add	%o2, 1, %o2
50397Sobrien	1:	! %o3 < 0
50397Sobrien		add	%o3, %o5, %o3
50397Sobrien		sub	%o2, 1, %o2
50397Sobrien	2:
50397Sobrien	end_single_divloop:
50397Sobrien		subcc	%g2, 1, %g2
50397Sobrien		bge	single_divloop
50397Sobrien		tst	%o3
	! b,a is an annulled unconditional branch: no delay-slot instruction
	! is executed before control reaches end_regular_divide.
50397Sobrien		b,a	end_regular_divide
50397Sobriennot_really_big:
	! Small-dividend path.  Loop 1 shifts V left 4 bits per pass while V
	! is not above R.  The addcc in the bleu delay slot bumps %o4 on
	! every pass, including the last one; the sub in the delay slot of
	! the following be takes that extra one back off.  The be itself is
	! taken only when the addcc result was zero.
50397Sobrien1:
50397Sobrien	sll	%o5, 4, %o5
50397Sobrien	cmp	%o5, %o3
50397Sobrien	bleu	1b
50397Sobrien	addcc	%o4, 1, %o4
50397Sobrien	be	got_result
50397Sobrien	sub	%o4, 1, %o4
50397Sobrien
50397Sobrien	tst	%o3	! set up for initial iteration
50397Sobriendivloop:
	! One pass develops 4 quotient bits.  Q is shifted left 4, then four
	! steps follow: each halves V (in a bl delay slot) and subtracts V
	! from R when R is non-negative or adds V to R when R is negative.
	! Every path ends at a leaf that adds the accumulated odd digit,
	! between -15 and +15, to Q and branches to the join label 9.
	! Label naming: Ld.k is the remainder-negative target of the test at
	! depth d, and k is 16 plus the accumulated bits noted at that test.
50397Sobrien	sll	%o2, 4, %o2
50397Sobrien	! depth 1, accumulated bits 0
50397Sobrien	bl	L1.16
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 2, accumulated bits 1
50397Sobrien	bl	L2.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits 3
50397Sobrien	bl	L3.19
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 7
50397Sobrien	bl	L4.23
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (7*2+1), %o2
50397Sobrien
50397SobrienL4.23:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (7*2-1), %o2
50397Sobrien
50397Sobrien
50397SobrienL3.19:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 5
50397Sobrien	bl	L4.21
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (5*2+1), %o2
50397Sobrien
50397SobrienL4.21:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (5*2-1), %o2
50397Sobrien
50397SobrienL2.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits 1
50397Sobrien	bl	L3.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 3
50397Sobrien	bl	L4.19
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (3*2+1), %o2
50397Sobrien
50397SobrienL4.19:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (3*2-1), %o2
50397Sobrien
50397SobrienL3.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 1
50397Sobrien	bl	L4.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (1*2+1), %o2
50397Sobrien
50397SobrienL4.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (1*2-1), %o2
50397Sobrien
50397SobrienL1.16:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 2, accumulated bits -1
50397Sobrien	bl	L2.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits -1
50397Sobrien	bl	L3.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -1
50397Sobrien	bl	L4.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-1*2+1), %o2
50397Sobrien
50397SobrienL4.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-1*2-1), %o2
50397Sobrien
50397SobrienL3.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -3
50397Sobrien	bl	L4.13
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-3*2+1), %o2
50397Sobrien
50397SobrienL4.13:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-3*2-1), %o2
50397Sobrien
50397SobrienL2.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits -3
50397Sobrien	bl	L3.13
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -5
50397Sobrien	bl	L4.11
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-5*2+1), %o2
50397Sobrien
50397SobrienL4.11:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-5*2-1), %o2
50397Sobrien
50397SobrienL3.13:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -7
50397Sobrien	bl	L4.9
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-7*2+1), %o2
50397Sobrien
50397SobrienL4.9:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-7*2-1), %o2
50397Sobrien
	! Join point for every leaf of the step tree above.
50397Sobrien	9:
50397Sobrienend_regular_divide:
	! Loop control and result delivery for the quotient routines.
	! %o4 is counted down; while it stays non-negative divloop runs again,
	! with tst %o3 in the bge delay slot setting the condition codes that
	! the first step of divloop (and the bl,a below) tests.
50397Sobrien	subcc	%o4, 1, %o4
50397Sobrien	bge	divloop
50397Sobrien	tst	%o3
	! bl,a: the delay-slot sub runs only when the branch is taken, that
	! is, only when the final remainder is negative.  Either way control
	! reaches got_result.
50397Sobrien	bl,a	got_result
50397Sobrien	! non-restoring fixup here (one instruction only!)
50397Sobrien	sub	%o2, 1, %o2
50397Sobrien
50397Sobrien
50397Sobriengot_result:
50397Sobrien	! check to see if answer should be < 0
	! The annulled branch negates Q only when the sign word %g3 is
	! negative; otherwise the sub is skipped.
50397Sobrien	tst	%g3
50397Sobrien	bl,a	1f
50397Sobrien	sub %g0, %o2, %o2
50397Sobrien1:
	! The quotient is copied to %o0 in the retl delay slot.
50397Sobrien	retl
50397Sobrien	mov %o2, %o0
50397Sobrien#endif
50397Sobrien
50397Sobrien#ifdef L_modsi3
50397Sobrien/* This implementation was taken from glibc:
50397Sobrien *
50397Sobrien * Input: dividend and divisor in %o0 and %o1 respectively.
50397Sobrien *
50397Sobrien * Algorithm parameters:
50397Sobrien *  N		how many bits per iteration we try to get (4)
50397Sobrien *  WORDSIZE	total number of bits (32)
50397Sobrien *
50397Sobrien * Derived constants:
50397Sobrien *  TOPBITS	number of bits in the top decade of a number
50397Sobrien *
50397Sobrien * Important variables:
50397Sobrien *  Q		the partial quotient under development (initially 0)
50397Sobrien *  R		the remainder so far, initially the dividend
50397Sobrien *  ITER	number of main division loop iterations required;
50397Sobrien *		equal to ceil(log2(quotient) / N).  Note that this
50397Sobrien *		is the log base (2^N) of the quotient.
50397Sobrien *  V		the current comparand, initially divisor*2^(ITER*N-1)
50397Sobrien *
50397Sobrien * Cost:
50397Sobrien *  Current estimate for non-large dividend is
50397Sobrien *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
50397Sobrien *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
50397Sobrien *  different path, as the upper bits of the quotient must be developed
50397Sobrien *  one bit at a time.
50397Sobrien */
50397Sobrien.text
50397Sobrien	.align 4
50397Sobrien	.global	.urem
50397Sobrien	.proc 4
	! .urem: unsigned remainder entry point.
	!   In:  %o0 = dividend, %o1 = divisor
	! Branches to the shared code at divide.  The mov in the branch delay
	! slot clears the sign word %g3, so the sign test at got_result never
	! negates the remainder.
50397Sobrien.urem:
50397Sobrien	b	divide
50397Sobrien	mov	0, %g3		! result always positive
50397Sobrien
50397Sobrien        .align 4
50397Sobrien	.global .rem
50397Sobrien	.proc 4
	! .rem: signed remainder entry point.
	!   In:  %o0 = dividend, %o1 = divisor
	! %g3 receives a copy of the original dividend, taken in the delay
	! slot of the first bge before any negation; got_result tests its
	! sign to decide whether to negate the remainder.  Any negative
	! operand is negated here, then control reaches label 2 and falls
	! into the shared code.
50397Sobrien.rem:
50397Sobrien	! compute sign of result; if neither is negative, no problem
50397Sobrien	orcc	%o1, %o0, %g0	! either negative?
50397Sobrien	bge	2f			! no, go do the divide
50397Sobrien	mov	%o0, %g3		! sign of remainder matches %o0
50397Sobrien	tst	%o1
50397Sobrien	bge	1f
	! tst %o0 is the delay slot of the bge above, so it always executes;
	! the condition codes it sets drive the next bge.
50397Sobrien	tst	%o0
50397Sobrien	! %o1 is definitely negative; %o0 might also be negative
50397Sobrien	bge	2f			! if %o0 not negative...
50397Sobrien	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
50397Sobrien1:	! %o0 is negative, %o1 is nonnegative
50397Sobrien	sub	%g0, %o0, %o0	! make %o0 nonnegative
50397Sobrien2:
50397Sobrien
50397Sobrien	! Ready to divide.  Compute size of quotient; scale comparand.
50397Sobriendivide:
	! Shared remainder core, reached from both .urem and .rem.
	! Register use from here on:
	!   %o3 = R, running remainder (starts as the dividend %o0)
	!   %o5 = V, comparand (starts as the divisor %o1)
	!   %o1 = divisor, kept for the final fixup at end_regular_divide
	!   %o2 = Q, quotient under development (not returned)
	!   %o4 = ITER, count of 4-bit divloop passes
	!   %g1 = 1 << (32 - 4 - 1), threshold for the large-dividend path
	!   %g2 = count of single-bit steps on the large-dividend path
	!   %g3 = sign word set by the entry point, tested at got_result
50397Sobrien	orcc	%o1, %g0, %o5
50397Sobrien	bne	1f
50397Sobrien	mov	%o0, %o3
50397Sobrien
50397Sobrien		! Divide by zero trap.  If it returns, return 0 (about as
50397Sobrien		! wrong as possible, but that is what SunOS does...).
50397Sobrien		ta	0x2   !ST_DIV0
50397Sobrien		retl
50397Sobrien		clr	%o0
50397Sobrien
50397Sobrien1:
	! If the dividend is below the divisor, R already holds the answer
	! (the dividend itself) and got_result returns it.
50397Sobrien	cmp	%o3, %o5		! if %o1 exceeds %o0, done
50397Sobrien	blu	got_result		! (and algorithm fails otherwise)
50397Sobrien	clr	%o2
50397Sobrien	sethi	%hi(1 << (32 - 4 - 1)), %g1
50397Sobrien	cmp	%o3, %g1
50397Sobrien	blu	not_really_big
50397Sobrien	clr	%o4
50397Sobrien
50397Sobrien	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
50397Sobrien	! as our usual N-at-a-shot divide step will cause overflow and havoc.
50397Sobrien	! The number of bits in the result here is N*ITER+SC, where SC <= N.
50397Sobrien	! Compute ITER in an unorthodox manner: know we need to shift V into
50397Sobrien	! the top decade: so do not even bother to compare to R.
	! Loop 1 shifts V left 4 bits at a time until it reaches %g1, counting
	! the shifts in %o4.  %g2 is reset to 1 in the bgeu delay slot on
	! every pass.
50397Sobrien	1:
50397Sobrien		cmp	%o5, %g1
50397Sobrien		bgeu	3f
50397Sobrien		mov	1, %g2
50397Sobrien		sll	%o5, 4, %o5
50397Sobrien		b	1b
50397Sobrien		add	%o4, 1, %o4
50397Sobrien
50397Sobrien	! Now compute %g2.
	! Loop 2/3 doubles V one bit at a time while V is below R, counting
	! each doubling in %g2 (the add runs in the bcc delay slot).
50397Sobrien	2:	addcc	%o5, %o5, %o5
50397Sobrien		bcc	not_too_big
50397Sobrien		add	%g2, 1, %g2
50397Sobrien
50397Sobrien		! We get here if the %o1 overflowed while shifting.
50397Sobrien		! This means that %o3 has the high-order bit set.
50397Sobrien		! Restore %o5 and subtract from %o3.
50397Sobrien		sll	%g1, 4, %g1	! high order bit
50397Sobrien		srl	%o5, 1, %o5		! rest of %o5
50397Sobrien		add	%o5, %g1, %o5
50397Sobrien		b	do_single_div
50397Sobrien		sub	%g2, 1, %g2
50397Sobrien
50397Sobrien	not_too_big:
50397Sobrien	3:	cmp	%o5, %o3
50397Sobrien		blu	2b
50397Sobrien		nop
50397Sobrien		be	do_single_div
50397Sobrien		nop
117395Skan	/* NB: these are commented out in the V8-SPARC manual as well */
50397Sobrien	/* (I do not understand this) */
50397Sobrien	! %o5 > %o3: went too far: back up 1 step
50397Sobrien	!	srl	%o5, 1, %o5
50397Sobrien	!	dec	%g2
50397Sobrien	! do single-bit divide steps
50397Sobrien	!
50397Sobrien	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
50397Sobrien	! first divide step without thinking.  BUT, the others are conditional,
50397Sobrien	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
50397Sobrien	! order bit set in the first step, just falling into the regular
50397Sobrien	! division loop will mess up the first time around.
50397Sobrien	! So we unroll slightly...
50397Sobrien	do_single_div:
50397Sobrien		subcc	%g2, 1, %g2
50397Sobrien		bl	end_regular_divide
50397Sobrien		nop
50397Sobrien		sub	%o3, %o5, %o3
50397Sobrien		mov	1, %o2
50397Sobrien		b	end_single_divloop
50397Sobrien		nop
	! One quotient bit per pass: Q is doubled, V is halved in the bl delay
	! slot, then V is subtracted from or added to R depending on the sign
	! of R tested by the tst at the bottom of the loop.
50397Sobrien	single_divloop:
50397Sobrien		sll	%o2, 1, %o2
50397Sobrien		bl	1f
50397Sobrien		srl	%o5, 1, %o5
50397Sobrien		! %o3 >= 0
50397Sobrien		sub	%o3, %o5, %o3
50397Sobrien		b	2f
50397Sobrien		add	%o2, 1, %o2
50397Sobrien	1:	! %o3 < 0
50397Sobrien		add	%o3, %o5, %o3
50397Sobrien		sub	%o2, 1, %o2
50397Sobrien	2:
50397Sobrien	end_single_divloop:
50397Sobrien		subcc	%g2, 1, %g2
50397Sobrien		bge	single_divloop
50397Sobrien		tst	%o3
	! b,a is an annulled unconditional branch: no delay-slot instruction
	! is executed before control reaches end_regular_divide.
50397Sobrien		b,a	end_regular_divide
50397Sobrien
50397Sobriennot_really_big:
	! Small-dividend path.  Loop 1 shifts V left 4 bits per pass while V
	! is not above R.  The addcc in the bleu delay slot bumps %o4 on
	! every pass, including the last one; the sub in the delay slot of
	! the following be takes that extra one back off.  The be itself is
	! taken only when the addcc result was zero.
50397Sobrien1:
50397Sobrien	sll	%o5, 4, %o5
50397Sobrien	cmp	%o5, %o3
50397Sobrien	bleu	1b
50397Sobrien	addcc	%o4, 1, %o4
50397Sobrien	be	got_result
50397Sobrien	sub	%o4, 1, %o4
50397Sobrien
50397Sobrien	tst	%o3	! set up for initial iteration
50397Sobriendivloop:
	! One pass runs four non-restoring steps.  Q is shifted left 4, then
	! each step halves V (in a bl delay slot) and subtracts V from R when
	! R is non-negative or adds V to R when R is negative.  Every path
	! ends at a leaf that adds the accumulated odd digit, between -15 and
	! +15, to Q and branches to the join label 9.
	! Label naming: Ld.k is the remainder-negative target of the test at
	! depth d, and k is 16 plus the accumulated bits noted at that test.
50397Sobrien	sll	%o2, 4, %o2
50397Sobrien		! depth 1, accumulated bits 0
50397Sobrien	bl	L1.16
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 2, accumulated bits 1
50397Sobrien	bl	L2.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits 3
50397Sobrien	bl	L3.19
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 7
50397Sobrien	bl	L4.23
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (7*2+1), %o2
50397SobrienL4.23:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (7*2-1), %o2
50397Sobrien
50397SobrienL3.19:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 5
50397Sobrien	bl	L4.21
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (5*2+1), %o2
50397Sobrien
50397SobrienL4.21:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (5*2-1), %o2
50397Sobrien
50397SobrienL2.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits 1
50397Sobrien	bl	L3.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 3
50397Sobrien	bl	L4.19
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (3*2+1), %o2
50397Sobrien
50397SobrienL4.19:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (3*2-1), %o2
50397Sobrien
50397SobrienL3.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits 1
50397Sobrien	bl	L4.17
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (1*2+1), %o2
50397Sobrien
50397SobrienL4.17:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (1*2-1), %o2
50397Sobrien
50397SobrienL1.16:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 2, accumulated bits -1
50397Sobrien	bl	L2.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits -1
50397Sobrien	bl	L3.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -1
50397Sobrien	bl	L4.15
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-1*2+1), %o2
50397Sobrien
50397SobrienL4.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-1*2-1), %o2
50397Sobrien
50397SobrienL3.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -3
50397Sobrien	bl	L4.13
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-3*2+1), %o2
50397Sobrien
50397SobrienL4.13:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-3*2-1), %o2
50397Sobrien
50397SobrienL2.15:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 3, accumulated bits -3
50397Sobrien	bl	L3.13
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -5
50397Sobrien	bl	L4.11
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-5*2+1), %o2
50397Sobrien
50397SobrienL4.11:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-5*2-1), %o2
50397Sobrien
50397SobrienL3.13:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	! depth 4, accumulated bits -7
50397Sobrien	bl	L4.9
50397Sobrien	srl	%o5,1,%o5
50397Sobrien	! remainder is positive
50397Sobrien	subcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-7*2+1), %o2
50397Sobrien
50397SobrienL4.9:
50397Sobrien	! remainder is negative
50397Sobrien	addcc	%o3,%o5,%o3
50397Sobrien	b	9f
50397Sobrien	add	%o2, (-7*2-1), %o2
50397Sobrien
	! Join point for every leaf of the step tree above.
50397Sobrien	9:
50397Sobrienend_regular_divide:
	! Loop control and result delivery for the remainder routines.
	! %o4 is counted down; while it stays non-negative divloop runs again,
	! with tst %o3 in the bge delay slot setting the condition codes that
	! the first step of divloop (and the bl,a below) tests.
50397Sobrien	subcc	%o4, 1, %o4
50397Sobrien	bge	divloop
50397Sobrien	tst	%o3
	! bl,a: the delay-slot add runs only when the branch is taken, that
	! is, only when the final remainder is negative; it adds the divisor
	! %o1 back into R.  Either way control reaches got_result.
50397Sobrien	bl,a	got_result
50397Sobrien	! non-restoring fixup here (one instruction only!)
50397Sobrien	add	%o3, %o1, %o3
50397Sobrien
50397Sobriengot_result:
50397Sobrien	! check to see if answer should be < 0
	! The annulled branch negates R only when the sign word %g3 is
	! negative; otherwise the sub is skipped.
50397Sobrien	tst	%g3
50397Sobrien	bl,a	1f
50397Sobrien	sub %g0, %o3, %o3
50397Sobrien1:
	! The remainder is copied to %o0 in the retl delay slot.
50397Sobrien	retl
50397Sobrien	mov %o3, %o0
50397Sobrien
50397Sobrien#endif
50397Sobrien