// builtins/i386/divdi3.S

// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __divdi3(di_int a, di_int b);
//
// result = a / b.
// Both inputs and the output are 64-bit signed integers.
// This will do whatever the underlying hardware is set to do on division by zero.
// No other exceptions are generated, as the divide cannot overflow.
//
// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
// currently possible via simulation of integer divides on the x87 unit.
//
// Syntax: AT&T (source, destination).
//
// Arguments are read from the stack.  Relative to %esp on entry:
//
//		 4(%esp) = low word of a		 8(%esp) = high word of a
//		12(%esp) = low word of b		16(%esp) = high word of b
//
// Every pushl below moves these offsets up by 4, which is why the same word is
// addressed with different displacements in different parts of the routine.
// The argument slots are overwritten in place with abs(a) and abs(b).
//
// The quotient is left in EDX:EAX.  ESI, EBX and (on the paths that use it) EDI
// are pushed before use and popped before returning; ECX is used as scratch.
// ESI carries the sign mask of the result (0 or -1) for the whole routine.
//
// Stephen Canon, December 2008

#ifdef __i386__

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__divdi3)

/* This is currently implemented by wrapping the unsigned divide up in an absolute
   value, then restoring the correct sign at the end of the computation.  This could
   certainly be improved upon. */

	/* Replace b with abs(b), remembering its sign mask in ESI. */

	pushl		%esi
	movl	 20(%esp),			%edx	// high word of b
	movl	 16(%esp),			%eax	// low word of b
	movl		%edx,			%ecx
	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
	xorl		%ecx,			%eax
	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
	subl		%ecx,			%eax
	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
	movl		%edx,		 20(%esp)
	movl		%eax,		 16(%esp)	// store abs(b) back to stack
	movl		%ecx,			%esi	// set aside sign of b

	/* Replace a with abs(a), and fold its sign mask into ESI. */

	movl	 12(%esp),			%edx	// high word of a
	movl	  8(%esp),			%eax	// low word of a
	movl		%edx,			%ecx
	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
	xorl		%ecx,			%eax
	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
	subl		%ecx,			%eax
	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
	movl		%edx,		 12(%esp)
	movl		%eax,		  8(%esp)	// store abs(a) back to stack
	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)

	/* From here on a and b denote the absolute values stored above, and the
	   divide is unsigned. */

	pushl		%ebx
	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
	jz			9f						// the code to handle that special case [9].

	/* High word of b is known to be non-zero on this branch */

	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b

	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
	shrl		%eax					//
	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
	orl			%eax,			%ebx	//
	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
	movl	 12(%esp),			%eax	// to [1] if the high word is at least bhi
	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
	jae			1f

	/* High word of a is less than bhi on this branch, so the quotient of the
	   divide below fits in 32 bits */

	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bhi*qs + r

	pushl		%edi
	notl		%ecx					// undo the earlier notl: CL = i again
	shrl		%eax
	shrl		%cl,			%eax	// q = qs >> (1 + i)
	movl		%eax,			%edi	// keep the quotient estimate in EDI
	mull	 24(%esp)					// q*blo
	movl	 16(%esp),			%ebx
	movl	 20(%esp),			%ecx	// ECX:EBX = a
	subl		%eax,			%ebx
	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
	movl	 28(%esp),			%eax	// high word of b
	imull		%edi,			%eax	// q*bhi
	subl		%eax,			%ecx	// ECX:EBX = a - q*b
	sbbl		$0,				%edi	// decrement q if remainder is negative
	xorl		%edx,			%edx	// high word of the quotient is zero
	movl		%edi,			%eax

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (q + s) ^ s is q when s = 0
	xorl		%esi,			%eax	// and -q when s = -1
	xorl		%esi,			%edx
	popl		%edi					// Restore callee-save registers
	popl		%ebx
	popl		%esi
	retl								// Return


1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */

	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
	divl		%ebx					// overflow, and find q and r such that
										//
										//		ahi:alo = (1:q)*bhi + r
										//
										// Note that q is a number in (31-i).(1+i)
										// fix point.

	pushl		%edi
	notl		%ecx					// undo the earlier notl: CL = i again
	shrl		%eax
	orl			$0x80000000,	%eax	// shift in the implicit leading 1 of (1:qs)
	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
	movl		%eax,			%edi	// keep the quotient estimate in EDI
	mull	 24(%esp)					// q*blo
	movl	 16(%esp),			%ebx
	movl	 20(%esp),			%ecx	// ECX:EBX = a
	subl		%eax,			%ebx
	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
	movl	 28(%esp),			%eax	// high word of b
	imull		%edi,			%eax	// q*bhi
	subl		%eax,			%ecx	// ECX:EBX = a - q*b
	sbbl		$0,				%edi	// decrement q if remainder is negative
	xorl		%edx,			%edx	// high word of the quotient is zero
	movl		%edi,			%eax

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (q + s) ^ s is q when s = 0
	xorl		%esi,			%eax	// and -q when s = -1
	xorl		%esi,			%edx
	popl		%edi					// Restore callee-save registers
	popl		%ebx
	popl		%esi
	retl								// Return


9:	/* High word of b is zero on this branch, so b below is its low word.
	   EDI was never pushed on this path, so only EBX and ESI are restored. */

	movl	 16(%esp),			%eax	// Find qhi and rhi such that
	movl	 20(%esp),			%ecx	//
	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 <= rhi < b
	divl		%ecx					//
	movl		%eax,			%ebx	//
	movl	 12(%esp),			%eax	// Find qlo such that
	divl		%ecx					//
	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 <= rlo < b

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (q + s) ^ s is q when s = 0
	xorl		%esi,			%eax	// and -q when s = -1
	xorl		%esi,			%edx
	popl		%ebx					// Restore callee-save registers
	popl		%esi
	retl								// Return
END_COMPILERRT_FUNCTION(__divdi3)

#endif // __i386__