lib/i386/divdi3.S

222656Sed// This file is dual licensed under the MIT and the University of Illinois Open
222656Sed// Source Licenses. See LICENSE.TXT for details.
214152Sed
214152Sed#include "../assembly.h"
214152Sed
214152Sed// di_int __divdi3(di_int a, di_int b);
214152Sed
214152Sed// result = a / b, with the quotient rounded toward zero.
214152Sed// Both inputs and the output are 64-bit signed integers.
214152Sed// This will do whatever the underlying hardware is set to do on division by zero.
214152Sed// No other exceptions are generated, as the divide cannot overflow.
214152Sed//
214152Sed// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
214152Sed// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
214152Sed// currently possible via simulation of integer divides on the x87 unit.
214152Sed//
214152Sed// Stephen Canon, December 2008
214152Sed
214152Sed#ifdef __i386__
214152Sed
214152Sed.text
214152Sed.align 4
214152SedDEFINE_COMPILERRT_FUNCTION(__divdi3)
214152Sed
214152Sed/* This is currently implemented by wrapping the unsigned divide up in an absolute
214152Sed   value, then restoring the correct sign at the end of the computation.  This could
214152Sed   certainly be improved upon.
214152Sed
214152Sed   Stack offsets used below: after the first push (ESI), a is read from 8(%esp)
214152Sed   (low word) and 12(%esp) (high word), and b from 16(%esp) (low word) and
214152Sed   20(%esp) (high word).  Each further push (EBX, then EDI) moves all of these
214152Sed   offsets up by 4.
214152Sed
214152Sed   Note that abs(a) and abs(b) are written back over the incoming argument slots
214152Sed   on the stack, and that ESI holds the sign mask of the result (0 or -1) from
214152Sed   the end of the absolute-value step until the return. */
214152Sed
214152Sed	pushl		%esi					// save ESI; it will hold the sign mask of the result
214152Sed	movl	 20(%esp),			%edx	// high word of b
214152Sed	movl	 16(%esp),			%eax	// low word of b
214152Sed	movl		%edx,			%ecx
214152Sed	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
214152Sed	xorl		%ecx,			%eax
214152Sed	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
214152Sed	subl		%ecx,			%eax
214152Sed	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
214152Sed	movl		%edx,		 20(%esp)
214152Sed	movl		%eax,		 16(%esp)	// store abs(b) back to stack
214152Sed	movl		%ecx,			%esi	// set aside sign of b
214152Sed
214152Sed	movl	 12(%esp),			%edx	// high word of a
214152Sed	movl	  8(%esp),			%eax	// low word of a
214152Sed	movl		%edx,			%ecx
214152Sed	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
214152Sed	xorl		%ecx,			%eax
214152Sed	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
214152Sed	subl		%ecx,			%eax
214152Sed	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
214152Sed	movl		%edx,		 12(%esp)
214152Sed	movl		%eax,		  8(%esp)	// store abs(a) back to stack
214152Sed	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
214152Sed
214152Sed	pushl		%ebx					// save EBX; argument offsets grow by 4 from here on
214152Sed	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
214152Sed	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
214152Sed	jz			9f						// the code to handle that special case [9].
214152Sed
214152Sed	/* High word of b is known to be non-zero on this branch */
214152Sed
214152Sed	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
214152Sed
214152Sed	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
214152Sed	shrl		%eax					//
214152Sed	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
214152Sed	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
214152Sed	orl			%eax,			%ebx	//
214152Sed	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
214152Sed	movl	 12(%esp),			%eax	// to [1] if the high word is greater than or equal
214152Sed	cmpl		%ebx,			%edx	// to bhi, to avoid overflowing the upcoming divide.
214152Sed	jae			1f
214152Sed
214152Sed	/* High word of a is less than bhi = (b >> (1 + i)) on this branch, so the divide below cannot overflow */
214152Sed
214152Sed	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
214152Sed
214152Sed	pushl		%edi					// save EDI; argument offsets grow by another 4
214152Sed	notl		%ecx					// undo the earlier notl so that CL holds i again
214152Sed	shrl		%eax
214152Sed	shrl		%cl,			%eax	// q = qs >> (1 + i)
214152Sed	movl		%eax,			%edi	// keep the quotient estimate q in EDI; mull overwrites EDX:EAX
214152Sed	mull	 24(%esp)					// q*blo
214152Sed	movl	 16(%esp),			%ebx
214152Sed	movl	 20(%esp),			%ecx	// ECX:EBX = a
214152Sed	subl		%eax,			%ebx
214152Sed	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
214152Sed	movl	 28(%esp),			%eax
214152Sed	imull		%edi,			%eax	// q*bhi
214152Sed	subl		%eax,			%ecx	// ECX:EBX = a - q*b
214152Sed	sbbl		$0,				%edi	// decrement q if remainder is negative (the subtract above borrowed)
214152Sed	xorl		%edx,			%edx	// high word of the unsigned quotient is zero on this branch
214152Sed	movl		%edi,			%eax	// EDX:EAX = unsigned quotient
214152Sed
214152Sed	addl		%esi,			%eax	// Restore correct sign to result
214152Sed	adcl		%esi,			%edx	// (adding the mask then xoring with it negates
214152Sed	xorl		%esi,			%eax	// EDX:EAX when ESI is -1 and leaves it unchanged
214152Sed	xorl		%esi,			%edx	// when ESI is 0)
214152Sed	popl		%edi					// Restore callee-save registers
214152Sed	popl		%ebx
214152Sed	popl		%esi
214152Sed	retl								// Return
214152Sed
214152Sed
214152Sed1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
214152Sed
214152Sed	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
214152Sed	divl		%ebx					// overflow, and find q and r such that
214152Sed										//
214152Sed										//		ahi:alo = (1:q)*bhi + r
214152Sed										//
214152Sed										// Note that q is a number in (31-i).(1+i)
214152Sed										// fixed point.
214152Sed
214152Sed	pushl		%edi					// save EDI; argument offsets grow by another 4
214152Sed	notl		%ecx					// undo the earlier notl so that CL holds i again
214152Sed	shrl		%eax
214152Sed	orl			$0x80000000,	%eax	// put the implicit leading 1 of (1:qs) into the vacated top bit
214152Sed	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
214152Sed	movl		%eax,			%edi	// keep the quotient estimate q in EDI; mull overwrites EDX:EAX
214152Sed	mull	 24(%esp)					// q*blo
214152Sed	movl	 16(%esp),			%ebx
214152Sed	movl	 20(%esp),			%ecx	// ECX:EBX = a
214152Sed	subl		%eax,			%ebx
214152Sed	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
214152Sed	movl	 28(%esp),			%eax
214152Sed	imull		%edi,			%eax	// q*bhi
214152Sed	subl		%eax,			%ecx	// ECX:EBX = a - q*b
214152Sed	sbbl		$0,				%edi	// decrement q if remainder is negative (the subtract above borrowed)
214152Sed	xorl		%edx,			%edx	// high word of the unsigned quotient is zero on this branch
214152Sed	movl		%edi,			%eax	// EDX:EAX = unsigned quotient
214152Sed
214152Sed	addl		%esi,			%eax	// Restore correct sign to result
214152Sed	adcl		%esi,			%edx	// (adding the mask then xoring with it negates
214152Sed	xorl		%esi,			%eax	// EDX:EAX when ESI is -1 and leaves it unchanged
214152Sed	xorl		%esi,			%edx	// when ESI is 0)
214152Sed	popl		%edi					// Restore callee-save registers
214152Sed	popl		%ebx
214152Sed	popl		%esi
214152Sed	retl								// Return
214152Sed
214152Sed
214152Sed9:	/* High word of b is zero on this branch */
214152Sed
214152Sed	movl	 16(%esp),			%eax	// Find qhi and rhi such that
214152Sed	movl	 20(%esp),			%ecx	//
214152Sed	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 <= rhi < b
214152Sed	divl		%ecx					//
214152Sed	movl		%eax,			%ebx	// (qhi is kept in EBX; rhi stays in EDX for the next divide)
214152Sed	movl	 12(%esp),			%eax	// Find qlo such that
214152Sed	divl		%ecx					//
214152Sed	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 <= rlo < b
214152Sed
214152Sed	addl		%esi,			%eax	// Restore correct sign to result
214152Sed	adcl		%esi,			%edx	// (adding the mask then xoring with it negates
214152Sed	xorl		%esi,			%eax	// EDX:EAX when ESI is -1 and leaves it unchanged
214152Sed	xorl		%esi,			%edx	// when ESI is 0)
214152Sed	popl		%ebx					// Restore callee-save registers
214152Sed	popl		%esi
214152Sed	retl								// Return
214152Sed
214152Sed#endif // __i386__