1222656Sed// This file is dual licensed under the MIT and the University of Illinois Open
2222656Sed// Source Licenses. See LICENSE.TXT for details.
3214152Sed
4214152Sed#include "../assembly.h"
5214152Sed
6214152Sed// di_int __divdi3(di_int a, di_int b);
7214152Sed
8214152Sed// result = a / b.
9214152Sed// both inputs and the output are 64-bit signed integers.
10214152Sed// This will do whatever the underlying hardware is set to do on division by zero.
11214152Sed// No other exceptions are generated, as the divide cannot overflow.
12214152Sed//
13214152Sed// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14214152Sed// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
15214152Sed// currently possible via simulation of integer divides on the x87 unit.
16214152Sed//
17214152Sed// Stephen Canon, December 2008
18214152Sed
19214152Sed#ifdef __i386__
20214152Sed
21214152Sed.text
22214152Sed.align 4
23214152SedDEFINE_COMPILERRT_FUNCTION(__divdi3)
24214152Sed
25214152Sed/* This is currently implemented by wrapping the unsigned divide up in an absolute
26214152Sed   value, then restoring the correct sign at the end of the computation.  This could
27214152Sed   certainly be improved upon. */
28214152Sed
29214152Sed	pushl		%esi
30214152Sed	movl	 20(%esp),			%edx	// high word of b
31214152Sed	movl	 16(%esp),			%eax	// low word of b
32214152Sed	movl		%edx,			%ecx
33214152Sed	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
34214152Sed	xorl		%ecx,			%eax
35214152Sed	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
36214152Sed	subl		%ecx,			%eax
37214152Sed	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
38214152Sed	movl		%edx,		 20(%esp)
39214152Sed	movl		%eax,		 16(%esp)	// store abs(b) back to stack
40214152Sed	movl		%ecx,			%esi	// set aside sign of b
41214152Sed
42214152Sed	movl	 12(%esp),			%edx	// high word of b
43214152Sed	movl	  8(%esp),			%eax	// low word of b
44214152Sed	movl		%edx,			%ecx
45214152Sed	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
46214152Sed	xorl		%ecx,			%eax
47214152Sed	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
48214152Sed	subl		%ecx,			%eax
49214152Sed	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
50214152Sed	movl		%edx,		 12(%esp)
51214152Sed	movl		%eax,		  8(%esp)	// store abs(a) back to stack
52214152Sed	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
53214152Sed
54214152Sed	pushl		%ebx
55214152Sed	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
56214152Sed	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
57214152Sed	jz			9f						// the code to handle that special case [9].
58214152Sed
59214152Sed	/* High word of b is known to be non-zero on this branch */
60214152Sed
61214152Sed	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
62214152Sed
63214152Sed	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
64214152Sed	shrl		%eax					//
65214152Sed	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
66214152Sed	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
67214152Sed	orl			%eax,			%ebx	//
68214152Sed	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
69214152Sed	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
70214152Sed	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
71214152Sed	jae			1f
72214152Sed
73214152Sed	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74214152Sed
75214152Sed	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76214152Sed
77214152Sed	pushl		%edi
78214152Sed	notl		%ecx
79214152Sed	shrl		%eax
80214152Sed	shrl		%cl,			%eax	// q = qs >> (1 + i)
81214152Sed	movl		%eax,			%edi
82214152Sed	mull	 24(%esp)					// q*blo
83214152Sed	movl	 16(%esp),			%ebx
84214152Sed	movl	 20(%esp),			%ecx	// ECX:EBX = a
85214152Sed	subl		%eax,			%ebx
86214152Sed	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
87214152Sed	movl	 28(%esp),			%eax
88214152Sed	imull		%edi,			%eax	// q*bhi
89214152Sed	subl		%eax,			%ecx	// ECX:EBX = a - q*b
90214152Sed	sbbl		$0,				%edi	// decrement q if remainder is negative
91214152Sed	xorl		%edx,			%edx
92214152Sed	movl		%edi,			%eax
93214152Sed
94214152Sed	addl		%esi,			%eax	// Restore correct sign to result
95214152Sed	adcl		%esi,			%edx
96214152Sed	xorl		%esi,			%eax
97214152Sed	xorl		%esi,			%edx
98214152Sed	popl		%edi					// Restore callee-save registers
99214152Sed	popl		%ebx
100214152Sed	popl		%esi
101214152Sed	retl								// Return
102214152Sed
103214152Sed
104214152Sed1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105214152Sed
106214152Sed	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
107214152Sed	divl		%ebx					// overflow, and find q and r such that
108214152Sed										//
109214152Sed										//		ahi:alo = (1:q)*bhi + r
110214152Sed										//
111214152Sed										// Note that q is a number in (31-i).(1+i)
112214152Sed										// fix point.
113214152Sed
114214152Sed	pushl		%edi
115214152Sed	notl		%ecx
116214152Sed	shrl		%eax
117214152Sed	orl			$0x80000000,	%eax
118214152Sed	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
119214152Sed	movl		%eax,			%edi
120214152Sed	mull	 24(%esp)					// q*blo
121214152Sed	movl	 16(%esp),			%ebx
122214152Sed	movl	 20(%esp),			%ecx	// ECX:EBX = a
123214152Sed	subl		%eax,			%ebx
124214152Sed	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
125214152Sed	movl	 28(%esp),			%eax
126214152Sed	imull		%edi,			%eax	// q*bhi
127214152Sed	subl		%eax,			%ecx	// ECX:EBX = a - q*b
128214152Sed	sbbl		$0,				%edi	// decrement q if remainder is negative
129214152Sed	xorl		%edx,			%edx
130214152Sed	movl		%edi,			%eax
131214152Sed
132214152Sed	addl		%esi,			%eax	// Restore correct sign to result
133214152Sed	adcl		%esi,			%edx
134214152Sed	xorl		%esi,			%eax
135214152Sed	xorl		%esi,			%edx
136214152Sed	popl		%edi					// Restore callee-save registers
137214152Sed	popl		%ebx
138214152Sed	popl		%esi
139214152Sed	retl								// Return
140214152Sed
141214152Sed
142214152Sed9:	/* High word of b is zero on this branch */
143214152Sed
144214152Sed	movl	 16(%esp),			%eax	// Find qhi and rhi such that
145214152Sed	movl	 20(%esp),			%ecx	//
146214152Sed	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 ��� rhi < b
147214152Sed	divl		%ecx					//
148214152Sed	movl		%eax,			%ebx	//
149214152Sed	movl	 12(%esp),			%eax	// Find qlo such that
150214152Sed	divl		%ecx					//
151214152Sed	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 ��� rlo < b
152214152Sed
153214152Sed	addl		%esi,			%eax	// Restore correct sign to result
154214152Sed	adcl		%esi,			%edx
155214152Sed	xorl		%esi,			%eax
156214152Sed	xorl		%esi,			%edx
157214152Sed	popl		%ebx					// Restore callee-save registers
158214152Sed	popl		%esi
159214152Sed	retl								// Return
160214152Sed
161214152Sed#endif // __i386__
162