1276789Sdim// This file is dual licensed under the MIT and the University of Illinois Open
2276789Sdim// Source Licenses. See LICENSE.TXT for details.
3276789Sdim
4276789Sdim#include "../assembly.h"
5276789Sdim
6276789Sdim// di_int __divdi3(di_int a, di_int b);
7276789Sdim
8276789Sdim// result = a / b.
9276789Sdim// both inputs and the output are 64-bit signed integers.
10276789Sdim// This will do whatever the underlying hardware is set to do on division by zero.
11276789Sdim// No other exceptions are generated, as the divide cannot overflow.
12276789Sdim//
13276789Sdim// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14276789Sdim// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
15276789Sdim// currently possible via simulation of integer divides on the x87 unit.
16276789Sdim//
17276789Sdim// Stephen Canon, December 2008
18276789Sdim
19276789Sdim#ifdef __i386__
20276789Sdim
21276789Sdim.text
22276789Sdim.balign 4
23276789SdimDEFINE_COMPILERRT_FUNCTION(__divdi3)
24276789Sdim
25276789Sdim/* This is currently implemented by wrapping the unsigned divide up in an absolute
26276789Sdim   value, then restoring the correct sign at the end of the computation.  This could
27276789Sdim   certainly be improved upon. */
28276789Sdim
29276789Sdim	pushl		%esi
30276789Sdim	movl	 20(%esp),			%edx	// high word of b
31276789Sdim	movl	 16(%esp),			%eax	// low word of b
32276789Sdim	movl		%edx,			%ecx
33276789Sdim	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
34276789Sdim	xorl		%ecx,			%eax
35276789Sdim	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
36276789Sdim	subl		%ecx,			%eax
37276789Sdim	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
38276789Sdim	movl		%edx,		 20(%esp)
39276789Sdim	movl		%eax,		 16(%esp)	// store abs(b) back to stack
40276789Sdim	movl		%ecx,			%esi	// set aside sign of b
41276789Sdim
42276789Sdim	movl	 12(%esp),			%edx	// high word of b
43276789Sdim	movl	  8(%esp),			%eax	// low word of b
44276789Sdim	movl		%edx,			%ecx
45276789Sdim	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
46276789Sdim	xorl		%ecx,			%eax
47276789Sdim	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
48276789Sdim	subl		%ecx,			%eax
49276789Sdim	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
50276789Sdim	movl		%edx,		 12(%esp)
51276789Sdim	movl		%eax,		  8(%esp)	// store abs(a) back to stack
52276789Sdim	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
53276789Sdim
54276789Sdim	pushl		%ebx
55276789Sdim	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
56276789Sdim	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
57276789Sdim	jz			9f						// the code to handle that special case [9].
58276789Sdim
59276789Sdim	/* High word of b is known to be non-zero on this branch */
60276789Sdim
61276789Sdim	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
62276789Sdim
63276789Sdim	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
64276789Sdim	shrl		%eax					//
65276789Sdim	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
66276789Sdim	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
67276789Sdim	orl			%eax,			%ebx	//
68276789Sdim	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
69276789Sdim	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
70276789Sdim	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
71276789Sdim	jae			1f
72276789Sdim
73276789Sdim	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74276789Sdim
75276789Sdim	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76276789Sdim
77276789Sdim	pushl		%edi
78276789Sdim	notl		%ecx
79276789Sdim	shrl		%eax
80276789Sdim	shrl		%cl,			%eax	// q = qs >> (1 + i)
81276789Sdim	movl		%eax,			%edi
82276789Sdim	mull	 24(%esp)					// q*blo
83276789Sdim	movl	 16(%esp),			%ebx
84276789Sdim	movl	 20(%esp),			%ecx	// ECX:EBX = a
85276789Sdim	subl		%eax,			%ebx
86276789Sdim	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
87276789Sdim	movl	 28(%esp),			%eax
88276789Sdim	imull		%edi,			%eax	// q*bhi
89276789Sdim	subl		%eax,			%ecx	// ECX:EBX = a - q*b
90276789Sdim	sbbl		$0,				%edi	// decrement q if remainder is negative
91276789Sdim	xorl		%edx,			%edx
92276789Sdim	movl		%edi,			%eax
93276789Sdim
94276789Sdim	addl		%esi,			%eax	// Restore correct sign to result
95276789Sdim	adcl		%esi,			%edx
96276789Sdim	xorl		%esi,			%eax
97276789Sdim	xorl		%esi,			%edx
98276789Sdim	popl		%edi					// Restore callee-save registers
99276789Sdim	popl		%ebx
100276789Sdim	popl		%esi
101276789Sdim	retl								// Return
102276789Sdim
103276789Sdim
104276789Sdim1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105276789Sdim
106276789Sdim	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
107276789Sdim	divl		%ebx					// overflow, and find q and r such that
108276789Sdim										//
109276789Sdim										//		ahi:alo = (1:q)*bhi + r
110276789Sdim										//
111276789Sdim										// Note that q is a number in (31-i).(1+i)
112276789Sdim										// fix point.
113276789Sdim
114276789Sdim	pushl		%edi
115276789Sdim	notl		%ecx
116276789Sdim	shrl		%eax
117276789Sdim	orl			$0x80000000,	%eax
118276789Sdim	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
119276789Sdim	movl		%eax,			%edi
120276789Sdim	mull	 24(%esp)					// q*blo
121276789Sdim	movl	 16(%esp),			%ebx
122276789Sdim	movl	 20(%esp),			%ecx	// ECX:EBX = a
123276789Sdim	subl		%eax,			%ebx
124276789Sdim	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
125276789Sdim	movl	 28(%esp),			%eax
126276789Sdim	imull		%edi,			%eax	// q*bhi
127276789Sdim	subl		%eax,			%ecx	// ECX:EBX = a - q*b
128276789Sdim	sbbl		$0,				%edi	// decrement q if remainder is negative
129276789Sdim	xorl		%edx,			%edx
130276789Sdim	movl		%edi,			%eax
131276789Sdim
132276789Sdim	addl		%esi,			%eax	// Restore correct sign to result
133276789Sdim	adcl		%esi,			%edx
134276789Sdim	xorl		%esi,			%eax
135276789Sdim	xorl		%esi,			%edx
136276789Sdim	popl		%edi					// Restore callee-save registers
137276789Sdim	popl		%ebx
138276789Sdim	popl		%esi
139276789Sdim	retl								// Return
140276789Sdim
141276789Sdim
142276789Sdim9:	/* High word of b is zero on this branch */
143276789Sdim
144276789Sdim	movl	 16(%esp),			%eax	// Find qhi and rhi such that
145276789Sdim	movl	 20(%esp),			%ecx	//
146276789Sdim	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 ��� rhi < b
147276789Sdim	divl		%ecx					//
148276789Sdim	movl		%eax,			%ebx	//
149276789Sdim	movl	 12(%esp),			%eax	// Find qlo such that
150276789Sdim	divl		%ecx					//
151276789Sdim	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 ��� rlo < b
152276789Sdim
153276789Sdim	addl		%esi,			%eax	// Restore correct sign to result
154276789Sdim	adcl		%esi,			%edx
155276789Sdim	xorl		%esi,			%eax
156276789Sdim	xorl		%esi,			%edx
157276789Sdim	popl		%ebx					// Restore callee-save registers
158276789Sdim	popl		%esi
159276789Sdim	retl								// Return
160276789SdimEND_COMPILERRT_FUNCTION(__divdi3)
161276789Sdim
162276789Sdim#endif // __i386__
163