1222656Sed// This file is dual licensed under the MIT and the University of Illinois Open 2222656Sed// Source Licenses. See LICENSE.TXT for details. 3214152Sed 4214152Sed#include "../assembly.h" 5214152Sed 6214152Sed// di_int __divdi3(di_int a, di_int b); 7214152Sed 8214152Sed// result = a / b. 9214152Sed// both inputs and the output are 64-bit signed integers. 10214152Sed// This will do whatever the underlying hardware is set to do on division by zero. 11214152Sed// No other exceptions are generated, as the divide cannot overflow. 12214152Sed// 13214152Sed// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14214152Sed// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15214152Sed// currently possible via simulation of integer divides on the x87 unit. 16214152Sed// 17214152Sed// Stephen Canon, December 2008 18214152Sed 19214152Sed#ifdef __i386__ 20214152Sed 21214152Sed.text 22214152Sed.align 4 23214152SedDEFINE_COMPILERRT_FUNCTION(__divdi3) 24214152Sed 25214152Sed/* This is currently implemented by wrapping the unsigned divide up in an absolute 26214152Sed value, then restoring the correct sign at the end of the computation. This could 27214152Sed certainly be improved upon. */ 28214152Sed 29214152Sed pushl %esi 30214152Sed movl 20(%esp), %edx // high word of b 31214152Sed movl 16(%esp), %eax // low word of b 32214152Sed movl %edx, %ecx 33214152Sed sarl $31, %ecx // (b < 0) ? -1 : 0 34214152Sed xorl %ecx, %eax 35214152Sed xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36214152Sed subl %ecx, %eax 37214152Sed sbbl %ecx, %edx // EDX:EAX = abs(b) 38214152Sed movl %edx, 20(%esp) 39214152Sed movl %eax, 16(%esp) // store abs(b) back to stack 40214152Sed movl %ecx, %esi // set aside sign of b 41214152Sed 42214152Sed movl 12(%esp), %edx // high word of b 43214152Sed movl 8(%esp), %eax // low word of b 44214152Sed movl %edx, %ecx 45214152Sed sarl $31, %ecx // (a < 0) ? -1 : 0 46214152Sed xorl %ecx, %eax 47214152Sed xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48214152Sed subl %ecx, %eax 49214152Sed sbbl %ecx, %edx // EDX:EAX = abs(a) 50214152Sed movl %edx, 12(%esp) 51214152Sed movl %eax, 8(%esp) // store abs(a) back to stack 52214152Sed xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53214152Sed 54214152Sed pushl %ebx 55214152Sed movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56214152Sed bsrl %ebx, %ecx // If the high word of b is zero, jump to 57214152Sed jz 9f // the code to handle that special case [9]. 58214152Sed 59214152Sed /* High word of b is known to be non-zero on this branch */ 60214152Sed 61214152Sed movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62214152Sed 63214152Sed shrl %cl, %eax // Practically, this means that bhi is given by: 64214152Sed shrl %eax // 65214152Sed notl %ecx // bhi = (high word of b) << (31 - i) | 66214152Sed shll %cl, %ebx // (low word of b) >> (1 + i) 67214152Sed orl %eax, %ebx // 68214152Sed movl 16(%esp), %edx // Load the high and low words of a, and jump 69214152Sed movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70214152Sed cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71214152Sed jae 1f 72214152Sed 73214152Sed /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74214152Sed 75214152Sed divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76214152Sed 77214152Sed pushl %edi 78214152Sed notl %ecx 79214152Sed shrl %eax 80214152Sed shrl %cl, %eax // q = qs >> (1 + i) 81214152Sed movl %eax, %edi 82214152Sed mull 24(%esp) // q*blo 83214152Sed movl 16(%esp), %ebx 84214152Sed movl 20(%esp), %ecx // ECX:EBX = a 85214152Sed subl %eax, %ebx 86214152Sed sbbl %edx, %ecx // ECX:EBX = a - q*blo 87214152Sed movl 28(%esp), %eax 88214152Sed imull %edi, %eax // q*bhi 89214152Sed subl %eax, %ecx // ECX:EBX = a - q*b 90214152Sed sbbl $0, %edi // decrement q if remainder is negative 91214152Sed xorl %edx, %edx 92214152Sed movl %edi, %eax 93214152Sed 94214152Sed addl %esi, %eax // Restore correct sign to result 95214152Sed adcl %esi, %edx 96214152Sed xorl %esi, %eax 97214152Sed xorl %esi, %edx 98214152Sed popl %edi // Restore callee-save registers 99214152Sed popl %ebx 100214152Sed popl %esi 101214152Sed retl // Return 102214152Sed 103214152Sed 104214152Sed1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105214152Sed 106214152Sed subl %ebx, %edx // subtract bhi from ahi so that divide will not 107214152Sed divl %ebx // overflow, and find q and r such that 108214152Sed // 109214152Sed // ahi:alo = (1:q)*bhi + r 110214152Sed // 111214152Sed // Note that q is a number in (31-i).(1+i) 112214152Sed // fix point. 113214152Sed 114214152Sed pushl %edi 115214152Sed notl %ecx 116214152Sed shrl %eax 117214152Sed orl $0x80000000, %eax 118214152Sed shrl %cl, %eax // q = (1:qs) >> (1 + i) 119214152Sed movl %eax, %edi 120214152Sed mull 24(%esp) // q*blo 121214152Sed movl 16(%esp), %ebx 122214152Sed movl 20(%esp), %ecx // ECX:EBX = a 123214152Sed subl %eax, %ebx 124214152Sed sbbl %edx, %ecx // ECX:EBX = a - q*blo 125214152Sed movl 28(%esp), %eax 126214152Sed imull %edi, %eax // q*bhi 127214152Sed subl %eax, %ecx // ECX:EBX = a - q*b 128214152Sed sbbl $0, %edi // decrement q if remainder is negative 129214152Sed xorl %edx, %edx 130214152Sed movl %edi, %eax 131214152Sed 132214152Sed addl %esi, %eax // Restore correct sign to result 133214152Sed adcl %esi, %edx 134214152Sed xorl %esi, %eax 135214152Sed xorl %esi, %edx 136214152Sed popl %edi // Restore callee-save registers 137214152Sed popl %ebx 138214152Sed popl %esi 139214152Sed retl // Return 140214152Sed 141214152Sed 142214152Sed9: /* High word of b is zero on this branch */ 143214152Sed 144214152Sed movl 16(%esp), %eax // Find qhi and rhi such that 145214152Sed movl 20(%esp), %ecx // 146214152Sed xorl %edx, %edx // ahi = qhi*b + rhi with 0 ��� rhi < b 147214152Sed divl %ecx // 148214152Sed movl %eax, %ebx // 149214152Sed movl 12(%esp), %eax // Find qlo such that 150214152Sed divl %ecx // 151214152Sed movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ��� rlo < b 152214152Sed 153214152Sed addl %esi, %eax // Restore correct sign to result 154214152Sed adcl %esi, %edx 155214152Sed xorl %esi, %eax 156214152Sed xorl %esi, %edx 157214152Sed popl %ebx // Restore callee-save registers 158214152Sed popl %esi 159214152Sed retl // Return 160214152Sed 161214152Sed#endif // __i386__ 162