1276789Sdim// This file is dual licensed under the MIT and the University of Illinois Open 2276789Sdim// Source Licenses. See LICENSE.TXT for details. 3276789Sdim 4276789Sdim#include "../assembly.h" 5276789Sdim 6276789Sdim// di_int __divdi3(di_int a, di_int b); 7276789Sdim 8276789Sdim// result = a / b. 9276789Sdim// both inputs and the output are 64-bit signed integers. 10276789Sdim// This will do whatever the underlying hardware is set to do on division by zero. 11276789Sdim// No other exceptions are generated, as the divide cannot overflow. 12276789Sdim// 13276789Sdim// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14276789Sdim// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15276789Sdim// currently possible via simulation of integer divides on the x87 unit. 16276789Sdim// 17276789Sdim// Stephen Canon, December 2008 18276789Sdim 19276789Sdim#ifdef __i386__ 20276789Sdim 21276789Sdim.text 22276789Sdim.balign 4 23276789SdimDEFINE_COMPILERRT_FUNCTION(__divdi3) 24276789Sdim 25276789Sdim/* This is currently implemented by wrapping the unsigned divide up in an absolute 26276789Sdim value, then restoring the correct sign at the end of the computation. This could 27276789Sdim certainly be improved upon. */ 28276789Sdim 29276789Sdim pushl %esi 30276789Sdim movl 20(%esp), %edx // high word of b 31276789Sdim movl 16(%esp), %eax // low word of b 32276789Sdim movl %edx, %ecx 33276789Sdim sarl $31, %ecx // (b < 0) ? -1 : 0 34276789Sdim xorl %ecx, %eax 35276789Sdim xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36276789Sdim subl %ecx, %eax 37276789Sdim sbbl %ecx, %edx // EDX:EAX = abs(b) 38276789Sdim movl %edx, 20(%esp) 39276789Sdim movl %eax, 16(%esp) // store abs(b) back to stack 40276789Sdim movl %ecx, %esi // set aside sign of b 41276789Sdim 42276789Sdim movl 12(%esp), %edx // high word of b 43276789Sdim movl 8(%esp), %eax // low word of b 44276789Sdim movl %edx, %ecx 45276789Sdim sarl $31, %ecx // (a < 0) ? -1 : 0 46276789Sdim xorl %ecx, %eax 47276789Sdim xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48276789Sdim subl %ecx, %eax 49276789Sdim sbbl %ecx, %edx // EDX:EAX = abs(a) 50276789Sdim movl %edx, 12(%esp) 51276789Sdim movl %eax, 8(%esp) // store abs(a) back to stack 52276789Sdim xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53276789Sdim 54276789Sdim pushl %ebx 55276789Sdim movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56276789Sdim bsrl %ebx, %ecx // If the high word of b is zero, jump to 57276789Sdim jz 9f // the code to handle that special case [9]. 58276789Sdim 59276789Sdim /* High word of b is known to be non-zero on this branch */ 60276789Sdim 61276789Sdim movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62276789Sdim 63276789Sdim shrl %cl, %eax // Practically, this means that bhi is given by: 64276789Sdim shrl %eax // 65276789Sdim notl %ecx // bhi = (high word of b) << (31 - i) | 66276789Sdim shll %cl, %ebx // (low word of b) >> (1 + i) 67276789Sdim orl %eax, %ebx // 68276789Sdim movl 16(%esp), %edx // Load the high and low words of a, and jump 69276789Sdim movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70276789Sdim cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71276789Sdim jae 1f 72276789Sdim 73276789Sdim /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74276789Sdim 75276789Sdim divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76276789Sdim 77276789Sdim pushl %edi 78276789Sdim notl %ecx 79276789Sdim shrl %eax 80276789Sdim shrl %cl, %eax // q = qs >> (1 + i) 81276789Sdim movl %eax, %edi 82276789Sdim mull 24(%esp) // q*blo 83276789Sdim movl 16(%esp), %ebx 84276789Sdim movl 20(%esp), %ecx // ECX:EBX = a 85276789Sdim subl %eax, %ebx 86276789Sdim sbbl %edx, %ecx // ECX:EBX = a - q*blo 87276789Sdim movl 28(%esp), %eax 88276789Sdim imull %edi, %eax // q*bhi 89276789Sdim subl %eax, %ecx // ECX:EBX = a - q*b 90276789Sdim sbbl $0, %edi // decrement q if remainder is negative 91276789Sdim xorl %edx, %edx 92276789Sdim movl %edi, %eax 93276789Sdim 94276789Sdim addl %esi, %eax // Restore correct sign to result 95276789Sdim adcl %esi, %edx 96276789Sdim xorl %esi, %eax 97276789Sdim xorl %esi, %edx 98276789Sdim popl %edi // Restore callee-save registers 99276789Sdim popl %ebx 100276789Sdim popl %esi 101276789Sdim retl // Return 102276789Sdim 103276789Sdim 104276789Sdim1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105276789Sdim 106276789Sdim subl %ebx, %edx // subtract bhi from ahi so that divide will not 107276789Sdim divl %ebx // overflow, and find q and r such that 108276789Sdim // 109276789Sdim // ahi:alo = (1:q)*bhi + r 110276789Sdim // 111276789Sdim // Note that q is a number in (31-i).(1+i) 112276789Sdim // fix point. 113276789Sdim 114276789Sdim pushl %edi 115276789Sdim notl %ecx 116276789Sdim shrl %eax 117276789Sdim orl $0x80000000, %eax 118276789Sdim shrl %cl, %eax // q = (1:qs) >> (1 + i) 119276789Sdim movl %eax, %edi 120276789Sdim mull 24(%esp) // q*blo 121276789Sdim movl 16(%esp), %ebx 122276789Sdim movl 20(%esp), %ecx // ECX:EBX = a 123276789Sdim subl %eax, %ebx 124276789Sdim sbbl %edx, %ecx // ECX:EBX = a - q*blo 125276789Sdim movl 28(%esp), %eax 126276789Sdim imull %edi, %eax // q*bhi 127276789Sdim subl %eax, %ecx // ECX:EBX = a - q*b 128276789Sdim sbbl $0, %edi // decrement q if remainder is negative 129276789Sdim xorl %edx, %edx 130276789Sdim movl %edi, %eax 131276789Sdim 132276789Sdim addl %esi, %eax // Restore correct sign to result 133276789Sdim adcl %esi, %edx 134276789Sdim xorl %esi, %eax 135276789Sdim xorl %esi, %edx 136276789Sdim popl %edi // Restore callee-save registers 137276789Sdim popl %ebx 138276789Sdim popl %esi 139276789Sdim retl // Return 140276789Sdim 141276789Sdim 142276789Sdim9: /* High word of b is zero on this branch */ 143276789Sdim 144276789Sdim movl 16(%esp), %eax // Find qhi and rhi such that 145276789Sdim movl 20(%esp), %ecx // 146276789Sdim xorl %edx, %edx // ahi = qhi*b + rhi with 0 ��� rhi < b 147276789Sdim divl %ecx // 148276789Sdim movl %eax, %ebx // 149276789Sdim movl 12(%esp), %eax // Find qlo such that 150276789Sdim divl %ecx // 151276789Sdim movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ��� rlo < b 152276789Sdim 153276789Sdim addl %esi, %eax // Restore correct sign to result 154276789Sdim adcl %esi, %edx 155276789Sdim xorl %esi, %eax 156276789Sdim xorl %esi, %edx 157276789Sdim popl %ebx // Restore callee-save registers 158276789Sdim popl %esi 159276789Sdim retl // Return 160276789SdimEND_COMPILERRT_FUNCTION(__divdi3) 161276789Sdim 162276789Sdim#endif // __i386__ 163