/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM architecture.  A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

// Frame set-up / tear-down used by the software divide path.  lr is saved
// because that path reuses it as a scratch register (see `one` below); the
// return is performed by popping the saved lr straight into pc.
#define ESTABLISH_FRAME \
    push   {r7, lr}    ;\
    mov     r7,     sp
#define CLEAR_FRAME_AND_RETURN \
    pop    {r7, pc}

// Register roles in the software divide path:
//   a   (r0)  dividend on entry, running remainder inside the loop
//   b   (r1)  divisor
//   r   (r2)  result of the trial subtraction a - (b << i)
//   i   (r3)  current shift amount / quotient bit index
//   q   (ip)  quotient being accumulated
//   one (lr)  the constant 1, shifted into place to set quotient bits
#define a r0
#define b r1
#define r r2
#define i r3
#define q ip
#define one lr

.text
.syntax unified
// Align the entry point to 2^3 = 8 bytes.  .p2align always takes a power of
// two, whatever the target.
.p2align 3

//------------------------------------------------------------------------------
// unsigned __udivsi3(unsigned a, unsigned b)      (also exported as
//                                                  __aeabi_uidiv)
//
// In:    r0 = dividend, r1 = divisor
// Out:   r0 = quotient a / b; 0 is returned when b == 0
// Clobb: software path: r2, r3, ip and the condition flags (r7 and lr are
//        saved and restored through the frame macros);
//        hardware path: the condition flags only
//------------------------------------------------------------------------------
// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_7S__
// Hardware divide path: use the udiv instruction, but test for a zero divisor
// first so that this path returns 0 for b == 0, exactly like the software path.
    tst     r1, r1
    beq     LOCAL_LABEL(divzero)
    udiv    r0, r0, r1
    bx      lr
LOCAL_LABEL(divzero):
    mov     r0, #0
    bx      lr
#else
// We use a simple digit by digit algorithm; before we get into the actual
// divide loop, we must calculate the left-shift amount necessary to align
// the MSB of the divisor with that of the dividend (If this shift is
// negative, then the result is zero, and we early out). We also conjure a
// bit mask of 1 to use in constructing the quotient, and initialize the
// quotient to zero.
    ESTABLISH_FRAME
    clz     r2,     a
    tst     b,      b   // detect divide-by-zero
    clz     r3,     b   // clz and mov do not update the flags, so the
    mov     q,      #0  // result of the tst above is still live at the beq
    beq     LOCAL_LABEL(return)    // return 0 if b is zero.
    mov     one,    #1
    subs    i,      r3, r2         // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
// This loop basically implements the following:
//
// do {
//     if (a >= b << i) {
//         a -= b << i;
//         q |= 1 << i;
//         if (a == 0) break;
//     }
// } while (--i)
//
// Note that this does not perform the final iteration (i == 0); by doing it
// this way, we can merge the two branches which is a substantial win for
// such a tight loop on current ARM architectures.
//
// The instruction order matters: every instruction below is predicated on the
// flags left by the subs, and only the subsne may replace them.
    subs    r,      a,  b, lsl i   // r = a - (b << i); C set iff a >= (b << i),
                                   // Z set iff a == (b << i)
    orrhs   q,      q,  one, lsl i // a >= (b << i): q |= 1 << i
    movhs   a,      r              // a >= (b << i): a = r
    subsne  i,      i,  #1         // r != 0: --i, flags now describe i - 1;
                                   // r == 0: skipped, so Z stays set
    bhi     LOCAL_LABEL(mainLoop)  // taken only if the decrement ran and left
                                   // i > 0; falls through when i reached 0
                                   // (or was already 0) or when r == 0

// Do the final test subtraction and update of quotient (i == 0), as it is
// not performed in the main loop.
    subs    r,      a,  b
    orrhs   q,      #1

LOCAL_LABEL(return):
// Move the quotient to r0 and return.
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN
#endif