/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM architecture.  A naive digit-by-digit
 * computation is employed for simplicity.
 *
 * Target:  32-bit ARM, unified assembler syntax, run through the C
 *          preprocessor (the register aliases below are cpp macros).
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

// Frame handling.  ESTABLISH_FRAME saves r4, r7 and lr and points r7 at the
// saved r7 slot (sp + 4); CLEAR_FRAME_AND_RETURN restores r4 and r7 and
// returns by popping the saved lr straight into pc.  Saving lr is what makes
// it usable as a scratch register ("one") in the body.
#define ESTABLISH_FRAME \
    push   {r4, r7, lr}    ;\
    add     r7,     sp, #4
#define CLEAR_FRAME_AND_RETURN \
    pop    {r4, r7, pc}

// Register roles.  Note that r4 and r3 are also used under their raw names
// as temporaries for the two clz results before they take on the roles of
// "r" and "i".
#define a r0        /* dividend on entry; running remainder afterwards     */
#define b r1        /* divisor                                             */
#define i r3        /* current shift amount / quotient bit position        */
#define r r4        /* trial difference a - (b << i)                       */
#define q ip        /* quotient accumulator                                */
#define one lr      /* constant 1, shifted into place to set quotient bits */

.syntax unified
.align 3
// __udivmodsi4
//
//   In:   r0 = dividend (a)
//         r1 = divisor  (b)
//         r2 = address that receives the remainder (written with str)
//   Out:  r0 = quotient
//         [r2] = remainder
//   Clobbers: r3, ip and the condition flags.  r4, r7 and lr are saved and
//         restored by the frame macros.
//
//   If b is zero, or if the most significant set bit of a is below that of b,
//   the quotient returned is 0 and the unmodified value of a is stored as the
//   remainder.
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
//  We use a simple digit by digit algorithm; before we get into the actual
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend (If this shift is
//  negative, then the result is zero, and we early out). We also conjure a
//  bit mask of 1 to use in constructing the quotient, and initialize the
//  quotient to zero.
    ESTABLISH_FRAME
    clz     r4,     a
    tst     b,      b   // detect divide-by-zero
    clz     r3,     b   // clz and mov leave the flags from tst untouched
    mov     q,      #0
    beq     LOCAL_LABEL(return)    // return 0 if b is zero.
    mov     one,    #1
    subs    i,      r3, r4         // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
//  This loop basically implements the following:
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          q |= 1 << i;
//          if (a == 0) break;
//      }
//  } while (--i)
//
//  Note that this does not perform the final iteration (i == 0); by doing it
//  this way, we can merge the two branches which is a substantial win for
//  such a tight loop on current ARM architectures.
//
//  How the single branch works:
//    * After the first subs, HS (carry set) means a >= b << i and NE means
//      the difference is non-zero.
//    * The conditional subs only executes when the difference is non-zero.
//      When it executes, the flags come from i - 1, and HI (carry set, zero
//      clear) holds exactly when the new i is still greater than zero.
//    * When it is skipped (the difference was exactly zero), the Z flag from
//      the first subs is still set, so HI is false and the loop exits.
//
//  If i is already 0 on entry, the loop body itself performs the i == 0 step
//  and then exits; the trailing step below is then harmless, because the
//  remainder left in a is already smaller than b.
    subs    r,      a,  b, lsl i
    orrhs   q,      q,one, lsl i
    movhs   a,      r
    subsne  i,      i,  #1
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of quotient (i == 0), as it is
//  not performed in the main loop.
    subs    r,      a,  b
    orrhs   q,      #1
    movhs   a,      r

LOCAL_LABEL(return):
//  Store the remainder, and move the quotient to r0, then return.
    str     a,     [r2]
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN