1/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===//
9 *
10 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
11 * modulus) function for the ARM architecture.  A naive digit-by-digit
12 * computation is employed for simplicity.
13 *
14 *===----------------------------------------------------------------------===*/
15
16#include "../assembly.h"
17
18#define ESTABLISH_FRAME    \
19    push   {r4, r7, lr}   ;\
20    add     r7,     sp, #4
21#define CLEAR_FRAME_AND_RETURN \
22    pop    {r4, r7, pc}
23
24#define a r0
25#define b r1
26#define i r3
27#define r r4
28#define q ip
29#define one lr
30
31.syntax unified
32.align 3
33DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
34//  We use a simple digit by digit algorithm; before we get into the actual
35//  divide loop, we must calculate the left-shift amount necessary to align
36//  the MSB of the divisor with that of the dividend (If this shift is
37//  negative, then the result is zero, and we early out). We also conjure a
38//  bit mask of 1 to use in constructing the quotient, and initialize the
39//  quotient to zero.
40    ESTABLISH_FRAME
41    clz     r4,     a
42    tst     b,      b   // detect divide-by-zero
43    clz     r3,     b
44    mov     q,      #0
45    beq     LOCAL_LABEL(return)    // return 0 if b is zero.
46    mov     one,    #1
47    subs    i,      r3, r4
48    blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)
49
50LOCAL_LABEL(mainLoop):
51//  This loop basically implements the following:
52//
53//  do {
54//      if (a >= b << i) {
55//          a -= b << i;
56//          q |= 1 << i;
57//          if (a == 0) break;
58//      }
59//  } while (--i)
60//
61//  Note that this does not perform the final iteration (i == 0); by doing it
62//  this way, we can merge the two branches which is a substantial win for
63//  such a tight loop on current ARM architectures.
64    subs    r,      a,  b, lsl i
65    orrhs   q,      q,one, lsl i
66    movhs   a,      r
67    subsne  i,      i, #1
68    bhi     LOCAL_LABEL(mainLoop)
69
70//  Do the final test subtraction and update of quotient (i == 0), as it is
71//  not performed in the main loop.
72    subs    r,      a,  b
73    orrhs   q,      #1
74    movhs   a,      r
75
76LOCAL_LABEL(return):
77//  Store the remainder, and move the quotient to r0, then return.
78    str     a,     [r2]
79    mov     r0,     q
80    CLEAR_FRAME_AND_RETURN
81