/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM architecture.  A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

//  Frame helpers for the software-division path.  That path uses lr as a
//  scratch register (see the alias "one" below), so the return address is
//  saved on entry and reloaded directly into pc on exit.
//
//  ESTABLISH_FRAME pushes r7 and lr, then copies the new sp into r7.
#define ESTABLISH_FRAME \
    push   {r7, lr}    ;\
    mov     r7,     sp
//  CLEAR_FRAME_AND_RETURN restores r7 and returns by popping the saved lr
//  value into pc.
#define CLEAR_FRAME_AND_RETURN \
    pop    {r7, pc}

//  Register aliases used by the software-division path:
//    a   (r0)  dividend on entry; running remainder inside the divide loop
//    b   (r1)  divisor
//    r   (r2)  result of the trial subtraction
//    i   (r3)  current shift amount, i.e. index of the quotient bit under test
//    q   (ip)  quotient being accumulated
//    one (lr)  constant 1, shifted left to build quotient bits
//
//  NOTE(review): these are plain preprocessor macros with very short names,
//  and "b" is also the unconditional branch mnemonic.  Any later bare use of
//  one of these tokens in this file will be rewritten to the register name.
#define a r0
#define b r1
#define r r2
#define i r3
#define q ip
#define one lr

.syntax unified
.align 3
//===----------------------------------------------------------------------===//
//  __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
//
//    In:       r0 = dividend, r1 = divisor
//    Out:      r0 = quotient (0 when the divisor is 0)
//    Clobbers: condition flags on both paths; the software path additionally
//              overwrites r2, r3, ip and lr (r7 is saved and restored).
//
//  APCS and AAPCS agree on 32 bit args, so the same routine is safe to use
//  under both names.
//===----------------------------------------------------------------------===//
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
//  Take the hardware-divide path when the target advertises a udiv
//  instruction.  defined() is used so that an absent macro is not evaluated.
#if defined(__ARM_ARCH_7S__) || defined(__ARM_FEATURE_IDIV)
//  A zero divisor is filtered out before udiv is issued so that this path
//  returns 0 for it, exactly as the software path below does.
    tst     r1,     r1
    beq     LOCAL_LABEL(divzero)
    udiv    r0,     r0, r1
    bx      lr
LOCAL_LABEL(divzero):
    mov     r0,     #0
    bx      lr
#else
//  We use a simple digit by digit algorithm; before we get into the actual
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend (If this shift is
//  negative, then the result is zero, and we early out). We also conjure a
//  bit mask of 1 to use in constructing the quotient, and initialize the
//  quotient to zero.
//
//  The clz and mov instructions below do not write the flags, so the result
//  of the tst is still live when the beq is reached.
    ESTABLISH_FRAME
    clz     r2,     a               // r2 = leading zeros of the dividend
    tst     b,      b               // detect divide-by-zero
    clz     r3,     b               // r3 = leading zeros of the divisor
    mov     q,      #0
    beq     LOCAL_LABEL(return)     // return 0 if b is zero.
    mov     one,    #1
    subs    i,      r3, r2          // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)     // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
//  This loop implements the following (i treated as unsigned):
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          q |= 1 << i;
//          if (a == 0) break;
//      }
//  } while (i-- > 1);
//
//  Flag protocol:
//    - subs sets C (hs) when a >= b << i and Z when the difference is zero.
//    - orrhs and movhs leave the flags untouched.
//    - subsne decrements i unless the difference was exactly zero.  When it
//      is skipped, Z is still set and bhi falls through (the break above).
//    - When it runs, bhi (C set and Z clear) branches back only if the old
//      value of i was greater than 1.
//
//  The step for i == 0 is therefore normally left to the code after the
//  loop; by doing it this way, we can merge the two branches which is a
//  substantial win for such a tight loop on current ARM architectures.
//  If i is already 0 on entry the loop body handles that step itself, and
//  the step after the loop then finds a < b and changes nothing.
    subs    r,      a,  b, lsl i
    orrhs   q,      q,one, lsl i
    movhs   a,      r
    subsne  i,      i, #1
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of quotient (i == 0), as it is
//  not performed in the main loop.  Only the flags of the subtraction are
//  needed here, so a compare is sufficient.
    cmp     a,      b
    orrhs   q,      q,  #1

LOCAL_LABEL(return):
//  Move the quotient to r0 and return.
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN
#endif
