/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __umodsi3 (32-bit unsigned integer modulus)
 * function for the ARM architecture.  A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

//  Register aliases used by __umodsi3:
//    a (r0) - dividend on entry; updated in place and returned as the remainder
//    b (r1) - divisor
//    r (r2) - scratch result of each trial subtraction
//    i (r3) - left-shift amount currently applied to the divisor
#define a r0
#define b r1
#define r r2
#define i r3

//  Unified ARM/Thumb assembler syntax; entry point aligned to 2^3 = 8 bytes.
.syntax unified
.align 3
//  __umodsi3 - remainder of a 32-bit unsigned division.
//
//  In:       a (r0) = dividend, b (r1) = divisor
//  Out:      r0 = a % b; a is returned unchanged when b == 0
//  Clobbers: r2, r3 and the condition flags
//  Stack:    not used; every exit is a plain "bx lr"
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
//  We use a simple digit by digit algorithm; before we get into the actual
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend.
    clz     r2,     a       // r2 = leading-zero count of the dividend
    tst     b,      b       // detect b == 0
    clz     r3,     b       // r3 = leading-zero count of the divisor; clz
                            // leaves the flags from tst untouched
    bxeq    lr              // return a if b == 0
    subs    i,      r3, r2  // i = shift that lines the MSB of b up with a
    bxlt    lr              // return a if MSB(a) < MSB(b); this also covers
                            // a == 0, because clz of zero is 32

LOCAL_LABEL(mainLoop):
//  This loop basically implements the following:
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          if (a == 0) break;
//      }
//  } while (--i)
//
//  Note that this does not perform the final iteration (i == 0); by doing it
//  this way, we can merge the two branches which is a substantial win for
//  such a tight loop on current ARM architectures.
//
//  How the single branch works: the trial subtraction sets C when
//  a >= (b << i) and Z when the difference is zero.  The decrement is
//  predicated on NE, so an exact hit (Z set) skips it and the HI test
//  (C set and Z clear) fails, leaving the loop with a == 0.  Otherwise the
//  decrement replaces the flags: HI then holds only if it did not borrow and
//  left i non-zero.  When the loop is entered with i == 0 the decrement
//  borrows and the loop exits after one pass; the final step below then
//  repeats the shift-0 trial, which cannot subtract a second time because the
//  remainder is already below b.
    subs    r,      a,  b, lsl i    // r = a - (b << i)
    movhs   a,      r               // keep the difference if it did not borrow
    subsne  i,      i, #1           // --i, unless the remainder just hit zero
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of remainder (i == 0), as it is
//  not performed in the main loop.
    subs    r,      a,  b
    movhs   a,      r
    bx      lr                      // remainder is returned in r0
