/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __umodsi3 (32-bit unsigned integer modulus)
 * function for the ARM architecture.  A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

//  Register aliases used throughout this routine:
//      a (r0)  dividend on entry; holds the running remainder and is the
//              value left in r0 when the routine returns
//      b (r1)  divisor; only read, never written
//      r (r2)  scratch: clz of the dividend during setup, afterwards the
//              trial difference computed by each subtraction
//      i (r3)  scratch: clz of the divisor during setup, afterwards the
//              left-shift count applied to the divisor
#define a r0
#define b r1
#define r r2
#define i r3

.syntax unified
.align 3
//  __umodsi3: 32-bit unsigned modulus.
//
//  In:     a (r0) = dividend, b (r1) = divisor
//  Out:    r0 = remainder of a divided by b; when b == 0 the dividend is
//          returned unchanged
//  Writes: r2, r3 and the condition flags; returns with bx lr
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
//  We use a simple digit by digit algorithm; before we get into the actual
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend.
//
//  The tst result must survive until bxeq; the clz placed in between does
//  not write the flags, so the order of these instructions matters.
    clz     r,      a
    tst     b,      b       // detect b == 0
    clz     i,      b
    bxeq    lr              // return a if b == 0
    subs    i,      i,  r   // i = clz(b) - clz(a) = initial shift count
    bxlt    lr              // return a if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
//  This loop basically implements the following:
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          if (a == 0) break;
//      }
//  } while (i-- > 1);
//
//  Note that this does not perform the final iteration (i == 0); by doing it
//  this way, we can merge the two branches which is a substantial win for
//  such a tight loop on current ARM architectures.
//
//  How the flags are threaded through the four instructions:
//    * The first subs leaves carry set when a >= (b << i) and Z set when the
//      difference is exactly zero.
//    * movhs commits the difference to a only when there was no borrow.
//    * subsne decrements the shift count only when the difference was
//      nonzero.  When it is skipped, Z is still set from the first subs, so
//      bhi falls through (the "break" above).
//    * Otherwise bhi sees the flags of the decrement and continues only while
//      the decremented shift count is still greater than zero.  If the shift
//      count was already zero on entry, the decrement borrows and the loop
//      exits after a single pass.
    subs    r,      a,  b, lsl i
    movhs   a,      r
    subsne  i,      i, #1
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of remainder (i == 0), as it is
//  not performed in the main loop.
    subs    r,      a,  b
    movhs   a,      r
    bx      lr