//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __umodsi3 (32-bit unsigned integer modulus)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
//
// In:       r0 = dividend, r1 = divisor
// Out:      r0 = dividend % divisor
// Scratch:  r2, r3, ip and the condition flags (which of them are actually
//           written depends on the code path selected at build time).
// Division by zero: r0 is set to 0; on EABI targets control is then passed
// to __aeabi_idiv0, otherwise 0 is returned to the caller.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available: remainder = r0 - (r0 / r1) * r1.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1		// r2 = r0 / r1
	mls	r0, r2, r1, r0		// r0 = r0 - r2 * r1
	bx	lr
#else
	// Trivial cases first.
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)	// r1 < 1 (unsigned), i.e. r1 == 0
	ITT(eq)
	moveq	r0, #0			// r1 == 1: the remainder is always 0
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)			// r0 < r1: r0 already is the remainder

	// Implement the modulus using the binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator. From here on r0 >= r1 >= 2.
	//
	// The code before the jump computes a shift I such that (r1 << I) does
	// not overflow and r0 < 2 * (r1 << I). With CLZ, I is chosen so that r0
	// and (r1 << I) have the highest bit set in the same position.
	//
	// The jump then enters the unrolled block() sequence at block(I), i.e.
	// I blocks before div0block:
	//   ARM mode:     ip := div0block - 8 * I
	//   Thumb-2 mode: ip := (div0block + 1) - 10 * I
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
	//
	// block(shift) implements the test-and-subtract core. It assumes that
	// (r1 << shift) can be computed without overflow and that
	// r0 < 2 * (r1 << shift). It subtracts (r1 << shift) from r0 when that
	// fits, so r0 ends up holding the remainder. No quotient is kept.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip		// r3 = I
#    if defined(USE_THUMB_2)
	// The + 1 keeps the Thumb bit set in the address handed to bx.
	adr	ip, LOCAL_LABEL(div0block) + 1
	// 2 * I here plus 8 * I below gives the 10-byte Thumb-2 block stride.
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// No CLZ: binary search for the largest I with (r0 >> I) >= r1.
	// r2 tracks r0 >> (shift accepted so far); ip moves back one 8-byte
	// block per accepted shift position.
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	// Last block, no need to update r2 or r3.
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

// IMM expands to the immediate prefix. It cannot be written directly inside
// block() because '#' is the stringizing operator in a function-like macro.
#define	IMM	#

// One long-division step: if r0 >= (r1 << shift), subtract (r1 << shift).
// IT() and WIDE() come from assembly.h; presumably they keep every block at
// the fixed size the computed jump relies on -- confirm there before editing.
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	// Do not insert anything between these blocks: the computed jump
	// above lands on block(I) by address arithmetic from div0block.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	JMP(lr)				// r0 = remainder
#endif // __ARM_ARCH_EXT_IDIV__

// Division by zero: the result is defined here as 0.
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0		// tail call, r0 = 0 is passed along
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)

NO_EXEC_STACK_DIRECTIVE
