dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		norm	unorm	frac
C StrongARM	 -	 -	 -
C XScale	 -	 -	 -
C Cortex-A7	 ?	 ?	 ?
C Cortex-A8	 ?	 ?	 ?
C Cortex-A9	13	14	13
C Cortex-A15	11.4	11.8	11.1

C TODO
C  * Optimise inner-loops better, they could likely run a cycle or two faster.
C  * Decrease register usage, streamline non-loop code.
C ---------------------------------------------------------------------------
C Incoming argument locations, as named by the defines below.  qp_arg, fn,
C up_arg and n_arg arrive in r0-r3.  d_arg, dinv_arg and cnt_arg are byte
C offsets of the stack-passed arguments relative to sp at function entry
C (the code adds 9*4 to account for the nine registers it pushes).
C mpn_divrem_1 only reads the d slot; the dinv and cnt slots are read only
C by mpn_preinv_divrem_1.
C NOTE(review): the names fn and n_arg are not referenced by the
C instructions below, which use r1 and r3 directly.
C ---------------------------------------------------------------------------

define(`qp_arg',  `r0')
define(`fn',      `r1')
define(`up_arg',  `r2')
define(`n_arg',   `r3')
define(`d_arg',   `0')
define(`dinv_arg',`4')
define(`cnt_arg', `8')

C Working registers used once the prologue has run.
C   n     r9   incoming limb count minus one, then loop countdown
C   qp    r5   quotient store pointer, starts at the top limb, moves down
C   up    r6   dividend load pointer, starts at the top limb, moves down
C   cnt   r7   left shift applied to d (0 on the normalised path)
C   tnc   r10  32 - cnt, the complementary right shift
C   dinv  r0   inverse of the shifted divisor (from the caller or from
C              mpn_invert_limb)
C   d     r4   divisor, shifted left by cnt on the unnormalised path
C r1 (unnormalised and fraction code) or r11 (normalised code) carries the
C running remainder.

define(`n',       `r9')
define(`qp',      `r5')
define(`up',      `r6')
define(`cnt',     `r7')
define(`tnc',     `r10')
define(`dinv',    `r0')
define(`d',       `r4')

ASM_START()

C mpn_preinv_divrem_1
C Entry point for callers that pass dinv and cnt on the stack in addition to
C d.  It builds the same register state as the mpn_divrem_1 prologue and then
C branches into the body of mpn_divrem_1 at L(nent) or L(uent), i.e. just
C after the points where mpn_divrem_1 would have called mpn_invert_limb.
PROLOGUE(mpn_preinv_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}	C 9 words pushed
	ldr	d,   [sp, #9*4+d_arg]
	ldr	cnt, [sp, #9*4+cnt_arg]
	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	sub	n, r3, #1		C n = limb count - 1
	add	r3, r1, n		C r3 = fn + limb count - 1
	cmp	d, #0			C flags consumed by blt below
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	ldr	dinv, [sp, #9*4+dinv_arg]	C overwrites r0; qp_arg already used
	blt	L(nent)			C top bit of d set: normalised path
	b	L(uent)			C otherwise: unnormalised path
EPILOGUE()

C mpn_divrem_1
C Divides the limb vector at up_arg by the single limb d, storing quotient
C limbs downwards from index fn + (limb count) - 1 of the vector at qp_arg,
C and returns the remainder in r0.  After the integer limbs have been
C consumed, fn further quotient limbs are produced by L(ftop) with zero
C dividend limbs.
C
C The divisor inverse is obtained from mpn_invert_limb (defined elsewhere);
C its result is taken from r0.  Each quotient limb is then formed with one
C umlal and one mls followed by up to two conditional corrections.
C NOTE(review): this looks like the usual divide-by-precomputed-inverse
C step used throughout GMP -- confirm against udiv_qrnnd_preinv.
PROLOGUE(mpn_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}	C 9 words pushed
	sub	n, r3, #1		C n = limb count - 1
	ldr	d, [sp, #9*4+d_arg]	C d
	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	add	r3, r1, n		C r3 = fn + limb count - 1
	cmp	d, #0			C flags consumed by blt below
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	blt	L(normalised)		C top bit of d set: no shifting needed

C Unnormalised divisor: shift d left until its top bit is set.
L(unnorm):
	clz	cnt, d
	mov	r0, d, lsl cnt		C pass d << cnt
	bl	mpn_invert_limb
C mpn_preinv_divrem_1 joins here with dinv and cnt already loaded.
L(uent):
	mov	d, d, lsl cnt		C d <<= cnt
	cmp	n, #0			C flags used by blt and by beq below
	mov	r1, #0			C r
	blt	L(frac)			C no integer limbs at all

	ldr	r11, [up, #0]		C top dividend limb

	rsb	tnc, cnt, #32		C tnc = 32 - cnt
	mov	r1, r11, lsr tnc	C r = bits shifted out of the top limb
	mov	r11, r11, lsl cnt	C remaining bits of the top limb
	beq	L(uend)			C flags still from cmp n: single limb

	ldr	r3, [up, #-4]!		C next lower limb, up pre-decremented
	orr	r2, r11, r3, lsr tnc	C r2 = next shifted dividend limb
	b	L(mid)			C enter the loop at its midpoint

C Main unnormalised loop.  It is entered at L(mid); the first half of each
C pass finishes the quotient limb started by the previous pass.
C   r1  remainder        r8  quotient limb candidate
C   r2  shifted dividend limb, then low word of the umlal result
C   r11 copy of the shifted dividend limb used by mls
C   r3  most recently loaded raw dividend limb
L(utop):
	mls	r1, d, r8, r11		C r1 = r11 - d * r8
	mov	r11, r3, lsl cnt	C high part of the next shifted limb
	ldr	r3, [up, #-4]!		C fetch the following raw limb
	cmp	r1, r2
	addhi	r1, r1, d		C r1 > low word (unsigned): add d back
	subhi	r8, r8, #1		C   and take one off the quotient limb
	orr	r2, r11, r3, lsr tnc	C r2 = next shifted dividend limb
	cmp	r1, d
	bcs	L(ufx)			C r1 >= d: out-of-line fixup
L(uok):	str	r8, [qp], #-4		C store quotient limb, qp moves down
L(mid):	add	r8, r1, #1
	mov	r11, r2			C keep the limb; umlal overwrites r2
	umlal	r2, r8, r1, dinv	C r8:r2 += r1 * dinv
	subs	n, n, #1
	bne	L(utop)

C Loop exit: finish the pending quotient limb with the fixup done inline.
	mls	r1, d, r8, r11		C r1 = r11 - d * r8
	mov	r11, r3, lsl cnt	C last raw limb shifted; nothing to OR in
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1		C r1 >= d: r1 -= d
	addcs	r8, r8, #1		C   and quotient limb += 1
	str	r8, [qp], #-4

C Final integer quotient limb, from the shifted limb in r11.  Also the
C direct target when there is only one dividend limb.
L(uend):add	r8, r1, #1
	mov	r2, r11
	umlal	r2, r8, r1, dinv	C r8:r2 += r1 * dinv
	mls	r1, d, r8, r11		C r1 = r11 - d * r8
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1
	addcs	r8, r8, #1
	str	r8, [qp], #-4
C Fraction limbs, shared by both paths.  Expects the remainder in r1.
L(frac):
	ldr	r2, [sp, #9*4+d_arg]	C fn
	cmp	r2, #0
	beq	L(fend)			C no fraction limbs requested

C One quotient limb per pass with a zero dividend limb.  r6 (the register
C named up, not read again from here on) and r8 serve as zeroed scratch.
L(ftop):mov	r6, #0
	add	r3, r1, #1
	umlal	r6, r3, r1, dinv	C r3:r6 += r1 * dinv
	mov	r8, #0
	mls	r1, d, r3, r8		C r1 = 0 - d * r3
	cmp	r1, r6
	addhi	r1, r1, d
	subhi	r3, r3, #1
	subs	r2, r2, #1		C sets the flags tested by bne below
	str	r3, [qp], #-4		C str leaves the flags untouched
	bne	L(ftop)

L(fend):mov	r11, r1, lsr cnt	C undo the shift on the remainder
L(rtn):	mov	r0, r11			C return value
	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}	C restore, return

C Normalised divisor (top bit of d already set): no shifting of d or of the
C dividend limbs.
L(normalised):
	mov	r0, d
	bl	mpn_invert_limb
C mpn_preinv_divrem_1 joins here with dinv already loaded.
L(nent):
	cmp	n, #0
	mov	r11, #0			C r
	blt	L(nend)			C no integer limbs at all

	ldr	r11, [up, #0]		C top dividend limb
	cmp	r11, d
	movlo	r2, #0			C hi q limb
	movhs	r2, #1			C hi q limb
	subhs	r11, r11, d		C top limb >= d: reduce it by d

	str	r2, [qp], #-4		C store top quotient limb (0 or 1)
	cmp	n, #0			C redo: cmp r11, d replaced the flags
	beq	L(nend)			C single limb: done with integer part

C Main normalised loop.
C   r11 remainder        r12 quotient limb candidate
C   r1  dividend limb, then low word of the umlal result
C   r3  the same dividend limb reloaded for mls
L(ntop):ldr	r1, [up, #-4]!		C next lower limb, up pre-decremented
	add	r12, r11, #1
	umlal	r1, r12, r11, dinv	C r12:r1 += r11 * dinv
	ldr	r3, [up, #0]		C reload the limb; umlal overwrote r1
	mls	r11, d, r12, r3		C r11 = r3 - d * r12
	cmp	r11, r1
	addhi	r11, r11, d		C r11 > low word (unsigned): add d back
	subhi	r12, r12, #1		C   and take one off the quotient limb
	cmp	d, r11
	bls	L(nfx)			C d <= r11: out-of-line fixup
L(nok):	str	r12, [qp], #-4		C store quotient limb, qp moves down
	subs	n, n, #1
	bne	L(ntop)

C Hand over to the shared fraction code, which expects the remainder in r1
C and shifts it right by cnt before returning.
L(nend):mov	r1, r11			C r
	mov	cnt, #0			C shift cnt
	b	L(frac)

C Out-of-line fixups for the two main loops: remainder -= d, quotient += 1.
L(nfx):	add	r12, r12, #1
	rsb	r11, d, r11		C r11 = r11 - d
	b	L(nok)
L(ufx):	rsb	r1, d, r1		C r1 = r1 - d
	add	r8, r8, #1
	b	L(uok)
EPILOGUE()