dnl  ARM64 mpn_rsh1add_n and mpn_rsh1sub_n.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2017 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C	     cycles/limb   assumed optimal c/l
C Cortex-A53	3.25-3.75	 3.0 steady
C Cortex-A57	 2.15		 1.75
C X-Gene	 2.75		 2.5

changecom(blah)

C Argument registers, in AAPCS64 order.
define(`rp', `x0')
define(`up', `x1')
define(`vp', `x2')
define(`n',  `x3')

C The build selects one operation.  ADDSUB starts the carry/borrow chain,
C ADDSUBC continues it, and COND is the condition under which the chain has
C produced a carry-out (add: carry set) or a borrow-out (sub: carry clear).
ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`COND',	`cs')
  define(`func_n',	mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`COND',	`cc')
  define(`func_n',	mpn_rsh1sub_n)')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

C func_n (rp, up, vp, n)
C
C Adds (or subtracts) the n-limb operands at up and vp limb by limb through
C one carry/borrow chain, shifts the result right by one bit, and stores n
C limbs at rp.  The final carry/borrow (COND) becomes the top bit of the most
C significant stored limb.  The bit shifted out at the bottom, bit 0 of the
C lowest sum/difference limb, is returned in x0.
C
C Register roles:
C   x0 (rp)           result pointer, advanced by the post-indexed stores
C   x1 (up), x2 (vp)  source pointers, advanced by the post-indexed loads
C   x3 (n)            limb count; bits 0 and 1 select the entry path
C   x6                n >> 2, counted down by the 4-way unrolled loop
C   x4,x5 / x8,x9     limbs just loaded from up / vp
C   x12-x15           sum/difference limbs waiting to be shifted
C   x16,x17           shifted limbs about to be stored
C   x10               return value, copied to x0 on exit
C
C The loop counter lives in x6 rather than x18: x18 is the AAPCS64 platform
C register and is reserved on Darwin and Windows, so it must not be used as
C a scratch register in portable code.  x6 is caller-saved and otherwise
C unused here.
C
C The carry flag is live from the first ADDSUB to the final cset, so every
C instruction in between must leave the flags alone (hence sub + cbz/cbnz for
C the loop count instead of subs + b.ne).
C
C NOTE(review): n = 0 is not handled, the b00 path loads four limbs
C unconditionally.  Presumably callers guarantee n >= 1, please confirm.

ASM_START()
PROLOGUE(func_n)
	lsr	x6, n, #2		C x6 = number of whole 4-limb groups

	tbz	n, #0, L(bx0)		C even n

L(bx1):	ldr	x5, [up],#8		C odd n, handle one limb up front
	ldr	x9, [vp],#8
	tbnz	n, #1, L(b11)

L(b01):	ADDSUB	x13, x5, x9		C n mod 4 = 1
	and	x10, x13, #1		C bit shifted out at the bottom
	cbz	x6, L(1)		C n = 1
	ldp	x4, x5, [up],#48
	ldp	x8, x9, [vp],#48
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	ldp	x4, x5, [up,#-32]
	ldp	x8, x9, [vp,#-32]
	extr	x17, x14, x13, #1
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	str	x17, [rp], #24
	sub	x6, x6, #1		C plain sub, flags must survive
	cbz	x6, L(end)
	b	L(top)

L(1):	cset	x14, COND		C n = 1, carry/borrow is the top bit
	extr	x17, x14, x13, #1
	str	x17, [rp]
	mov	x0, x10
	ret

L(b11):	ADDSUB	x15, x5, x9		C n mod 4 = 3
	and	x10, x15, #1		C bit shifted out at the bottom

	ldp	x4, x5, [up],#32
	ldp	x8, x9, [vp],#32
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	cbz	x6, L(3)		C n = 3
	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	extr	x17, x12, x15, #1
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	str	x17, [rp], #8
	b	L(mid)

L(3):	extr	x17, x12, x15, #1	C n = 3, store low limb then share tail
	str	x17, [rp], #8
	b	L(2)

L(bx0):	tbz	n, #1, L(b00)

L(b10):	ldp	x4, x5, [up],#32	C n mod 4 = 2
	ldp	x8, x9, [vp],#32
	ADDSUB	x12, x4, x8
	ADDSUBC	x13, x5, x9
	and	x10, x12, #1		C bit shifted out at the bottom
	cbz	x6, L(2)		C n = 2
	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	b	L(mid)

L(b00):	ldp	x4, x5, [up],#48	C n mod 4 = 0
	ldp	x8, x9, [vp],#48
	ADDSUB	x14, x4, x8
	ADDSUBC	x15, x5, x9
	and	x10, x14, #1		C bit shifted out at the bottom
	ldp	x4, x5, [up,#-32]
	ldp	x8, x9, [vp,#-32]
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	add	rp, rp, #16
	sub	x6, x6, #1		C plain sub, flags must survive
	cbz	x6, L(end)

	ALIGN(16)
C Main loop, four limbs per iteration.  Each extr pairs a limb with its
C more significant neighbour to form one right-shifted result limb.
L(top):	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	extr	x16, x15, x14, #1
	extr	x17, x12, x15, #1
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	stp	x16, x17, [rp,#-16]
L(mid):	ldp	x4, x5, [up],#32
	ldp	x8, x9, [vp],#32
	extr	x16, x13, x12, #1
	extr	x17, x14, x13, #1
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	stp	x16, x17, [rp],#32
	sub	x6, x6, #1		C plain sub, flags must survive
	cbnz	x6, L(top)

C Wind down: flush the limbs still held in x12-x15.
L(end):	extr	x16, x15, x14, #1
	extr	x17, x12, x15, #1
	stp	x16, x17, [rp,#-16]
L(2):	cset	x14, COND		C final carry/borrow is the top bit
	extr	x16, x13, x12, #1
	extr	x17, x14, x13, #1
	stp	x16, x17, [rp]

L(ret):	mov	x0, x10			C return the bit shifted out
	ret
EPILOGUE()