dnl  x86-64 mpn_divrem_1 -- mpn by limb division.

dnl  Copyright 2004, 2005, 2007-2010, 2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C Timing table as found in the file; presumably cycles per limb for
C normalized divisor, unnormalized divisor and fraction limbs (confirm).
C		norm	unorm	frac
C AMD K8,K9	15	15	12
C AMD K10	15	15	12
C Intel P4	44	44	43
C Intel core2	24	24	19.5
C Intel corei	19	19	18
C Intel atom	51	51	36
C VIA nano	46	44	22.5

C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)

C mp_limb_t
C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
C                      mp_limb_t dinv, int cnt)

C OVERVIEW
C Both entry points write nn+fn quotient limbs at qp, starting at the most
C significant limb qp[nn+fn-1] and stepping downwards, and return the final
C remainder in rax.  mpn_divrem_1 derives the shift count and calls
C mpn_invert_limb itself; mpn_preinv_divrem_1 receives dinv and the shift
C count from the caller and joins the shared code at L(ent).
C The per-limb step looks like division by a precomputed reciprocal in the
C style of Moller and Granlund -- TODO confirm against that paper.

C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`fn_param',	`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`d',		`%r8')
define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
C       shift passed on stack		C only for mpn_preinv_divrem_1

C WORKING REGISTERS
C rcx and rsi are reused once the incoming fn and nn have been copied to
C callee-saved registers.
define(`cnt',		`%rcx')
define(`up',		`%rsi')
define(`fn',		`%r12')
define(`un',		`%rbx')


C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
C         cnt         qp      d  dinv

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C CNTOFF is the offset from rsp of the stack-passed shift count, valid after
C the four pushes in the mpn_preinv_divrem_1 prologue.  STD64: 4 pushes (32
C bytes) plus the return address (8 bytes) gives 40.  DOS64: 104 presumably
C also covers what FUNC_ENTRY pushes, the shadow space and the two earlier
C stack arguments -- confirm against the FUNC_ENTRY definition.
IFSTD(`define(`CNTOFF',		`40($1)')')
IFDOS(`define(`CNTOFF',		`104($1)')')

ASM_START()
	TEXT
	ALIGN(16)
C mpn_preinv_divrem_1: entry point for callers that already have the
C reciprocal (dinv) and the shift count (cnt).  It sets up the same register
C state as mpn_divrem_1 and jumps into the shared integer loop.
C NOTE(review): there is no zero-size check here, and L(ent) loads
C -8(up,un,8) unconditionally, so callers presumably guarantee nn >= 1.
PROLOGUE(mpn_preinv_divrem_1)
	FUNC_ENTRY(4)
C DOS64 only: fetch d and dinv from the stack (presumably the fifth and
C sixth argument slots as seen after FUNC_ENTRY -- confirm).
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
	xor	R32(%rax), R32(%rax)	C initial remainder = 0
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C total quotient limbs; must read rsi before it becomes up
	mov	up_param, up

	lea	-8(qp,un_param,8), qp	C qp -> most significant quotient limb

	mov	CNTOFF(%rsp), R8(cnt)	C only the low byte of rcx is loaded
	shl	R8(cnt), d		C shift the divisor left by cnt
	jmp	L(ent)
EPILOGUE()

	ALIGN(16)
C mpn_divrem_1: divide the nn-limb number at np by the single limb d,
C producing nn integer and fn fraction quotient limbs at qp and returning
C the remainder in rax.  Returns 0 at once when nn+fn is zero.
PROLOGUE(mpn_divrem_1)
	FUNC_ENTRY(4)
C DOS64 only: fetch d from the stack (presumably the fifth argument slot as
C seen after FUNC_ENTRY -- confirm).
IFDOS(`	mov	56(%rsp), %r8	')
	xor	R32(%rax), R32(%rax)	C return value 0 for the empty case
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C sets ZF when nn+fn == 0
	mov	up_param, up		C mov leaves the flags from add intact
	je	L(ret)			C nothing to do

	lea	-8(qp,un_param,8), qp	C qp -> most significant quotient limb
	xor	R32(%rbp), R32(%rbp)	C rbp = 0, the initial remainder

C If there are integer limbs and the top one is below d, the top quotient
C limb is zero: store it, and use that source limb as the initial remainder.
L(unnormalized):
	test	un, un
	je	L(44)
	mov	-8(up,un,8), %rax	C most significant source limb
	cmp	d, %rax
	jae	L(44)			C top limb >= d: no shortcut
	mov	%rbp, (qp)		C rbp is still 0 here: zero quotient limb
	mov	%rax, %rbp		C top limb becomes the remainder
	lea	-8(qp), qp
C NOTE(review): mov and lea do not modify flags, so this je still tests the
C cmp above.  Falling through the jae means rax < d, hence ZF is clear and
C this branch is never taken.  Left as found.
	je	L(ret)
	dec	un
C Compute the shift count and the reciprocal of the shifted divisor.
L(44):
	bsr	d, %rcx			C index of the highest set bit of d
	not	R32(%rcx)		C low 6 bits of cl = 63 - index = leading zero count
	sal	R8(%rcx), d		C 64-bit shifts use only the low 6 bits of cl
	sal	R8(%rcx), %rbp		C shift the initial remainder the same way

C Save the registers still needed after the call.  STD64: 8 pushes since
C entry keep rsp at 8 mod 16, so the extra sub of 8 aligns it to 16 for the
C call.  DOS64: the 40 bytes are presumably 32 of shadow space plus 8 for
C alignment -- confirm against FUNC_ENTRY.
	push	%rcx
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	sub	$8, %rsp	')
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$40, %rsp	')
IFDOS(`	mov	d, %rcx		')
C The meaning of this check depends on how the ASSERT macro expands; it is
C not visible in this file.
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFSTD(`	add	$8, %rsp	')
IFDOS(`	add	$40, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')
	pop	%rcx

	mov	%rax, dinv		C result of mpn_invert_limb
	mov	%rbp, %rax		C rax = shifted initial remainder
	test	un, un
	je	L(frac)			C no integer limbs left: fraction limbs only

C Shared entry.  From mpn_divrem_1 rax holds the remainder already shifted
C left by cnt, so the shr undoes that shift before shld merges in the top
C bits of the first limb.  From mpn_preinv_divrem_1 rax is 0.
L(ent):	mov	-8(up,un,8), %rbp	C rbp = most significant remaining limb
	shr	R8(%rcx), %rax
	shld	R8(%rcx), %rbp, %rax	C rax = rax shifted left by cnt, filled from the top of rbp
	sub	$2, un
	js	L(end)			C only one limb left: go straight to the last step

	ALIGN(16)
C Integer loop, one quotient limb per iteration.  rax is the running
C remainder, rbp the most recently loaded source limb, un the index of the
C next lower limb.
L(top):	lea	1(%rax), %r11		C r11 = remainder + 1
	mul	dinv			C rdx:rax = remainder * dinv
	mov	(up,un,8), %r10		C r10 = next lower source limb
	shld	R8(%rcx), %r10, %rbp	C rbp = current limb shifted by cnt, low bits from r10
	mov	%rbp, %r13
	add	%rax, %r13		C r13 = low product word + shifted limb
	adc	%r11, %rdx		C rdx = high product word + r11 + carry
	mov	%rdx, %r11		C r11 = candidate quotient limb
	imul	d, %rdx
	sub	%rdx, %rbp		C rbp = shifted limb - candidate * d
	lea	(d,%rbp), %rax		C rax = rbp + d; lea leaves flags alone
	sub	$8, qp			C step qp down early; the store below uses 8(qp)
	cmp	%r13, %rbp
	cmovc	%rbp, %rax		C rbp < r13: keep rbp as the remainder
	adc	$-1, %r11		C otherwise the candidate is decremented
	cmp	d, %rax
	jae	L(ufx)			C remainder still >= d: fix up out of line
L(uok):	dec	un
	mov	%r11, 8(qp)		C store the quotient limb
	mov	%r10, %rbp		C next limb becomes the current limb
	jns	L(top)			C tests the sign from dec; the movs keep flags

C Last integer limb: same step, but zeros are shifted in from below because
C no lower source limb exists.  The quotient limb is built in r13.
L(end):	lea	1(%rax), %r11
	sal	R8(%rcx), %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13		C r13 = candidate quotient limb
	imul	d, %rdx
	sub	%rdx, %rbp		C rbp = shifted limb - candidate * d
	mov	d, %rax
	add	%rbp, %rax		C rax = rbp + d
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(efx)
L(eok):	mov	%r13, (qp)
	sub	$8, qp
	jmp	L(frac)

C Out-of-line corrections: subtract d once more and bump the quotient limb.
L(ufx):	sub	d, %rax
	inc	%r11
	jmp	L(uok)
L(efx):	sub	d, %rax
	inc	%r13
	jmp	L(eok)

C Fraction limbs: the dividend limb is zero, so the step is shorter.
C rbp holds -d so that the imul yields the candidate remainder directly.
L(frac):mov	d, %rbp
	neg	%rbp
	jmp	L(fent)

C The columns to the right are the original per-CPU scheduling annotations.
	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
L(ftop):mul	dinv			C	      0,12   0,17   0,17
	add	%r11, %rdx		C	      5      8     10
	mov	%rax, %r11		C	      4      8      3
	mov	%rdx, %r13		C	      6      9     11
	imul	%rbp, %rdx		C	      6      9     11
	mov	d, %rax			C
	add	%rdx, %rax		C	     10     14     14
	cmp	%r11, %rdx		C	     10     14     14
	cmovc	%rdx, %rax		C	     11     15     15
	adc	$-1, %r13		C
	mov	%r13, (qp)		C
	sub	$8, qp			C
L(fent):lea	1(%rax), %r11		C
	dec	fn			C
	jns	L(ftop)			C

C Shift the remainder back right by cnt before returning it in rax.
	shr	R8(%rcx), %rax
C Common exit; the early zero-size exit arrives here with rax = 0.
L(ret):	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	FUNC_EXIT()
	ret
EPILOGUE()