1dnl AMD64 mpn_add_err3_n, mpn_sub_err3_n 2 3dnl Contributed by David Harvey. 4 5dnl Copyright 2011 Free Software Foundation, Inc. 6 7dnl This file is part of the GNU MP Library. 8dnl 9dnl The GNU MP Library is free software; you can redistribute it and/or modify 10dnl it under the terms of either: 11dnl 12dnl * the GNU Lesser General Public License as published by the Free 13dnl Software Foundation; either version 3 of the License, or (at your 14dnl option) any later version. 15dnl 16dnl or 17dnl 18dnl * the GNU General Public License as published by the Free Software 19dnl Foundation; either version 2 of the License, or (at your option) any 20dnl later version. 21dnl 22dnl or both in parallel, as here. 23dnl 24dnl The GNU MP Library is distributed in the hope that it will be useful, but 25dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27dnl for more details. 28dnl 29dnl You should have received copies of the GNU General Public License and the 30dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31dnl see https://www.gnu.org/licenses/. 32 33include(`../config.m4') 34 35C cycles/limb 36C AMD K8,K9 7.0 37C AMD K10 ? 38C Intel P4 ? 39C Intel core2 ? 40C Intel corei ? 41C Intel atom ? 42C VIA nano ? 
C INPUT PARAMETERS
C The first six arguments are taken from registers and the remaining three
C from the stack.  The *_param offsets are relative to %rsp on entry, that
C is, before anything has been pushed.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`ep',	`%rcx')
define(`yp1',	`%r8')
define(`yp2',	`%r9')
define(`yp3_param',	`8(%rsp)')
define(`n_param',	`16(%rsp)')
define(`cy_param',	`24(%rsp)')

C WORKING REGISTERS
C yp3 shares %rcx with ep, so ep is parked on the stack while the loop runs.
define(`n',	`%r10')
define(`yp3',	`%rcx')
define(`t0',	`%rbx')

C Three two-limb accumulators, low and high limb each.
define(`e1lo',	`%rbp')
define(`e1hi',	`%r11')
define(`e2lo',	`%r12')
define(`e2hi',	`%r13')
define(`e3lo',	`%r14')
define(`e3hi',	`%r15')


ifdef(`OPERATION_add_err3_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_err3_n)')
ifdef(`OPERATION_sub_err3_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_err3_n)')

MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)

C func (rp, up, vp, ep, yp1, yp2, yp3, n, cy)
C
C Stores {up,n} ADCSBB {vp,n} at {rp,n}, starting from the carry/borrow held
C in bit 0 of cy (no other bit of cy is consumed), and returns the final
C carry/borrow, 0 or 1, in %rax.
C
C Three two-limb sums are accumulated along the way.  Whenever limb i
C (0 <= i < n) generates a carry/borrow, the limbs yp1[n-1-i], yp2[n-1-i] and
C yp3[n-1-i] are added to sums 1, 2 and 3 respectively.  The sums are written
C low limb first to ep[0..1], ep[2..3] and ep[4..5].
C
C n must be nonzero, since the loop body runs before the counter is tested.
C
C %rbx, %rbp and %r12-%r15 are saved on entry and restored before returning.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	mov	n_param, n		C stack arguments are fetched before
	mov	cy_param, %rax		C the pushes move %rsp

	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	push	ep			C %rcx is about to be reused as yp3

	C Seven pushes moved %rsp down 56 bytes, so the slot that was
	C 8(%rsp) on entry (yp3_param) now sits at 64(%rsp).
	mov	64(%rsp), yp3

	C Turn yp2 and yp3 into byte offsets from yp1, so that a single
	C moving pointer addresses all three y vectors.
	sub	yp1, yp3
	sub	yp1, yp2

	C Point rp, up and vp just past their last limb and yp1 at its last
	C limb.  The former are indexed by a negative count rising to zero,
	C the latter is walked downwards.
	lea	(vp,n,8), vp
	lea	(up,n,8), up
	lea	(rp,n,8), rp
	lea	-8(yp1,n,8), yp1

	C Clear the accumulators.
	xor	R32(e1hi), R32(e1hi)
	xor	R32(e1lo), R32(e1lo)
	xor	R32(e2hi), R32(e2hi)
	xor	R32(e2lo), R32(e2lo)
	xor	R32(e3hi), R32(e3hi)
	xor	R32(e3lo), R32(e3lo)

	neg	n

	ALIGN(16)
L(top):
	C On arrival bit 0 of %rax holds the carry: cy on the first pass,
	C the 0 or -1 mask from the previous pass afterwards.
	shr	$1, %rax		C move it into CF
	mov	(up,n,8), %rax
	ADCSBB	(vp,n,8), %rax
	mov	%rax, (rp,n,8)
	sbb	%rax, %rax		C 0 or -1: saved carry and select mask

	mov	(yp1), t0		C sum 1 += carry ? yp1 limb : 0
	and	%rax, t0
	add	t0, e1lo
	adc	$0, e1hi

	mov	(yp1,yp2), t0		C sum 2 += carry ? yp2 limb : 0
	and	%rax, t0
	add	t0, e2lo
	adc	$0, e2hi

	mov	(yp1,yp3), t0		C sum 3 += carry ? yp3 limb : 0
	and	%rax, t0
	add	t0, e3lo
	adc	$0, e3hi

	lea	-8(yp1), yp1		C step the y pointer down one limb
	inc	n
	jnz	L(top)

L(end):
	pop	ep			C recover the output pointer

	mov	e1lo, (ep)
	mov	e1hi, 8(ep)
	mov	e2lo, 16(ep)
	mov	e2hi, 24(ep)
	mov	e3lo, 32(ep)
	mov	e3hi, 40(ep)

	and	$1, %eax		C reduce the mask to a 0 or 1 return value

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()