dnl  aors_err2_n.asm  (CVS revision 1.1.1.1)
dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n

dnl  Contributed by David Harvey.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 4.5
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 6.9
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?
C INPUT PARAMETERS
C First six arguments arrive in registers (SysV AMD64); the limb count and
C the incoming carry are the 7th/8th arguments, read from the stack.
define(`rp', `%rdi')
define(`up', `%rsi')
define(`vp', `%rdx')
define(`ep', `%rcx')
define(`yp1', `%r8')
define(`yp2', `%r9')
define(`n_param', `8(%rsp)')
define(`cy_param', `16(%rsp)')

C cy1/cy2: carry-derived masks (0 or -1) for the even/odd limb of each
C unrolled pair.  cy2 is %rax so its low bit doubles as the return value.
define(`cy1', `%r14')
define(`cy2', `%rax')

C n counts upward from -n_param to 0 (loop index and termination test in one).
define(`n', `%r10')

C w: scratch/working limb.  (e1h:e1l) and (e2h:e2l) are the two 128-bit
C error accumulators written to ep[0..3] at the end.
define(`w', `%rbx')
define(`e1l', `%rbp')
define(`e1h', `%r11')
define(`e2l', `%r12')
define(`e2h', `%r13')


C The same body assembles as either the add or the sub entry point; only the
C carry-propagating instruction differs.
ifdef(`OPERATION_add_err2_n', `
	define(ADCSBB, adc)
	define(func, mpn_add_err2_n)')
ifdef(`OPERATION_sub_err2_n', `
	define(ADCSBB, sbb)
	define(func, mpn_sub_err2_n)')

MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)


ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
C func(rp, up, vp, ep, yp1, yp2, n, cy):
C   rp[] = up[] +/- vp[] over n limbs, starting from carry/borrow `cy'.
C   For each limb position that produces a carry (resp. borrow), the
C   corresponding yp1 limb is accumulated into the 128-bit value stored at
C   ep[0],ep[1], and the corresponding yp2 limb into ep[2],ep[3]; yp1/yp2
C   are read in reverse (high limb first, per the lea setup below).
C   Returns the final carry (0 or 1) in %rax.
	mov cy_param, cy2
	mov n_param, n

C Save all callee-saved registers this routine clobbers.
	push %rbx
	push %rbp
	push %r12
	push %r13
	push %r14

C Clear both 128-bit error accumulators.
	xor R32(e1l), R32(e1l)
	xor R32(e1h), R32(e1h)
	xor R32(e2l), R32(e2l)
	xor R32(e2h), R32(e2h)

C Keep yp2 as a byte offset from yp1, so (yp1,yp2) addresses the second
C y vector and only yp1 needs updating inside the loop.
	sub yp1, yp2

C Point rp/up/vp just past their ends; negative n then indexes backward,
C and the loop ends when n reaches zero.
	lea (rp,n,8), rp
	lea (up,n,8), up
	lea (vp,n,8), vp

C Odd limb count: peel one iteration so the main loop can be 2-way unrolled.
	test $1, n
	jnz L(odd)

	lea -8(yp1,n,8), yp1
	neg n
	jmp L(top)

	ALIGN(16)
L(odd):
	lea -16(yp1,n,8), yp1
	neg n
	shr $1, cy2		C move the incoming cy bit into CF
	mov (up,n,8), w
	ADCSBB (vp,n,8), w
	C Accumulators are still zero here, so a plain conditional load
	C (instead of the mask-and-add used in the loop) suffices.
	cmovc 8(yp1), e1l
	cmovc 8(yp1,yp2), e2l
	mov w, (rp,n,8)
	sbb cy2, cy2		C cy2 = -CF, carry saved as a mask
	inc n			C inc preserves CF-derived state in cy2
	jz L(end)

	ALIGN(16)
L(top):
	C Two limbs per iteration.  Each ADCSBB's carry is captured as a
	C full-width mask (sbb reg,reg) and later re-injected via shr.
	mov (up,n,8), w
	shr $1, cy2 C restore carry
	ADCSBB (vp,n,8), w
	mov w, (rp,n,8)
	sbb cy1, cy1 C generate mask, preserve CF

	mov 8(up,n,8), w
	ADCSBB 8(vp,n,8), w
	mov w, 8(rp,n,8)
	sbb cy2, cy2 C generate mask, preserve CF

	C Masked accumulation: w = (carry ? y limb : 0), then 128-bit add.
	mov (yp1), w C (e1h:e1l) += cy1 * yp1 limb
	and cy1, w
	add w, e1l
	adc $0, e1h

	and (yp1,yp2), cy1 C (e2h:e2l) += cy1 * yp2 limb
	add cy1, e2l
	adc $0, e2h

	mov -8(yp1), w C (e1h:e1l) += cy2 * next yp1 limb
	and cy2, w
	add w, e1l
	adc $0, e1h

	mov -8(yp1,yp2), w C (e2h:e2l) += cy2 * next yp2 limb
	and cy2, w
	add w, e2l
	adc $0, e2h

	add $2, n
	lea -16(yp1), yp1	C lea: step yp1 without touching flags
	jnz L(top)
L(end):

C Store both 128-bit error accumulators to the 4-limb ep area.
	mov e1l, (ep)
	mov e1h, 8(ep)
	mov e2l, 16(ep)
	mov e2h, 24(ep)

	and $1, %eax C return carry (cy2 mask is 0 or -1; keep its low bit)

	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	ret
EPILOGUE()