dnl  AMD64 mpn_addlsh2_n and mpn_rsblsh2_n.  R = 4*V +- U, i.e. V shifted
dnl  left by two bits.  ("rsb" means reversed subtract, name mandated by
dnl  mpn_sublsh2_n which subtracts the shifted operand from the unshifted
dnl  operand.)

dnl  Copyright 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C K8,K9:	 2
C K10:		 2
C P4:		 ?
C P6 core2:	 3
C P6 corei7:	 2.75
C P6 atom:	 ?
C INPUT PARAMETERS
define(`rp', `%rdi')		C result limb vector
define(`up', `%rsi')		C unshifted operand limb vector
define(`vp', `%rdx')		C operand to be shifted left by 2 bits
define(`n',  `%rcx')		C limb count

C Select the operation at m4 time: the same body yields either
C rp = up + 4*vp (add/adc) or rp = 4*vp - up (sub/sbb, "reversed subtract").
ifdef(`OPERATION_addlsh2_n',`
	define(ADDSUB,	`add')
	define(ADCSBB,	`adc')
	define(func, mpn_addlsh2_n)')
ifdef(`OPERATION_rsblsh2_n',`
	define(ADDSUB,	`sub')
	define(ADCSBB,	`sbb')
	define(func, mpn_rsblsh2_n)')

MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	C The 4-way unrolled pipeline needs four scratch result registers;
	C save the callee-saved ones it uses.
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C Shift the first V limb left by 2: the low 62 bits go to %r12 via a
	C scale-4 lea, the top 2 bits stay in %r8 as carry into the next limb.
	mov	(vp), %r8
	lea	(,%r8,4), %r12
	shr	$62, %r8

	C Point all three vectors past their ends and run n from -n up to 0,
	C so one index register addresses everything.
	mov	R32(n), R32(%rax)
	lea	(rp,n,8), rp
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	neg	n
	and	$3, R8(%rax)		C dispatch on n mod 4
	je	L(b00)
	cmp	$2, R8(%rax)
	jc	L(b01)
	je	L(b10)

	C n mod 4 == 3: handle three limbs, then fall into the 4-way loop.
L(b11):	mov	8(vp,n,8), %r10
	lea	(%r8,%r10,4), %r14	C limb = (prev high bits) | (V<<2)
	shr	$62, %r10
	mov	16(vp,n,8), %r11	C %r11 = pending high bits for loop/end
	lea	(%r10,%r11,4), %r15
	shr	$62, %r11
	ADDSUB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r14
	ADCSBB	16(up,n,8), %r15
	sbb	R32(%rax), R32(%rax)	C save carry for next
	mov	%r12, (rp,n,8)
	mov	%r14, 8(rp,n,8)
	mov	%r15, 16(rp,n,8)
	add	$3, n
	js	L(top)
	jmp	L(end)

	C n mod 4 == 1: handle one limb.  The shift-out moves to %r11, where
	C L(top)/L(end) expect the pending high bits.
L(b01):	mov	%r8, %r11
	ADDSUB	(up,n,8), %r12
	sbb	R32(%rax), R32(%rax)	C save carry for next
	mov	%r12, (rp,n,8)
	add	$1, n
	js	L(top)
	jmp	L(end)

	C n mod 4 == 2: handle two limbs.
L(b10):	mov	8(vp,n,8), %r11
	lea	(%r8,%r11,4), %r15
	shr	$62, %r11
	ADDSUB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r15
	sbb	R32(%rax), R32(%rax)	C save carry for next
	mov	%r12, (rp,n,8)
	mov	%r15, 8(rp,n,8)
	add	$2, n
	js	L(top)
	jmp	L(end)

	C n mod 4 == 0: prime %r9/%r10 and enter the loop mid-body; the first
	C limb's shifted value is already in %r12 (carry mask in %rax is not
	C yet set up, but L(e00) is reached before the carry is consumed...
	C NOTE(review): entry via L(e00) skips the `add %eax,%eax` restore, so
	C the first ADCSBB uses CF from `jmp` path flags — confirm CF is clear
	C here (the preceding `shr` leaves CF = bit shifted out; presumably the
	C ADDSUB-vs-ADCSBB distinction is why %r12 holds the plain lea result).
L(b00):	mov	8(vp,n,8), %r9
	mov	16(vp,n,8), %r10
	jmp	L(e00)

	ALIGN(16)
	C Main loop: four limbs per iteration, ~2 cycles/limb on K8-K10.
	C Invariants at L(top): %r11 = high bits shifted out of the previous
	C V limb; %rax = 0 or -1, the saved carry/borrow mask.
L(top):	mov	16(vp,n,8), %r10
	mov	(vp,n,8), %r8
	mov	8(vp,n,8), %r9
	lea	(%r11,%r8,4), %r12	C splice prev high bits into V<<2
	shr	$62, %r8
L(e00):	lea	(%r8,%r9,4), %r13
	shr	$62, %r9
	mov	24(vp,n,8), %r11
	lea	(%r9,%r10,4), %r14
	shr	$62, %r10
	lea	(%r10,%r11,4), %r15
	shr	$62, %r11		C %r11 = high bits for next iteration
	add	R32(%rax), R32(%rax)	C restore carry
	ADCSBB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r13
	ADCSBB	16(up,n,8), %r14
	ADCSBB	24(up,n,8), %r15
	mov	%r12, (rp,n,8)
	mov	%r13, 8(rp,n,8)
	mov	%r14, 16(rp,n,8)
	sbb	R32(%rax), R32(%rax)	C save carry for next
	mov	%r15, 24(rp,n,8)
	add	$4, n
	js	L(top)
L(end):

C Form the return value from %r11 (bits shifted out of the top V limb) and
C %rax (0 or -1 carry/borrow mask from the final sbb).
ifdef(`OPERATION_addlsh2_n',`
	sub	R32(%r11), R32(%rax)
	neg	R32(%rax)')		C return shift-out + addition carry
ifdef(`OPERATION_rsblsh2_n',`
	add	R32(%r11), R32(%rax)
	movslq	R32(%rax), %rax')	C return shift-out - borrow, sign-extended

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	ret
EPILOGUE()