dnl  aorslsh1_n.asm revision 1.1.1.1
dnl  x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core" 2.

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C K8,K9:	 4.25
C K10:		 ?
C P4:		 ?
C P6-15:	 3

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n', `%rcx')

ifdef(`OPERATION_addlsh1_n', `
	define(ADDSUB, add)
	define(ADCSBB, adc)
	define(func, mpn_addlsh1_n)')
ifdef(`OPERATION_sublsh1_n', `
	define(ADDSUB, sub)
	define(ADCSBB, sbb)
	define(func, mpn_sublsh1_n)')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C ----------------------------------------------------------------------------
C func (rp, up, vp, n)
C
C For each of the n 64-bit limbs, least significant first, store
C     rp[i] = up[i] ADDSUB (vp[] shifted left by one bit)[i]
C with the carry or borrow propagated from limb to limb.
C
C In:	 rp = %rdi, up = %rsi, vp = %rdx, n = %rcx
C Out:	 %eax = final carry or borrow, plus the bit shifted out of the
C	 most significant vp limb (so 0, 1 or 2)
C Uses:	 %rax, %r8-%r11 as scratch; %rbx and %r12 are saved and restored
C
C Register roles inside the code:
C	 %r11		previous vp limb, unshifted; supplies the bit carried
C			into the next doubled limb
C	 %r8-%r10, %rbx	vp limbs of the current group
C	 %r12		up limb being combined and stored
C	 %eax		carry between groups, kept as 0 or -1
C
C The first vp limb is read before n is examined, and no separate path for
C n = 0 is present.  NOTE(review): callers presumably guarantee n >= 1.
C ----------------------------------------------------------------------------

ASM_START()
	TEXT
	ALIGN(8)
PROLOGUE(func)
	push	%r12
	push	%rbx

	mov	R32(%rcx), R32(%rax)	C copy of n, taken before n is negated
	lea	24(rp,n,8), rp		C bias all three pointers past the end
	lea	24(up,n,8), up
	lea	24(vp,n,8), vp
	neg	n			C n now runs from -n up towards zero

	xor	R32(%r11), R32(%r11)	C zero enters below the lowest limb

	mov	-24(vp,n,8), %r8	C do first limb early
	shrd	$63, %r8, %r11		C r11 = first vp limb doubled

	and	$3, R32(%rax)		C limb count mod 4 selects the entry
	jz	L(rem0)
	cmp	$2, R32(%rax)
	jb	L(rem1)
	je	L(rem2)

C Three leftover limbs.
L(rem3):
	mov	-16(vp,n,8), %r9
	mov	-8(vp,n,8), %r10
	shrd	$63, %r9, %r8		C all shifts come first, shrd alters flags
	shrd	$63, %r10, %r9
	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12		C lowest limb, no incoming carry
	mov	%r12, -24(rp,n,8)
	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)
	mov	-8(up,n,8), %r12
	ADCSBB	%r9, %r12
	mov	%r12, -8(rp,n,8)
	sbb	R32(%rax), R32(%rax)	C save cy as 0 or -1
	mov	%r10, %r11		C last limb read, still unshifted
	add	$3, n
	jns	L(done)
	jmp	L(loop)

C One leftover limb, already doubled in r11.
L(rem1):
	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12
	mov	%r12, -24(rp,n,8)
	sbb	R32(%rax), R32(%rax)	C save cy as 0 or -1
	mov	%r8, %r11		C last limb read, still unshifted
	inc	n
	jns	L(done)
	jmp	L(loop)

C Two leftover limbs.
L(rem2):
	mov	-16(vp,n,8), %r9
	shrd	$63, %r9, %r8
	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12
	mov	%r12, -24(rp,n,8)
	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)
	sbb	R32(%rax), R32(%rax)	C save cy as 0 or -1
	mov	%r9, %r11		C last limb read, still unshifted
	add	$2, n
	jns	L(done)
	jmp	L(loop)

C Main loop, four limbs per pass.  Counts that are a multiple of four enter
C at rem0 with eax = 0 and the first limb already doubled in r11.
	ALIGN(16)
L(loop):
	mov	-24(vp,n,8), %r8
	shrd	$63, %r8, %r11
L(rem0):
	mov	-16(vp,n,8), %r9
	shrd	$63, %r9, %r8
	mov	-8(vp,n,8), %r10
	shrd	$63, %r10, %r9
	mov	(vp,n,8), %rbx
	shrd	$63, %rbx, %r10

	add	R32(%rax), R32(%rax)	C restore cy from the 0 or -1 in eax

	mov	-24(up,n,8), %r12
	ADCSBB	%r11, %r12
	mov	%r12, -24(rp,n,8)

	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)

	mov	-8(up,n,8), %r12
	ADCSBB	%r9, %r12
	mov	%r12, -8(rp,n,8)

	mov	(up,n,8), %r12
	ADCSBB	%r10, %r12
	mov	%r12, (rp,n,8)

	mov	%rbx, %r11		C carry the unshifted top limb forward
	sbb	R32(%rax), R32(%rax)	C save cy as 0 or -1

	add	$4, n
	js	L(loop)

C Return value: saved carry plus the bit shifted out of the top vp limb.
L(done):
	add	%r11, %r11		C CF = top bit of the last vp limb
	sbb	$0, R32(%rax)		C eax = -(cy) - CF
	neg	R32(%rax)		C eax = cy + CF
	pop	%rbx
	pop	%r12
	ret
EPILOGUE()