dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2".

dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C K8,K9:	 4
C K10:		 4
C P4:		 ?
C P6 core2:	 4.3-4.5 (fluctuating)
C P6 corei7:	 5

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`v0',	`%rcx')

C OPERATION, as implemented below
C   For i = 0 .. n-1 the 128-bit products up[i] * v0 are formed with mul and
C   chained limb by limb.  The resulting limbs are added to (addmul_1) or
C   subtracted from (submul_1) rp[i] in place through the ADDSUB macro.
C   The value returned in rax is the high limb of the last product plus the
C   final carry or borrow.
C
C REQUIREMENTS
C   n must be at least 1: up[0] and rp[0] are loaded before n is examined.
C
C REGISTER USAGE
C   rbx       negative limb index, stepped towards zero
C   rax, rdx  mul operand and product rdx:rax; rax carries the return value
C   r8, r9    low and high limb of one pending product
C   r11, rbp  low and high limb of the other pending product
C   r10       rp limb currently being updated
C   rbx and rbp are callee-saved, so they are pushed on entry and popped
C   before returning.

ifdef(`OPERATION_addmul_1',`
      define(`ADDSUB',        `add')
      define(`func',  `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
      define(`ADDSUB',        `sub')
      define(`func',  `mpn_submul_1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	push	%rbx
	push	%rbp
C rbx = -n.  The count is copied out of rdx because mul overwrites rdx.
	lea	(%rdx), %rbx
	neg	%rbx

C Preload the first source limb and the first destination limb.
	mov	(up), %rax
	mov	(rp), %r10

C Rebase both pointers so that the negative index in rbx can be used:
C rp now addresses rp[n-2], up addresses one limb past the end of the source.
	lea	-16(rp,%rdx,8), rp
	lea	(up,%rdx,8), up
	mul	%rcx			C rdx:rax = up[0] * v0

C Bit 0 of -n equals bit 0 of n, so carry set here means n is odd.
	bt	$0, R32(%rbx)
	jc	L(odd)

C Even n: form the second product as well before entering the loop.
	lea	(%rax), %r11		C r11 = low limb of product 0
	mov	8(up,%rbx,8), %rax	C rax = up[1]
	lea	(%rdx), %rbp		C rbp = high limb of product 0
	mul	%rcx
	add	$2, %rbx
	jns	L(n2)			C n = 2, finish in the tail

	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	lea	(%rdx), %r9
	jmp	L(mid)

C Odd n: one product is pending; form one more and join at the loop bottom.
L(odd):	add	$1, %rbx
	jns	L(n1)			C n = 1, only the last limb remains

	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	lea	(%rdx), %r9
	mul	%rcx
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	lea	(%rdx), %rbp
	jmp	L(e)

C Main loop, two limbs per iteration.  The pairs r8,r9 and r11,rbp take turns
C holding a pending product.  Each half applies one low limb to the rp limb in
C r10 with ADDSUB, then uses adc to fold the carry and the matching high limb
C into the other pair.  Only lea and mov sit between ADDSUB and the adc
C instructions, since those leave the carry flag untouched.
	ALIGN(16)
L(top):	mul	%rcx
	ADDSUB	%r8, %r10
	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	adc	%r9, %r11
	mov	%r10, -8(rp,%rbx,8)
	mov	(rp,%rbx,8), %r10
	lea	(%rdx), %r9
	adc	$0, %rbp
L(mid):	mul	%rcx
	ADDSUB	%r11, %r10
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	adc	%rbp, %r8
	mov	%r10, (rp,%rbx,8)
	mov	8(rp,%rbx,8), %r10
	lea	(%rdx), %rbp
	adc	$0, %r9
L(e):	add	$2, %rbx
	js	L(top)

C Wind down.  rbx is zero from here on, so plain offsets from rp are used.
	mul	%rcx			C last product
	ADDSUB	%r8, %r10
	adc	%r9, %r11
	mov	%r10, -8(rp)		C store rp[n-3]
	adc	$0, %rbp
L(n2):	mov	(rp), %r10		C rp[n-2]
	ADDSUB	%r11, %r10
	adc	%rbp, %rax
	mov	%r10, (rp)
	adc	$0, %rdx
L(n1):	mov	8(rp), %r10		C rp[n-1]
	ADDSUB	%rax, %r10
	mov	%r10, 8(rp)
	mov	R32(%rbx), R32(%rax)	C zero rax; rbx is 0 and mov keeps the carry
	adc	%rdx, %rax		C return high limb plus carry
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()