dnl  Intel mpn_add_n/mpn_sub_n optimised for Conroe, Nehalem.

dnl  Copyright 2006, 2007, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2
C AMD K10	 1.93\2
C AMD bull	 1.62\2.1
C AMD pile	 1.6\1.7
C AMD steam
C AMD excavator
C AMD bobcat	 2.79
C AMD jaguar	 2.54
C Intel P4	10
C Intel core2	 2
C Intel NHM	 2
C Intel SBR	 2
C Intel IBR	 1.95
C Intel HWL	 1.72
C Intel BWL	 1.54
C Intel SKL	 1.52
C Intel atom	 9
C Intel SLM	 6.5
C VIA nano	 3

C OVERVIEW
C
C func computes rp[i] = up[i] ADCSBB vp[i] for i = 0 .. n-1 and chains the
C carry or borrow through CF.  The final CF is returned as 0 or 1 in eax.
C func starts with a zero carry-in.  func_nc enters the same code at L(start)
C with the carry-in in r8, which is named cy below.
C
C The first limb of each source is loaded before n is examined, so the code
C assumes n >= 1.
C
C Register use after L(start):
C   rp, up, vp	point just past the last limb of each operand
C   n		negative limb index counting up; it is exactly 0 at L(top)
C		once only the final limb pair remains
C   r10/r11	source limb pair, alternating with r8/r9
C   r8/r9	source limb pair; r8 first carries cy until neg moves it to CF
C   CF		live across the whole loop: only mov, lea, jrcxz and jmp are
C		executed between consecutive ADCSBB instructions
C
C The loop is unrolled four ways and is entered at L(e00), L(top), L(e10) or
C L(e11) for n mod 4 = 0, 1, 2 or 3 respectively.
C
C NOTE(review): FUNC_ENTRY, FUNC_EXIT and IFDOS are defined outside this file.
C Presumably they adapt the DOS64 calling convention to the register names
C used here - confirm against the x86_64 m4 definitions.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`n',	`%rcx')
define(`cy',	`%r8')

ifdef(`OPERATION_add_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	FUNC_ENTRY(4)
	xor	%r8, %r8		C plain entry point: carry-in is zero
L(start):
	mov	(up), %r10		C preload up[0]
	mov	(vp), %r11		C preload vp[0]

	lea	(up,n,8), up		C advance all three pointers past the last limb
	lea	(vp,n,8), vp
	lea	(rp,n,8), rp
	mov	R32(n), R32(%rax)
	neg	n			C n is now a negative index from the end
	and	$3, R32(%rax)		C eax = n mod 4, selects the loop entry point
	je	L(b00)
	add	%rax, n			C clear low rcx bits for jrcxz
	cmp	$2, R32(%rax)
	jl	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2

C Fall through for n mod 4 = 3.  The preloaded limbs stay in r10/r11.
L(b11):	neg	%r8			C set cy: CF = 1 iff r8 is nonzero
	jmp	L(e11)

C n mod 4 = 0.  L(e00) consumes its first limb pair from r8/r9, so move the
C preloaded limbs there once the carry-in has been transferred to CF.
L(b00):	neg	%r8			C set cy: CF = 1 iff r8 is nonzero
	mov	%r10, %r8
	mov	%r11, %r9
	lea	4(n), n			C lea keeps CF; matches the lea done at L(top)
	jmp	L(e00)

C Padding that is never executed, since it follows an unconditional jmp.
C Presumably placed for code alignment - TODO confirm.
	nop
	nop
	nop
C n mod 4 = 1.  The preloaded limbs stay in r10/r11.
L(b01):	neg	%r8			C set cy: CF = 1 iff r8 is nonzero
	jmp	L(top)

C n mod 4 = 2.  L(e10) consumes its first limb pair from r8/r9.
L(b10):	neg	%r8			C set cy: CF = 1 iff r8 is nonzero
	mov	%r10, %r8
	mov	%r11, %r9
	jmp	L(e10)

C Exit path, reached from L(top) when n is 0.  The last limb pair is still
C pending in r10/r11.
L(end):	ADCSBB	%r11, %r10
	mov	%r10, -8(rp)		C store the last result limb
	mov	R32(%rcx), R32(%rax)	C clear eax, ecx contains 0; mov keeps CF
	adc	R32(%rax), R32(%rax)	C eax = CF, the carry or borrow out
	FUNC_EXIT()
	ret

C Main loop, four limbs per iteration.  Each step loads the next limb pair
C into one register pair while the other pair is combined and stored.
	ALIGN(16)
L(top):	jrcxz	L(end)			C tests rcx without touching CF
	mov	(up,n,8), %r8
	mov	(vp,n,8), %r9
	lea	4(n), n			C advance the index; lea keeps CF
	ADCSBB	%r11, %r10
	mov	%r10, -40(rp,n,8)
L(e00):	mov	-24(up,n,8), %r10
	mov	-24(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, -32(rp,n,8)
L(e11):	mov	-16(up,n,8), %r8
	mov	-16(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -24(rp,n,8)
L(e10):	mov	-8(up,n,8), %r10
	mov	-8(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, -16(rp,n,8)
	jmp	L(top)
EPILOGUE()

C Entry point with an explicit carry-in.  It expects the carry-in in r8 and
C joins the shared code at L(start).  Under IFDOS the carry-in is first read
C from the stack at 56(%rsp).
C NOTE(review): that offset presumably accounts for whatever FUNC_ENTRY
C pushed - confirm.
PROLOGUE(func_nc)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
	jmp	L(start)
EPILOGUE()