1dnl X86-64 mpn_add_n, mpn_sub_n, optimised for Intel Atom. 2 3dnl Copyright 2011, 2017 Free Software Foundation, Inc. 4 5dnl Contributed to the GNU project by Marco Bodrato. Ported to 64-bit by 6dnl Torbjörn Granlund. 7 8dnl This file is part of the GNU MP Library. 9dnl 10dnl The GNU MP Library is free software; you can redistribute it and/or modify 11dnl it under the terms of either: 12dnl 13dnl * the GNU Lesser General Public License as published by the Free 14dnl Software Foundation; either version 3 of the License, or (at your 15dnl option) any later version. 16dnl 17dnl or 18dnl 19dnl * the GNU General Public License as published by the Free Software 20dnl Foundation; either version 2 of the License, or (at your option) any 21dnl later version. 22dnl 23dnl or both in parallel, as here. 24dnl 25dnl The GNU MP Library is distributed in the hope that it will be useful, but 26dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 28dnl for more details. 29dnl 30dnl You should have received copies of the GNU General Public License and the 31dnl GNU Lesser General Public License along with the GNU MP Library. If not, 32dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2
C AMD K10	 2
C AMD bull	 2.34\2.63
C AMD pile	 2.27\2.52
C AMD steam
C AMD excavator
C AMD bobcat	 2.79
C AMD jaguar	 2.78
C Intel P4	11
C Intel core2	 7.5
C Intel NHM	 8.5
C Intel SBR	 2.11
C Intel IBR	 2.07
C Intel HWL	 1.75
C Intel BWL	 1.51
C Intel SKL	 1.52
C Intel atom	 3
C Intel SLM	 4
C VIA nano

C Operand registers used by the code below.  The trailing comment on each
C line presumably names where the same argument arrives under the DOS64 ABI
C before FUNC_ENTRY runs (TODO confirm; FUNC_ENTRY is defined outside this
C file).
define(`rp',	`%rdi')	C rcx
define(`up',	`%rsi')	C rdx
define(`vp',	`%rdx')	C r8
define(`n',	`%rcx')	C r9
define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)

C Select the add or the subtract flavour: ADCSBB becomes adc or sbb and the
C two entry point names are set to match.  Presumably the build defines
C exactly one of OPERATION_add_n and OPERATION_sub_n (TODO confirm).
ifdef(`OPERATION_add_n', `
	define(ADCSBB,	      adc)
	define(func_n,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBB,	      sbb)
	define(func_n,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C func_n (rp, up, vp, n)
C
C Walks n 8-byte limbs.  For each limb i it computes up[i] plus vp[i] (adc
C flavour) or up[i] minus vp[i] (sbb flavour), including the carry or borrow
C left in CF by the previous limb, and stores the result at rp[i].  The
C return value is the final CF, 0 or 1, in the 32-bit form of rax.
C
C Structure: n is halved up front and the loop handles two limbs per pass,
C alternating between r9 and r10.  One further limb is always handled at
C L(end).  Odd n enters the loop at L(top), even n enters at L(mid), and
C n = 1 skips the loop.
C
C n must be at least 1.  n = 0 takes the same branch as n = 1, so one limb
C would still be read and written.
C
C The carry chain relies on CF surviving from one ADCSBB to the next, so only
C mov, lea and dec (none of which modify CF) appear between them.  Keep that
C in mind before reordering or substituting instructions.
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func_n)
	FUNC_ENTRY(4)
	xor	cy, cy			C carry (none on entry for func_n)

C Shared body, also reached from func_nc with cy holding the incoming carry.
C shr leaves the old bit 0 of n in CF and sets ZF when the halved count is 0.
L(com):	shr	n			C n >> 1
	jz	L(1)			C n == 1
	jc	L(1m2)			C n % 2 == 1

C Even n.  Each path below starts with shr cy, which moves bit 0 of cy into
C CF to seed the chain; it runs only after jz/jc have consumed the flags
C from shr n.
L(0m2):	shr	cy
	mov	(up), %r10		C first limb goes in r10 for L(mid)
	lea	8(up), up		C bias the pointers so that the
	lea	8(vp), vp		C   offsets used from L(mid) onwards
	lea	-8(rp), rp		C   address limb 0
	jmp	L(mid)

C Single limb: seed CF, load the limb and go straight to the tail.
L(1):	shr	cy
	mov	(up), %r9
	jmp	L(end)

C Odd n of 3 or more: seed CF, load limb 0 and fall into the loop head.
L(1m2):	shr	cy
	mov	(up), %r9

	ALIGN(16)
L(top):	ADCSBB	(vp), %r9		C first limb of the pair
	lea	16(up), up
	mov	-8(up), %r10		C preload second limb of the pair
	lea	16(vp), vp
	mov	%r9, (rp)		C store first result
L(mid):	ADCSBB	-8(vp), %r10		C second limb of the pair
	lea	16(rp), rp
	dec	n			C sets ZF for jnz, leaves CF alone
	mov	(up), %r9		C preload limb for next L(top) or L(end)
	mov	%r10, -8(rp)		C store second result
	jnz	L(top)

C Tail: last limb, then turn CF into the return value.  rax is cleared with
C mov rather than xor because mov does not touch CF; adc of the zeroed
C register with itself then yields exactly CF.
L(end):	ADCSBB	(vp), %r9
	mov	$0, R32(%rax)
	mov	%r9, (rp)
	adc	R32(%rax), R32(%rax)
	FUNC_EXIT()
	ret
EPILOGUE()

C func_nc (rp, up, vp, n, cy)
C
C Same operation as func_n but with an incoming carry or borrow.  Only bit 0
C of cy is used, since the shared body shifts it into CF with shr cy.
C
C Outside the IFDOS line cy is used as it stands in r8.  For DOS64 the value
C is loaded from 56(%rsp); the define comment above says rsp+40, and
C NOTE(review): the 16 byte difference presumably reflects stack space
C consumed by FUNC_ENTRY - confirm against its definition.
PROLOGUE(func_nc)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), cy	')
	jmp	L(com)			C join the shared body inside func_n
EPILOGUE()
ASM_END()