1dnl AMD64 mpn_gcd_11 optimised for AMD BD4, ZN1. 2 3dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn 4dnl Granlund. 5 6dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017, 2019 Free Software 7dnl Foundation, Inc. 8 9dnl This file is part of the GNU MP Library. 10dnl 11dnl The GNU MP Library is free software; you can redistribute it and/or modify 12dnl it under the terms of either: 13dnl 14dnl * the GNU Lesser General Public License as published by the Free 15dnl Software Foundation; either version 3 of the License, or (at your 16dnl option) any later version. 17dnl 18dnl or 19dnl 20dnl * the GNU General Public License as published by the Free Software 21dnl Foundation; either version 2 of the License, or (at your option) any 22dnl later version. 23dnl 24dnl or both in parallel, as here. 25dnl 26dnl The GNU MP Library is distributed in the hope that it will be useful, but 27dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29dnl for more details. 30dnl 31dnl You should have received copies of the GNU General Public License and the 32dnl GNU Lesser General Public License along with the GNU MP Library. If not, 33dnl see https://www.gnu.org/licenses/. 34 35include(`../config.m4') 36 37 38C cycles/bit (approx) 39C AMD K8,K9 - 40C AMD K10 - 41C AMD bd1 - 42C AMD bd2 - 43C AMD bd3 - 44C AMD bd4 3.73 45C AMD bt1 - 46C AMD bt2 - 47C AMD zn1 3.33 48C AMD zn2 3.48 49C Intel P4 - 50C Intel CNR - 51C Intel PNR - 52C Intel NHM - 53C Intel WSM - 54C Intel SBR - 55C Intel IBR - 56C Intel HWL ? 57C Intel BWL ? 58C Intel SKL ? 
C Intel atom     -
C Intel SLM      -
C Intel GLM      -
C Intel GLM+     -
C VIA nano       -

C Symbolic names for the two operand registers.
define(`u0', `%rdi')
define(`v0', `%rsi')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_gcd_11 (u0, v0) -- gcd of two single limbs by subtract-and-shift.
C
C Each iteration replaces (u, v) by (|u - v| >> tz, min(u, v)), where tz is
C the number of trailing zero bits of the difference, and stops as soon as
C the two values are equal.
C
C NOTE(review): only the trailing zeros of the difference are ever removed,
C so both inputs are presumably required to be odd -- confirm against the
C documented mpn_gcd_11 contract.
C NOTE(review): FUNC_ENTRY/FUNC_EXIT are defined outside this file
C (config.m4); presumably they adapt the DOS64 calling convention so that
C the operands arrive in rdi/rsi -- confirm.
C
C Register roles:
C   u0 (rdi), v0 (rsi)  current operand pair, updated in place
C   rax                 equals u0 at every loop entry; the result on return
C   rdx                 equals v0 - u0 at every loop entry; 0 on return
C   rcx                 trailing-zero count of rdx
C
C BMI requirements: shrx, plus bsf with a rep prefix (the tzcnt encoding).

ASM_START()
        TEXT
        ALIGN(64)
PROLOGUE(mpn_gcd_11)
        FUNC_ENTRY(2)
        mov     u0, %rax                C rax = u
        mov     v0, %rdx
        sub     u0, %rdx                C rdx = v - u
        jz      L(done)                 C u == v, so rax already is the gcd

        ALIGN(16)
L(loop):
        C Invariant here: rax == u0, rdx == v0 - u0, rdx != 0.
        C tz(v - u) == tz(|u - v|), so the count can be taken from rdx
        C before the sign of the difference is known.
        rep;bsf %rdx, %rcx              C tzcnt: rcx = trailing zeros of v - u
        sub     v0, u0                  C u0 = u - v, carry set iff u < v
        cmovc   %rdx, u0                C if u < v: take v - u instead => |u - v|
        cmovc   %rax, v0                C if u < v: v = old u           => min(u,v)
        shrx(   %rcx, u0, %rax)         C rax = |u - v| >> rcx
        shrx(   %rcx, u0, u0)           C u0  = same value, keeps rax == u0
        mov     v0, %rdx
        sub     %rax, %rdx              C rdx = v - u for the next round
        jnz     L(loop)                 C continue until u == v

L(done):
        C rax = result
        C rdx = 0 for the benefit of internal gcd_22 call
        FUNC_EXIT()
        ret
EPILOGUE()