dnl  AMD64 mpn_gcd_22.  Assumes useless bsf, useless shrd, tzcnt, no shlx.

dnl  Copyright 2019 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/bit
C AMD K8,K9	12.3
C AMD K10	 8.0
C AMD bd1	10.0
C AMD bd2	 7.2
C AMD bd3	 ?
C AMD bd4	 6.7
C AMD bt1	13.6
C AMD bt2	 8.9
C AMD zn1	 5.7
C AMD zn2	 5.6
C Intel P4	 ?
C Intel CNR	 9.7
C Intel PNR	 9.7
C Intel NHM	 9.4
C Intel WSM	 9.5
C Intel SBR	10.3
C Intel IBR	 ?
C Intel HWL	 8.2
C Intel BWL	 7.4
C Intel SKL	 7.3
C Intel atom	26.5
C Intel SLM	17.4
C Intel GLM	13.4
C Intel GLM+	12.4
C VIA nano	 ?
C mpn_gcd_22 -- gcd of the two-limb values (u1,u0) and (v1,v0).
C
C In:   u1 = %rdi, u0 = %rsi, v1 = %rdx, v0 = %rcx
C Out:  when the loop ends with u = v the result is left in %rax (low limb)
C       and %rdx (high limb).  When both high limbs reach zero the routine
C       tail-calls mpn_gcd_11 with %rdi = v0 and %rsi = u0, and the return
C       value is whatever that routine produces.
C Uses: %rax, %rcx, %r8, %r9, %r10, %r11 and the flags, besides the
C       argument registers.
C
C NOTE(review): the shift step relies on the low-limb difference having at
C least one trailing zero bit, so both inputs are presumably required to be
C odd -- confirm against the callers.

define(`u1',       `%rdi')
define(`u0',       `%rsi')
define(`v1',       `%rdx')
define(`v0_param', `%rcx')

C v0 is moved out of %rcx on entry so that %cl can carry the shift count.
define(`v0',       `%rax')
define(`tzc',      `%rcx')

C sv = saved copy of u, df = the difference v - u (df1 doubles as scratch).
define(`sv0',      `%r8')
define(`sv1',      `%r9')
define(`df0',      `%r10')
define(`df1',      `%r11')

dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(64)
PROLOGUE(mpn_gcd_22)
	FUNC_ENTRY(4)
	mov	v0_param, v0		C release %rcx for the shift count

	ALIGN(16)
L(loop):
	mov	v0, df0
	sub	u0, df0			C df0 = v0 - u0, CF = borrow
	jz	L(eqlo)			C low limbs are equal, special case
	mov	v1, df1
	sbb	u1, df1			C (df1,df0) = v - u

	rep;bsf	df0, tzc		C tzcnt of the low difference limb
	mov	u0, sv0
	mov	u1, sv1			C (sv1,sv0) = u before it is overwritten

	sub	v0, u0
	sbb	v1, u1			C (u1,u0) = u - v, CF set when u < v

C Entered with CF set exactly when the subtraction into u borrowed.
L(sel):
	cmovc	df0, u0
	cmovc	df1, u1			C borrow: take v - u, giving u = |u - v|
	cmovc	sv0, v0
	cmovc	sv1, v1			C borrow: v = old u, giving v = min(u,v)

C Shift the two-limb value (u1,u0) right by tzc bits.
L(rsh):
	shr	R8(tzc), u0
	mov	u1, df1			C df1 is dead here, reuse it as scratch
	shr	R8(tzc), u1
	neg	tzc			C count is used mod 64: (64 - tzc) mod 64
	shl	R8(tzc), df1		C bits of u1 that drop into u0
	or	df1, u0

	test	v1, v1
	jnz	L(loop)
	test	u1, u1
	jnz	L(loop)			C repeat while either high limb is nonzero

C Both high limbs are zero: hand the low limbs to the one-limb routine.
C u0 already sits in %rsi, so only v0 needs to be moved.
L(to11):
	mov	v0, %rdi
	TCALL(mpn_gcd_11)

C Reached when v0 - u0 = 0.
C   1. If v1 - u1 = 0 as well, then u = v and that value is the gcd.
C   2. Otherwise carry on with (v1,v0) and the single limb |u1 - v1|,
C      rejoining the main path at the conditional moves.
L(eqlo):
	mov	v1, df0
	sub	u1, df0			C df0 = v1 - u1
	jz	L(done)

	xor	df1, df1		C high limb of the difference is zero
	rep;bsf	df0, tzc		C tzcnt of the high-limb difference
	mov	u0, sv0
	mov	u1, sv1			C (sv1,sv0) = u before it is overwritten
	mov	u1, u0
	xor	u1, u1			C u = (0, old u1)
	sub	v1, u0			C u0 = u1 - v1, CF set when u1 < v1
	jmp	L(sel)

C Nothing to move: v0 is %rax and v1 is %rdx, which hold the result.
L(done):
	FUNC_EXIT()
	ret
EPILOGUE()