dnl  AMD64 mpn_gcd_11 optimised for AMD BD4, ZN1.

dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
dnl  Granlund.
5
dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017, 2019 Free Software
dnl  Foundation, Inc.
8
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
34
dnl  Shared m4 definitions.  None of the macros used below (PROLOGUE,
dnl  FUNC_ENTRY, L, shrx, C, ...) are defined in this file; presumably they
dnl  all come from here.
include(`../config.m4')
36
37
C	     cycles/bit (approx)
C AMD K8,K9	 -
C AMD K10	 -
C AMD bd1	 -
C AMD bd2	 -
C AMD bd3	 -
C AMD bd4	 3.73
C AMD bt1	 -
C AMD bt2	 -
C AMD zn1	 3.33
C AMD zn2	 3.48
C Intel P4	 -
C Intel CNR	 -
C Intel PNR	 -
C Intel NHM	 -
C Intel WSM	 -
C Intel SBR	 -
C Intel IBR	 -
C Intel HWL	 ?
C Intel BWL	 ?
C Intel SKL	 ?
C Intel atom	 -
C Intel SLM	 -
C Intel GLM	 -
C Intel GLM+	 -
C VIA nano	 -
64
C Register names for the two operands of mpn_gcd_11.
define(`u0',    `%rdi')
define(`v0',    `%rsi')

C ABI variants this file declares support for.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(64)
C mpn_gcd_11 -- greatest common divisor of two single-limb operands.
C
C In:	u0 (%rdi) and v0 (%rsi), as set up by FUNC_ENTRY(2).
C Out:	%rax = result; %rdx = 0 on return (see L(end)).
C Uses:	the visible instructions write only %rax, %rcx, %rdx, u0, v0 and
C	the flags.  FUNC_ENTRY and FUNC_EXIT are macros defined elsewhere.
C
C NOTE(review): the loop only removes factors of two from the difference
C u - v, never from the operands themselves, so both inputs are presumably
C required to be odd -- confirm against the callers.
C
C Invariant at L(top): %rax = u0 and %rdx = v0 - u0, with %rdx nonzero.
PROLOGUE(mpn_gcd_11)
	FUNC_ENTRY(2)
	mov	u0, %rax		C rax = u
	mov	v0, %rdx
	sub	u0, %rdx		C v - u
	jz	L(end)			C u = v: result is u, already in rax

	ALIGN(16)
	C Each pass replaces (u, v) by (|u - v| >> k, min(u, v)), where k is
	C the number of trailing zero bits of the difference.  The count is
	C taken from the difference already sitting in rdx; v - u and u - v
	C have the same trailing zero count, so it is valid whichever way the
	C comparison below goes.  rdx is nonzero here, so rep;bsf yields that
	C count whether it executes as tzcnt or as plain bsf.
L(top):	rep;bsf	%rdx, %rcx		C tzcnt!
	sub	v0, u0			C u - v
	cmovc	%rdx, u0		C u = |u - v|
	cmovc	%rax, v0		C v = min(u,v)
	shrx(	%rcx, u0, %rax)		C rax = u >> k
	shrx(	%rcx, u0, u0)		C u = u >> k, same value as rax
	mov	v0, %rdx
	sub	%rax, %rdx		C v - u
	jnz	L(top)			C loop until u = v

L(end):	C rax = result
	C rdx = 0 for the benefit of internal gcd_22 call
	FUNC_EXIT()
	ret
EPILOGUE()
97