1dnl  AMD64 mpn_gcd_22.  Assumes useful bsf, useless shrd, no tzcnt, no shlx.
2
3dnl  Copyright 2019 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C	     cycles/bit
35C AMD K8,K9	 ?
36C AMD K10	 7.4
37C AMD bd1	 9.9
38C AMD bd2	 ?
39C AMD bd3	 ?
40C AMD bd4	 ?
41C AMD bt1	 ?
42C AMD bt2	 ?
43C AMD zn1	 ?
44C AMD zn2	 ?
45C Intel P4	 ?
46C Intel CNR	 ?
47C Intel PNR	 ?
48C Intel NHM	 9.2
49C Intel WSM	 9.0
50C Intel SBR	 ?
51C Intel IBR	 ?
52C Intel HWL	 ?
53C Intel BWL	 ?
54C Intel SKL	 ?
55C Intel atom	 ?
56C Intel SLM	 ?
57C Intel GLM	 ?
58C Intel GLM+	 ?
59C VIA nano	 ?
60
61
C Register aliases.  u1/u0 and v1/v0_param name the four incoming argument
C registers: the high and low limbs of the two operands u and v.
62define(`u1',    `%rdi')
63define(`u0',    `%rsi')
64define(`v1',    `%rdx')
65define(`v0_param', `%rcx')
66
C v0 is copied out of %rcx into %rax at function entry.  That frees %rcx,
C whose low byte is the x86 variable-shift count register, for use as cnt.
67define(`v0',    `%rax')
68define(`cnt',   `%rcx')
69
C Scratch registers: s1:s0 holds a copy of u taken before it is overwritten
C by the subtraction, t1:t0 holds the difference v - u.
70define(`s0',    `%r8')
71define(`s1',    `%r9')
72define(`t0',    `%r10')
73define(`t1',    `%r11')
74
C Only STD64 is declared as supported.  The DOS64 declaration is left
C disabled: per its note, mp_double_limb_t is returned in memory under that
C ABI, whereas this routine hands its result back in %rax and %rdx.
75dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
76ABI_SUPPORT(STD64)
77
78ASM_START()
79	TEXT
80	ALIGN(64)
C mpn_gcd_22 -- greatest common divisor of the two-limb values u = {u1,u0}
C and v = {v1,v0}.
C
C In:   u1 = %rdi, u0 = %rsi, v1 = %rdx, v0 = %rcx (moved to %rax on entry).
C Out:  on the L(end) path the two-limb result is left with its low limb in
C       %rax and its high limb in %rdx.  Once both high limbs are zero,
C       control is instead handed to mpn_gcd_11 through TCALL with the two
C       remaining low limbs.
C Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and the flags.
C
C Each pass replaces u by |u - v| with its trailing zero bits shifted out,
C and v by min(u,v), and repeats while either high limb is non-zero.
C
C NOTE(review): the main path counts trailing zeros of v0 - u0 only and
C shifts the high limb left by 64 - cnt, which is only right for cnt >= 1.
C That holds if both low limbs are odd on entry -- confirm that this is the
C documented precondition.
81PROLOGUE(mpn_gcd_22)
82	FUNC_ENTRY(4)
83	mov	v0_param, v0		C free %rcx so that it can serve as cnt
84
85	ALIGN(16)
C Main loop.  Both t1:t0 = v - u and u = u - v are formed; the borrow out of
C the second subtraction then tells which of the two is |u - v|.
86L(top):	mov	v0, t0
87	sub	u0, t0
88	jz	L(lowz)		C	jump when low limb result = 0
89	mov	v1, t1
90	sbb	u1, t1		C t1:t0 = v - u
91
92	mov	u0, s0		C s1:s0 = copy of u, needed when u < v
93	mov	u1, s1
94
95	bsf	t0, cnt		C cnt = trailing zeros of v0 - u0 (non-zero here)
96
97	sub	v0, u0
98	sbb	v1, u1		C u = u - v, CF set iff u < v
99
C The cmov and mov instructions below leave the flags alone, so CF from the
C subtraction above (or from the one in L(lowz)) steers all four selections.
100L(bck):	cmovc	t0, u0		C u0 = low limb of |u - v|
101	cmovnc	u1, t1		C t1 = high limb of |u - v|
102	cmovc	s0, v0		C v = min(u,v)
103	cmovc	s1, v1		C v = min(u,v)
104
C Shift the two-limb difference right by cnt (x86 variable shifts take their
C count from %cl).  neg turns cnt into -cnt, and since a 64-bit shift uses
C only the low 6 bits of the count, the shl shifts by 64 - cnt and so yields
C the bits that cross from the high limb into the low limb.
105	shr	R8(cnt), u0
106	mov	t1, u1		C u1 = high limb of |u - v|, t1 keeps a copy
107	shr	R8(cnt), u1
108	neg	cnt
109	shl	R8(cnt), t1
110	or	t1, u0
111
C Keep looping while either operand still has a non-zero high limb.
112	test	u1, u1
113	jnz	L(top)
114	test	v1, v1
115	jnz	L(top)
116
C Both high limbs are zero: finish with the one-limb routine.  %rdi receives
C v0; u0 already lives in %rsi, which is why the second move stays commented
C out.
C NOTE(review): callers read a two-limb result, so this presumably relies on
C mpn_gcd_11 returning with %rdx = 0 -- verify in that routine.
117L(gcd_11):
118	mov	v0, %rdi
119C	mov	u0, %rsi
120	TCALL(	mpn_gcd_11)
121
122L(lowz):C We come here when v0 - u0 = 0
123	C 1. If v1 - u1 = 0, then gcd is u = v.
124	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
125	mov	v1, t0
126	sub	u1, t0		C t0 = v1 - u1
127	je	L(end)		C u = v, return v
128
C Set the registers up so that L(bck) can be reused with the high-limb
C difference acting as a one-limb value.  t1 and u1 are both zeroed, so the
C new high limb of u is zero whichever way CF goes.  cnt may be 0 here; that
C is harmless because the limb that gets shifted left by 64 - cnt is zero.
129	xor	t1, t1
130	mov	u0, s0		C s1:s0 = copy of u, as in the main loop
131	mov	u1, s1
132	bsf	t0, cnt		C cnt = trailing zeros of v1 - u1
133	mov	u1, u0
134	xor	u1, u1
135	sub	v1, u0		C u0 = u1 - v1, CF set iff u1 < v1
136	jmp	L(bck)
137
C u = v.  v0 and v1 already live in %rax and %rdx, the registers named in the
C two commented-out moves, so nothing needs to be copied before returning.
138L(end):	C mov	v0, %rax
139	C mov	v1, %rdx
140	FUNC_EXIT()
141	ret
142EPILOGUE()
143