dnl  ARM v8a mpn_gcd_22.

dnl  Copyright 2019 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C Set the m4 comment delimiter to an unused word, so the hash sign that
C prefixes immediates below is not taken as the start of an m4 comment.
changecom(blah)

C	     cycles/bit (approx)
C Cortex-A35	 ?
C Cortex-A53	 7.26
C Cortex-A55	 ?
C Cortex-A57	 ?
C Cortex-A72	 5.72
C Cortex-A73	 6.43
C Cortex-A75	 ?
C Cortex-A76	 ?
C Cortex-A77	 ?


C Operand limbs, updated in place by the routine.  The 1 suffix is the high
C limb and the 0 suffix the low limb of each two-limb value.
C NOTE(review): presumably these are the incoming arguments in x0-x3 --
C confirm against the C prototype.
define(`u1',    `x0')
define(`u0',    `x1')
define(`v1',    `x2')
define(`v0',    `x3')

C Scratch registers: t1:t0 holds the two-limb difference, cnt its trailing
C zero count, and tnc the negated count used for the complementary shift.
C The routine also uses x4, x11, x12 and x14 directly as unnamed temporaries.
define(`t0',    `x5')
define(`t1',    `x6')
define(`cnt',   `x7')
define(`tnc',   `x8')

ASM_START()

C mpn_gcd_22 -- binary GCD of two 2-limb values, result returned in x0/x1.
C
C In:    u1 = x0, u0 = x1	high and low limb of u
C        v1 = x2, v0 = x3	high and low limb of v
C        (presumably the C-level arguments in that order -- confirm against
C        the prototype)
C Out:   x0 = low limb of the gcd, x1 = high limb of the gcd
C Clobb: x4-x8, x11, x12, x14 and the condition flags; no stack, no memory
C
C NOTE(review): the shift step after L(bck) only forms a correct 128-bit
C right shift when cnt is nonzero or t1 is zero, so the main loop appears to
C rely on u0 and v0 both being odd on entry (their difference is then even).
C Confirm that callers guarantee this.
PROLOGUE(mpn_gcd_22)

	ALIGN(16)
C Main loop, repeated while either high limb is nonzero.  Each pass replaces
C u by |u-v| shifted right by its trailing zero count, and v by min(u,v).
L(top):	subs	t0, u0, v0		C 0 6
	cbz	t0, L(lowz)		C low limbs equal: handled at L(lowz)
	sbcs	t1, u1, v1		C 1 7
C The flags now hold the 128-bit compare: cs means u >= v, cc means u < v.
C They are consumed up to the csel pair at L(bck); rbit, cneg and cinv leave
C them untouched.

	rbit	cnt, t0			C 1

C Negate t1:t0 when u < v.  t0 is nonzero here, so negating the low limb
C carries nothing into the high limb and a plain inversion of t1 suffices.
	cneg	t0, t0, cc		C 2
	cinv	t1, t1, cc		C 2 u = |u - v|
L(bck):	csel	v0, v0, u0, cs		C 2
	csel	v1, v1, u1, cs		C 2 v = min(u,v)

C rbit followed by clz yields the number of trailing zero bits of the low
C limb of the difference.
	clz	cnt, cnt		C 2
C Register-controlled shifts use the amount modulo 64, so -cnt acts as
C 64-cnt in the lsl below.
	sub	tnc, xzr, cnt		C 3

C u1:u0 = t1:t0 shifted right by cnt bits.
	lsr	u0, t0, cnt		C 3
	lsl	x14, t1, tnc		C 4
	lsr	u1, t1, cnt		C 3
	orr	u0, u0, x14		C 5

C Stay in the two-limb loop while u1 or v1 is nonzero.
	orr	x11, u1, v1
	cbnz	x11, L(top)


C Both high limbs are zero from here on: finish with a one-limb loop on
C u0 and v0.
	subs	x4, u0, v0		C			0
	b.eq	L(end1)			C

	ALIGN(16)
C x12 = trailing zero count of the difference in x4.  The flags from the
C preceding subs (cs means u0 >= v0) are still live for csneg and csel,
C since rbit and clz leave them untouched.
L(top1):rbit	x12, x4			C			1,5
	clz	x12, x12		C			2
	csneg	x4, x4, x4, cs		C v = abs(u-v), even	1
	csel	u0, v0, u0, cs		C u = min(u,v)		1
	lsr	v0, x4, x12		C			3
	subs	x4, u0, v0		C			4
	b.ne	L(top1)			C
C u0 = v0 here: return it as the low limb with a zero high limb.
L(end1):mov	x0, u0
	mov	x1, #0
	ret

L(lowz):C We come here when v0 - u0 = 0
	C 1. If v1 - u1 = 0, then gcd is u = v.
	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
	subs	t0, u1, v1
	b.eq	L(end)
C Rejoin the common tail with t1 = 0 and t0 = |u1-v1|, i.e. the difference
C with its all-zero low limb dropped.  mov, rbit and cneg leave the flags of
C the subs above intact, so the csel pair at L(bck) still selects min(u,v).
	mov	t1, #0
	rbit	cnt, t0			C 1
	cneg	t0, t0, cc		C 2
	b	L(bck)			C FIXME: make conditional

C u = v: return v, low limb in x0 and high limb in x1.
L(end):	mov	x0, v0
	mov	x1, v1
	ret
EPILOGUE()
