cnd_aors_n.asm revision 1.1.1.2
1dnl  ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
2
3dnl  Contributed to the GNU project by Torbjörn Granlund.
4
5dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C	     cycles/limb
36C Cortex-A53	3.87-4.37
37C Cortex-A57	 1.75
38C X-Gene	 2.0
39
C Move the m4 comment delimiter from # to the word blah.  Without this, a #
C immediate would hide the rest of its line from macro expansion, for example
C the L() label that follows the bit number in a tbz or tbnz instruction.
changecom(blah)

C Symbolic names for the five incoming argument registers.
define(`cnd', `x0')	C condition word, converted to a limb mask on entry
define(`rp',  `x1')	C pointer to the limbs that are written
define(`up',  `x2')	C pointer to the first operand, always used
define(`vp',  `x3')	C pointer to the second operand, masked by cnd
define(`n',   `x4')	C number of limbs to process
47
C Pick the add or the subtract flavour.  Four macros differ between the two:
C   func     symbol name handed to PROLOGUE
C   ADDSUBC  flag-setting limb operation that consumes and produces the carry
C   CLRCY    compare of xzr with itself that initialises the carry flag:
C            cmn leaves it clear (no carry in), cmp leaves it set (no borrow in)
C   RETVAL   converts the final carry flag into the 0 or 1 result in x0:
C            carry set means carry out, carry clear means borrow out
ifdef(`OPERATION_cnd_add_n',`
	define(`func',		mpn_cnd_add_n)
	define(`ADDSUBC',	adcs)
	define(`CLRCY',		`cmn	xzr, xzr')
	define(`RETVAL',	`cset	x0, cs')')
ifdef(`OPERATION_cnd_sub_n',`
	define(`func',		mpn_cnd_sub_n)
	define(`ADDSUBC',	sbcs)
	define(`CLRCY',		`cmp	xzr, xzr')
	define(`RETVAL',	`cset	x0, cc')')
58
MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)

C Loop counter: number of 4-limb groups still to be processed.
C x17 is used instead of x18 because AAPCS64 makes x18 the platform register,
C which platforms such as Darwin and Windows reserve.  This routine makes no
C calls and branches only to its own labels, so x17 is free for its whole body.
define(`cnt',	`x17')

ASM_START()

C func (cnd, rp, up, vp, n)
C
C Writes n limbs to rp.  When cnd is nonzero each result limb is the up limb
C combined with the vp limb through ADDSUBC, with the carry flag chained from
C limb to limb.  When cnd is zero the vp limbs are masked to zero first, so the
C result is a copy of up and the return value is 0.  There is no branch on cnd:
C the same instructions run in both cases.
C
C In:    x0 = cnd, x1 = rp, x2 = up, x3 = vp, x4 = n
C Out:   x0 = 1 if the top limb produced a carry (add) or borrow (sub), else 0
C Uses:  x6-x13, x17 and the flags; rp, up and vp are modified in place
C Note:  n must be at least 1.  n = 0 is not handled: it would take the b00
C        path with a zero group count.
C
C Between CLRCY and RETVAL only ADDSUBC writes the flags; every other
C instruction is a load, store, bic, non-flag-setting sub or a branch, so the
C carry chain is never disturbed.
PROLOGUE(func)
	cmp	cnd, #1			C carry flag set iff cnd is nonzero
	sbc	cnd, cnd, cnd		C cnd = 0 if it was nonzero, else all ones

	CLRCY				C initialise the carry flag for the chain

	lsr	cnt, n, #2		C cnt = n / 4
	tbz	n, #0, L(bx0)		C even n: no single leading limb

L(bx1):	ldr	x13, [vp]		C odd n: handle limb 0 on its own
	ldr	x11, [up]
	bic	x7, x13, cnd		C vp limb survives only when the mask is zero
	ADDSUBC	x9, x11, x7
	str	x9, [rp]
	tbnz	n, #1, L(b11)

L(b01):	cbz	cnt, L(rt)		C n = 1: nothing left to do
	ldp	x12, x13, [vp,#8]	C n mod 4 = 1: preload limbs 1 and 2
	ldp	x10, x11, [up,#8]
	sub	up, up, #8		C bias the pointers so that the pre-indexed
	sub	vp, vp, #8		C accesses at mid land on the right limbs
	sub	rp, rp, #24
	b	L(mid)

L(b11):	ldp	x12, x13, [vp,#8]!	C n mod 4 = 3: preload limbs 1 and 2
	ldp	x10, x11, [up,#8]!
	sub	rp, rp, #8
	cbz	cnt, L(end)		C n = 3: only the final pair remains
	b	L(top)

L(bx0):	ldp	x12, x13, [vp]		C even n: preload limbs 0 and 1
	ldp	x10, x11, [up]
	tbnz	n, #1, L(b10)

L(b00):	sub	up, up, #16		C n mod 4 = 0: bias the pointers and enter
	sub	vp, vp, #16		C the loop at its midpoint
	sub	rp, rp, #32
	b	L(mid)

L(b10):	sub	rp, rp, #16		C n mod 4 = 2
	cbz	cnt, L(end)		C n = 2: only the final pair remains

C Main loop, four limbs per iteration.  x10-x13 always hold the pair loaded
C one step earlier; each half masks and combines that pair while the next
C pair is being loaded.
	ALIGN(16)
L(top):	bic	x6, x12, cnd
	bic	x7, x13, cnd
	ldp	x12, x13, [vp,#16]
	ADDSUBC	x8, x10, x6
	ADDSUBC	x9, x11, x7
	ldp	x10, x11, [up,#16]
	stp	x8, x9, [rp,#16]
L(mid):	bic	x6, x12, cnd
	bic	x7, x13, cnd
	ldp	x12, x13, [vp,#32]!	C pre-index: advance vp by four limbs
	ADDSUBC	x8, x10, x6
	ADDSUBC	x9, x11, x7
	ldp	x10, x11, [up,#32]!	C pre-index: advance up by four limbs
	stp	x8, x9, [rp,#32]!	C pre-index: advance rp by four limbs
	sub	cnt, cnt, #1		C plain sub: leaves the carry flag alone
	cbnz	cnt, L(top)

L(end):	bic	x6, x12, cnd		C final pair, already loaded
	bic	x7, x13, cnd
	ADDSUBC	x8, x10, x6
	ADDSUBC	x9, x11, x7
	stp	x8, x9, [rp,#16]
L(rt):	RETVAL				C x0 = carry or borrow out as 0 or 1
	ret
EPILOGUE()
130