1dnl  Alpha ev6 nails mpn_add_n and mpn_sub_n.
2
3dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31
32dnl  Runs at 2.5 cycles/limb.  It would be possible to reach 2.0 cycles/limb
33dnl  with 8-way unrolling.
34
35include(`../config.m4')
36
37dnl  INPUT PARAMETERS
dnl    rp  result pointer; the routine stores result limbs through it (stq)
dnl    up  first source pointer; limbs are loaded from it (ldq)
dnl    vp  second source pointer; limbs are loaded from it (ldq)
dnl    n   limb count; counted down by 4 per unrolled group of limbs
38define(`rp',`r16')
39define(`up',`r17')
40define(`vp',`r18')
41define(`n',`r19')
42
dnl  Raw (unmasked) results for the four limbs of an unrolled group.  rl0 is
dnl  also the working register of the one-limb-at-a-time lead-in loop.
43define(`rl0',`r0')
44define(`rl1',`r1')
45define(`rl2',`r2')
46define(`rl3',`r3')
47
dnl  Limbs loaded from up.
48define(`ul0',`r4')
49define(`ul1',`r5')
50define(`ul2',`r6')
51define(`ul3',`r7')
52
dnl  Limbs loaded from vp.  Note that vl3 is r25, the same register the entry
dnl  code uses by its raw name as the "n mod 4" counter; that counter is
dnl  finished with before vl3 is first loaded.
53define(`vl0',`r22')
54define(`vl1',`r23')
55define(`vl2',`r24')
56define(`vl3',`r25')
57
dnl  Set at function entry to all ones shifted right by NAIL_BITS, and ANDed
dnl  with every raw result before it is stored.
58define(`numb_mask',`r21')
59
dnl  NAIL_BITS is the shift count used to build numb_mask.  CYSH is the shift
dnl  count applied to a raw result to obtain the carry or borrow fed into the
dnl  next limb; the value given here is redefined by the operation-specific
dnl  ifdef blocks further down.
60define(`NAIL_BITS',`GMP_NAIL_BITS')
61define(`CYSH',`GMP_NUMB_BITS')
62
63dnl  This declaration is munged by configure
64NAILS_SUPPORT(1-63)
65
dnl  Select the operation this build of the file provides.  Each branch sets:
dnl    OP    the Alpha instruction used both for the limb operation and for
dnl          folding in the incoming carry or borrow (addq or subq)
dnl    CYSH  the right-shift count that extracts the outgoing carry or borrow
dnl          from a raw result: GMP_NUMB_BITS for addition, 63 (the top bit
dnl          of the 64-bit difference) for subtraction
dnl    func  the entry point name passed to PROLOGUE
dnl  Nothing in this file defines OP or func when neither OPERATION_ symbol
dnl  is defined.
66ifdef(`OPERATION_add_n', `
67	define(`OP',        addq)
68	define(`CYSH',`GMP_NUMB_BITS')
69	define(`func',  mpn_add_n)')
70ifdef(`OPERATION_sub_n', `
71	define(`OP',        subq)
72	define(`CYSH',63)
73	define(`func',  mpn_sub_n)')
74
dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably
dnl  it declares which entry points this multi-function source can provide.
dnl  Confirm against the build machinery before changing the list.
75MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
76
dnl  func (mpn_add_n or mpn_sub_n, chosen by the OPERATION_ ifdefs above)
dnl
dnl  For each of the n limbs: raw = up[i] OP vp[i] OP cy, rp[i] = raw AND
dnl  numb_mask, cy = raw >> CYSH.  cy lives in r20 and starts at 0.  The
dnl  return value in r0 is the final cy ANDed with 1.
dnl
dnl  Layout:
dnl    lp0      one limb per iteration, run (n mod 4) times
dnl    ge4      loads one group of 4 limb pairs
dnl    ge8/top  4-way unrolled, software pipelined: each pass computes the
dnl             group already in registers while loading the next one
dnl    end/cj0  wind-down that finishes the last group and pending stores
dnl
dnl  Registers written: r0-r7, r20-r25, r27, r28, plus the argument
dnl  registers rp, up, vp and n, which are advanced or counted down.
dnl  NOTE(review): the code uses n >= 1 implicitly; with n = 0 the beq at
dnl  entry would go to ge4 and load limbs.  Confirm callers never pass 0.
77ASM_START()
78PROLOGUE(func)
C  numb_mask = (all ones) >> NAIL_BITS; r31 always reads as zero.
79	lda	numb_mask, -1(r31)
80	srl	numb_mask, NAIL_BITS, numb_mask
C  r20 = 0: no carry or borrow into the first limb.
81	bis	r31,	r31,	r20
82
C  r25 = n mod 4 limbs to handle singly.  n is biased by -4 so that its sign
C  later tells whether a full group of 4 remains.
83	and	n,	3,	r25
84	lda	n,	-4(n)
85	beq	r25,	L(ge4)
86
C  Lead-in loop: one limb per pass, r25 counts down to zero.
87L(lp0):	ldq	ul0,	0(up)
88	lda	up,	8(up)
89	ldq	vl0,	0(vp)
90	lda	vp,	8(vp)
91	lda	rp,	8(rp)
92	lda	r25,	-1(r25)
93	OP	ul0,	vl0,	rl0
94	OP	rl0,	r20,	rl0
95	and	rl0, numb_mask,	r28
C  rp was already advanced, hence the -8 offset.
96	stq	r28,	-8(rp)
97	srl	rl0,	CYSH,	r20
98	bne	r25,	L(lp0)
99
C  n holds (original n) - 4 here; negative means there were fewer than 4
C  limbs in total and the lead-in loop did all the work.
100	blt	n,	L(ret)
101
C  Load a full group of 4 limb pairs and step the source pointers past it.
102L(ge4):	ldq	ul0,	0(up)
103	ldq	vl0,	0(vp)
104	ldq	ul1,	8(up)
105	ldq	vl1,	8(vp)
106	ldq	ul2,	16(up)
107	ldq	vl2,	16(vp)
108	ldq	ul3,	24(up)
109	ldq	vl3,	24(vp)
110	lda	up,	32(up)
111	lda	vp,	32(vp)
112	lda	n,	-4(n)
C  n >= 0: at least one more group follows, so take the pipelined path.
113	bge	n,	L(ge8)
114
C  Exactly one group remains: compute limbs 0 and 1 here, leave the masked
C  limb 0 in r27, and join the wind-down code at cj0 (rp not yet advanced).
115	OP	ul0,	vl0,	rl0	C		main-add 0
116	OP	rl0,	r20,	rl0	C		cy-add 0
117	OP	ul1,	vl1,	rl1	C		main-add 1
118	srl	rl0,	CYSH,	r20	C		gen cy 0
119	OP	rl1,	r20,	rl1	C		cy-add 1
120	and	rl0,numb_mask,	r27
121	br	r31,	L(cj0)
122
C  Pipeline feed-in: compute the group in registers while loading the next
C  group into the same ul/vl registers.  Limbs 0 and 1 are stored here; the
C  masked limb 2 (r27) and raw limb 3 (rl3) stay pending and are stored at
C  -16(rp) and -8(rp) after rp has been advanced by 32.
123L(ge8):	OP	ul0,	vl0,	rl0	C		main-add 0
124	ldq	ul0,	0(up)
125	ldq	vl0,	0(vp)
126	OP	rl0,	r20,	rl0	C		cy-add 0
127	OP	ul1,	vl1,	rl1	C		main-add 1
128	srl	rl0,	CYSH,	r20	C		gen cy 0
129	ldq	ul1,	8(up)
130	ldq	vl1,	8(vp)
131	OP	rl1,	r20,	rl1	C		cy-add 1
132	and	rl0,numb_mask,	r27
133	OP	ul2,	vl2,	rl2	C		main-add 2
134	srl	rl1,	CYSH,	r20	C		gen cy 1
135	ldq	ul2,	16(up)
136	ldq	vl2,	16(vp)
137	OP	rl2,	r20,	rl2	C		cy-add 2
138	and	rl1,numb_mask,	r28
139	stq	r27,	0(rp)
140	OP	ul3,	vl3,	rl3	C		main-add 3
141	srl	rl2,	CYSH,	r20	C		gen cy 2
142	ldq	ul3,	24(up)
143	ldq	vl3,	24(vp)
144	OP	rl3,	r20,	rl3	C		cy-add 3
145	and	rl2,numb_mask,	r27
146	stq	r28,	8(rp)
147	lda	rp,	32(rp)
148	lda	up,	32(up)
149	lda	vp,	32(vp)
150	lda	n,	-4(n)
C  n < 0: the group just loaded is the last one, skip the steady-state loop.
151	blt	n,	L(end)
152
C  Steady-state loop, 4 limbs per pass.  On entry r27 holds the masked limb 2
C  and rl3 the raw limb 3 of the previous group; both are stored at negative
C  offsets from rp.  The "bis r31,r31,r31" instructions write the zero
C  register and so have no effect on state.
C  NOTE(review): they look like padding that keeps the loop in groups of
C  four instructions for ev6 issue; confirm before removing or moving them.
153	ALIGN(32)
154L(top):	OP	ul0,	vl0,	rl0	C		main-add 0
155	srl	rl3,	CYSH,	r20	C		gen cy 3
156	ldq	ul0,	0(up)
157	ldq	vl0,	0(vp)
158
159	OP	rl0,	r20,	rl0	C		cy-add 0
160	and	rl3,numb_mask,	r28
161	stq	r27,	-16(rp)
162	bis	r31,	r31,	r31
163
164	OP	ul1,	vl1,	rl1	C		main-add 1
165	srl	rl0,	CYSH,	r20	C		gen cy 0
166	ldq	ul1,	8(up)
167	ldq	vl1,	8(vp)
168
169	OP	rl1,	r20,	rl1	C		cy-add 1
170	and	rl0,numb_mask,	r27
171	stq	r28,	-8(rp)
172	bis	r31,	r31,	r31
173
174	OP	ul2,	vl2,	rl2	C		main-add 2
175	srl	rl1,	CYSH,	r20	C		gen cy 1
176	ldq	ul2,	16(up)
177	ldq	vl2,	16(vp)
178
179	OP	rl2,	r20,	rl2	C		cy-add 2
180	and	rl1,numb_mask,	r28
181	stq	r27,	0(rp)
182	bis	r31,	r31,	r31
183
184	OP	ul3,	vl3,	rl3	C		main-add 3
185	srl	rl2,	CYSH,	r20	C		gen cy 2
186	ldq	ul3,	24(up)
187	ldq	vl3,	24(vp)
188
189	OP	rl3,	r20,	rl3	C		cy-add 3
190	and	rl2,numb_mask,	r27
191	stq	r28,	8(rp)
192	bis	r31,	r31,	r31
193
194	bis	r31,	r31,	r31
195	lda	n,	-4(n)
196	lda	up,	32(up)
197	lda	vp,	32(vp)
198
199	bis	r31,	r31,	r31
200	bis	r31,	r31,	r31
201	lda	rp,	32(rp)
202	bge	n,	L(top)
203
C  Wind-down: no more loads.  Store the two pending limbs of the previous
C  group, then compute and store the last group already held in ul/vl.
204L(end):	OP	ul0,	vl0,	rl0	C		main-add 0
205	srl	rl3,	CYSH,	r20	C		gen cy 3
206	OP	rl0,	r20,	rl0	C		cy-add 0
207	and	rl3,numb_mask,	r28
208	stq	r27,	-16(rp)
209	OP	ul1,	vl1,	rl1	C		main-add 1
210	srl	rl0,	CYSH,	r20	C		gen cy 0
211	OP	rl1,	r20,	rl1	C		cy-add 1
212	and	rl0,numb_mask,	r27
213	stq	r28,	-8(rp)
C  Shared tail, also entered from the single-group path above.  Expects the
C  masked limb 0 in r27, the raw limb 1 (carry already added) in rl1, and
C  limbs 2 and 3 of the sources in ul2/vl2 and ul3/vl3.
214L(cj0):	OP	ul2,	vl2,	rl2	C		main-add 2
215	srl	rl1,	CYSH,	r20	C		gen cy 1
216	OP	rl2,	r20,	rl2	C		cy-add 2
217	and	rl1,numb_mask,	r28
218	stq	r27,	0(rp)
219	OP	ul3,	vl3,	rl3	C		main-add 3
220	srl	rl2,	CYSH,	r20	C		gen cy 2
221	OP	rl3,	r20,	rl3	C		cy-add 3
222	and	rl2,numb_mask,	r27
223	stq	r28,	8(rp)
224
225	srl	rl3,	CYSH,	r20	C		gen cy 3
226	and	rl3,numb_mask,	r28
227	stq	r27,	16(rp)
228	stq	r28,	24(rp)
229
C  Return value: the final carry or borrow, reduced to a single bit.
230L(ret):	and	r20,	1,	r0
231	ret	r31,	(r26),	1
232EPILOGUE()
233ASM_END()
234