1dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
2
3dnl  Copyright 2003 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C      cycles/limb
23C EV4:    12.5
24C EV5:     6.25
25C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
26
27C TODO
28C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
29C    compared to separate mpn_lshift and mpn_add_n.
30C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
31C    for right shift.
32
33dnl  INPUT PARAMETERS
dnl  Pointer and count arguments.  rp is the result pointer, up the source that
dnl  is used as is, vp the source that is shifted left by one bit, n the count.
34define(`rp',`r16')
35define(`up',`r17')
36define(`vp',`r18')
37define(`n', `r19')
38
dnl  Source limbs kept in flight by the 4-way unrolled loop, one u/v pair per
dnl  loop quarter.  u1 and u2 are r1 and r2, which the entry code also uses as
dnl  scratch for the n mod 4 dispatch before any limb is loaded into them.
39define(`u0', `r8')
40define(`u1', `r1')
41define(`u2', `r2')
42define(`u3', `r3')
43define(`v0', `r4')
44define(`v1', `r5')
45define(`v2', `r6')
46define(`v3', `r7')
47
dnl  cy0 and cy1 carry the running carry from limb to limb, alternating between
dnl  the two; cy0 is r0 and is the one still live at the final ret.
dnl  cy is scratch for a single compare result, rr the result limb to store,
dnl  ps the partial result before the carry-in is applied, and sl the vp limb
dnl  shifted left by one.
48define(`cy0', `r0')
49define(`cy1', `r20')
50define(`cy', `r22')
51define(`rr', `r24')
52define(`ps', `r25')
53define(`sl', `r28')
54
dnl  Select the operation being assembled.  ADDSUB is the instruction used both
dnl  for u +- (v << 1) and for applying the carry-in.  CARRY(a,b,c) emits a
dnl  cmpult that sets c to 1 when the preceding step wrapped around: a < b
dnl  after an addition, b < a after a subtraction (the first two operands are
dnl  swapped between the two definitions).  func is the entry point name.
dnl  NOTE(review): if neither OPERATION_ symbol is defined, none of ADDSUB,
dnl  CARRY or func gets defined here -- presumably the build always sets
dnl  exactly one of them; confirm.
55ifdef(`OPERATION_addlsh1_n',`
56  define(ADDSUB,       addq)
57  define(CARRY,       `cmpult $1,$2,$3')
58  define(func, mpn_addlsh1_n)
59')
60ifdef(`OPERATION_sublsh1_n',`
61  define(ADDSUB,       subq)
62  define(CARRY,       `cmpult $2,$1,$3')
63  define(func, mpn_sublsh1_n)
64')
65
C Shared body of mpn_addlsh1_n and mpn_sublsh1_n: rp[] = up[] +- (vp[] << 1).
C
C Step performed for every limb (u from up, v from vp):
C   sl = v << 1
C   ps = u +- sl
C   rr = ps +- carry_in          and rr is stored to rp
C   carry_out = (v >> 63) + wrap of the first step + wrap of the second step
C The carry alternates between cy1 and cy0 from one limb to the next.  The
C value left in cy0 (r0) at the ret is the carry out of the last limb,
C presumably the function result under the Alpha calling convention.
C
C The loop is unrolled four ways and software pipelined: each quarter works
C on a limb pair loaded earlier and loads a pair for a later quarter.
C Invariant at $Loop: v0/u0, v1/u1, v2/u2 hold limbs i, i+1, i+2, both vp and
C up point at limb i+3, rp points at result limb i, and cy1 is the carry in.
C Entry is at $Loop, $LL10, $LL01 or $LL00x depending on n mod 4, with the
C pointers biased so that the fixed offsets used in the loop address the right
C limbs.  When the loop is left, exactly three loaded limbs remain and the
C tail at $Lcj3 finishes them; n = 2 and n = 1 enter the tail at $Lcj2 and
C $Lcj1 directly.
C
C NOTE(review): n = 0 is not special-cased; it would take the $Lb00 path and
C load and store limbs.  Presumably callers guarantee n >= 1 -- confirm.
66MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
67
68ASM_START()
69PROLOGUE(func)
70	lda	n, -4(n)	C bias the count by -4; its sign is tested below
71	bis	r31, r31, cy1	C cy1 = 0, carry in for the $Lb11 and $Lb01 paths
72	and	n, 3, r1	C r1 = n mod 4 (low bits unaffected by the bias)
73	beq	r1, $Lb00	C n mod 4 = 0
74	cmpeq	r1, 1, r2
75	bne	r2, $Lb01	C n mod 4 = 1
76	cmpeq	r1, 2, r2
77	bne	r2, $Lb10	C n mod 4 = 2, otherwise fall into $Lb11
78$Lb11:	C n = 3, 7, 11, ...
79	ldq	v0, 0(vp)	C preload limbs 0, 1, 2
80	ldq	u0, 0(up)
81	ldq	v1, 8(vp)
82	ldq	u1, 8(up)
83	ldq	v2, 16(vp)
84	ldq	u2, 16(up)
85	lda	vp, 24(vp)	C vp and up now point at limb 3
86	lda	up, 24(up)
87	bge	n, $Loop	C n >= 7: enter the loop at the top
88	br	r31, $Lcj3	C n = 3: only the three-limb tail is needed
89$Lb10:	C n = 2, 6, 10, ...
90	bis	r31, r31, cy0	C cy0 = 0, carry in for the first limb here
91	ldq	v1, 0(vp)	C limb 0 goes through the v1/u1 quarter
92	ldq	u1, 0(up)
93	ldq	v2, 8(vp)	C limb 1
94	ldq	u2, 8(up)
95	lda	rp, -8(rp)	C bias rp so the 8(rp) store hits result limb 0
96	blt	n, $Lcj2	C n = 2: finish with the two-limb tail
97	ldq	v3, 16(vp)	C limb 2
98	ldq	u3, 16(up)
99	lda	vp, 48(vp)	C bias so that -24(vp) at $LL10 is limb 3
100	lda	up, 16(up)	C bias so that 8(up) at $LL10 is limb 3
101	br	r31, $LL10
102$Lb01:	C n = 1, 5, 9, ...
103	ldq	v2, 0(vp)	C limb 0 goes through the v2/u2 quarter
104	ldq	u2, 0(up)
105	lda	rp, -16(rp)	C bias rp so the 16(rp) store at $Lcj1 hits limb 0
106	blt	n, $Lcj1	C n = 1: finish with the one-limb tail
107	ldq	v3, 8(vp)	C limb 1
108	ldq	u3, 8(up)
109	ldq	v0, 16(vp)	C limb 2
110	ldq	u0, 16(up)
111	lda	vp, 40(vp)	C bias so that -16(vp) at $LL01 is limb 3
112	lda	up, 8(up)	C bias so that 16(up) at $LL01 is limb 3
113	lda	rp, 32(rp)	C net rp bias +16, so -16(rp) at $LL01 is limb 0
114	br	r31, $LL01
115$Lb00:	C n = 4, 8, 12, ...
116	bis	r31, r31, cy0	C cy0 = 0, carry in for the first limb here
117	ldq	v3, 0(vp)	C limb 0 goes through the v3/u3 quarter
118	ldq	u3, 0(up)
119	ldq	v0, 8(vp)	C limb 1
120	ldq	u0, 8(up)
121	ldq	v1, 16(vp)	C limb 2
122	ldq	u1, 16(up)
123	lda	vp, 32(vp)	C bias so that -8(vp) at $LL00x is limb 3
124	lda	rp, 8(rp)	C bias so that -8(rp) at $LL00x is limb 0
125	br	r31, $LL00x	C up is left alone, $LL00x adds 32 to it first
126	ALIGN(16)
C First quarter: limb pair v0/u0, carry in cy1, carry out cy0.
127C 0
128$Loop:	sll	v0, 1, sl	C left shift vlimb
129	ldq	v3, 0(vp)
130C 1
131	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
132	ldq	u3, 0(up)
133C 2
134	ADDSUB	ps, cy1, rr	C consume carry from previous operation
135	srl	v0, 63, cy0	C carry out #1
136C 3
137	CARRY(	ps, u0, cy)	C carry out #2
138	stq	rr, 0(rp)
139C 4
140	addq	cy, cy0, cy0	C combine carry out #1 and #2
141	CARRY(	rr, ps, cy)	C carry out #3
142C 5
143	addq	cy, cy0, cy0	C final carry out
144	lda	vp, 32(vp)	C bookkeeping
C Second quarter: limb pair v1/u1, carry in cy0, carry out cy1.
145C 6
146$LL10:	sll	v1, 1, sl
147	ldq	v0, -24(vp)	C refill v0/u0 for the next pass
148C 7
149	ADDSUB	u1, sl, ps
150	ldq	u0, 8(up)
151C 8
152	ADDSUB	ps, cy0, rr
153	srl	v1, 63, cy1
154C 9
155	CARRY(	ps, u1, cy)
156	stq	rr, 8(rp)
157C 10
158	addq	cy, cy1, cy1
159	CARRY(	rr, ps, cy)
160C 11
161	addq	cy, cy1, cy1
162	lda	rp, 32(rp)	C bookkeeping
C Third quarter: limb pair v2/u2, carry in cy1, carry out cy0.
163C 12
164$LL01:	sll	v2, 1, sl
165	ldq	v1, -16(vp)	C refill v1/u1 for the next pass
166C 13
167	ADDSUB	u2, sl, ps
168	ldq	u1, 16(up)
169C 14
170	ADDSUB	ps, cy1, rr
171	srl	v2, 63, cy0
172C 15
173	CARRY(	ps, u2, cy)
174	stq	rr, -16(rp)	C rp was already advanced by 32 above
175C 16
176	addq	cy, cy0, cy0
177	CARRY(	rr, ps, cy)
178C 17
179	addq	cy, cy0, cy0
C Fourth quarter: limb pair v3/u3, carry in cy0, carry out cy1.
180$LL00x:	lda	up, 32(up)	C bookkeeping
181C 18
182	sll	v3, 1, sl
183	ldq	v2, -8(vp)	C refill v2/u2 for the next pass
184C 19
185	ADDSUB	u3, sl, ps
186	ldq	u2, -8(up)
187C 20
188	ADDSUB	ps, cy0, rr
189	srl	v3, 63, cy1
190C 21
191	CARRY(	ps, u3, cy)
192	stq	rr, -8(rp)
193C 22
194	addq	cy, cy1, cy1
195	CARRY(	rr, ps, cy)
196C 23
197	addq	cy, cy1, cy1
198	lda	n, -4(n)	C bookkeeping
199C 24
200	bge	n, $Loop	C loop while the biased count is still >= 0
201
C Tail: the same step for the limbs already loaded, with no further loads.
C $Lcj3 handles v0/u0, carry in cy1, carry out cy0.
202$Lcj3:	sll	v0, 1, sl
203	ADDSUB	u0, sl, ps
204	ADDSUB	ps, cy1, rr
205	srl	v0, 63, cy0
206	CARRY(	ps, u0, cy)
207	stq	rr, 0(rp)
208	addq	cy, cy0, cy0
209	CARRY(	rr, ps, cy)
210	addq	cy, cy0, cy0
211
C $Lcj2 handles v1/u1, carry in cy0, carry out cy1.
212$Lcj2:	sll	v1, 1, sl
213	ADDSUB	u1, sl, ps
214	ADDSUB	ps, cy0, rr
215	srl	v1, 63, cy1
216	CARRY(	ps, u1, cy)
217	stq	rr, 8(rp)
218	addq	cy, cy1, cy1
219	CARRY(	rr, ps, cy)
220	addq	cy, cy1, cy1
221
C $Lcj1 handles v2/u2, carry in cy1, carry out cy0 which is left in r0.
222$Lcj1:	sll	v2, 1, sl
223	ADDSUB	u2, sl, ps
224	ADDSUB	ps, cy1, rr
225	srl	v2, 63, cy0
226	CARRY(	ps, u2, cy)
227	stq	rr, 16(rp)
228	addq	cy, cy0, cy0
229	CARRY(	rr, ps, cy)
230	addq	cy, cy0, cy0
231
232	ret	r31,(r26),1	C return with the final carry in cy0 (r0)
233EPILOGUE()
234ASM_END()
235