1dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
2
3dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C      cycles/limb
34C EV4:     ?
35C EV5:     7
36C EV6:     4
37
38C TODO
39C  * Tune to reach 3.75 c/l on ev6.
40
C Register aliases.  rp is the base register for the result stores, up and vp
C are the bases for the two source loads, and n is the limb counter.
41define(`rp',`r16')
42define(`up',`r17')
43define(`vp',`r18')
44define(`n', `r19')
45
C Limb registers.  The code alternates between the 0 and the 1 register of
C each pair from one limb position to the next, so the next limb is loaded
C while the current one is still being consumed.
46define(`u0', `r8')
47define(`u1', `r1')
48define(`v0', `r4')
49define(`v1', `r5')
50
C cy0, cy1  carry handed to the next limb position, alternating like the limb
C           pairs; cy0 is r0, which also receives the final total before ret
C cy        scratch flag written by the second CARRY of a limb position
C rr        result limb that gets stored
C ps        partial result before the incoming carry is applied
C sl        doubled v limb merged with the bit shifted out of the limb below
51define(`cy0', `r0')
52define(`cy1', `r20')
53define(`cy', `r22')
54define(`rr', `r24')
55define(`ps', `r25')
56define(`sl', `r28')
57
C Operation selection.  Which of OPERATION_addlsh1_n and OPERATION_sublsh1_n
C is defined is decided outside this file.  Each branch binds three macros:
C   ADDSUB  the arithmetic instruction, addq for the add form, subq for the
C           sub form
C   CARRY   a three operand cmpult; the add form tests first < second (the
C           result wrapped below the operand it was formed from), the sub form
C           swaps the first two operands (the result wrapped above it)
C   func    the entry point name handed to PROLOGUE
58ifdef(`OPERATION_addlsh1_n',`
59  define(ADDSUB,       addq)
60  define(CARRY,       `cmpult $1,$2,$3')
61  define(func, mpn_addlsh1_n)
62')
63ifdef(`OPERATION_sublsh1_n',`
64  define(ADDSUB,       subq)
65  define(CARRY,       `cmpult $2,$1,$3')
66  define(func, mpn_sublsh1_n)
67')
68
69MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
70
71ASM_START()
C func -- rp[] = up[] +- (vp[] << 1), one limb (8 bytes) per position.
C
C In:   rp   base for the result stores
C       up   base for the loads of the operand used as is
C       vp   base for the loads of the operand that is doubled
C       n    limb count.  There is no early exit for n = 0; every path reaches
C            the end block and stores one limb there.
C            NOTE(review): presumably callers guarantee n >= 1 -- confirm.
C Out:  r0   sum of the two carry flags of the top limb and the bit shifted
C            out of the top v limb
C Uses: r0-r8, r20, r22, r24, r25, r28 as scratch; rp, up, vp and n are
C       modified.  No stack accesses.
C
C Structure: the loop body is unrolled four times with entry points top, lo0,
C lo3 and lo2, and the last limb is always handled by the end block.  n mod 4
C selects the entry point:
C       n mod 4 = 0  ->  lo0  (3 limbs, then 4 per pass, then end)
C       n mod 4 = 1  ->  top  (4 per pass, then end; straight to end for n = 1)
C       n mod 4 = 2  ->  lo2  (1 limb, then 4 per pass, then end)
C       n mod 4 = 3  ->  lo3  (2 limbs, then 4 per pass, then end)
C Each feed-in block biases vp, up and rp so that the fixed displacements in
C the loop body address the right limbs for that entry point.
72PROLOGUE(func)
73	and	n, 2, cy0	C cy0 temporarily holds bit 1 of n
74	blbs	n, L(bx1)	C low bit of n set: odd count
75L(bx0):	ldq	v1, 0(vp)	C even count: first limbs go to the v1/u1 pair
76	ldq	u1, 0(up)
77	lda	r2, 0(r31)	C nothing is shifted in below limb 0
78	bne	cy0, L(b10)	C bit 1 set: n mod 4 = 2
79
80L(b00):	lda	vp, 48(vp)	C n mod 4 = 0: bias pointers for entry at lo0
81	lda	up, -16(up)
82	lda	rp, -8(rp)
83	lda	cy0, 0(r31)	C no incoming carry; also discards the n & 2 temp
84	br	r31, L(lo0)
85
86L(b10):	lda	vp, 32(vp)	C n mod 4 = 2: bias pointers for entry at lo2
87	lda	rp, 8(rp)
88	lda	cy0, 0(r31)	C no incoming carry; also discards the n & 2 temp
89	br	r31, L(lo2)
90
91L(bx1):	ldq	v0, 0(vp)	C odd count: first limbs go to the v0/u0 pair
92	ldq	u0, 0(up)
93	lda	r3, 0(r31)	C nothing is shifted in below limb 0
94	beq	cy0, L(b01)	C bit 1 clear: n mod 4 = 1
95
96L(b11):	lda	vp, 40(vp)	C n mod 4 = 3: bias pointers for entry at lo3
97	lda	up, -24(up)
98	lda	rp, 16(rp)
99	lda	cy1, 0(r31)	C no incoming carry
100	br	r31, L(lo3)
101
102L(b01):	lda	n, -4(n)	C n mod 4 = 1: enter at top, one full pass ahead
103	lda	cy1, 0(r31)	C no incoming carry
104	ble	n, L(end)	C taken only for n = 1: just the end block
105	lda	vp, 24(vp)
106	lda	up, -8(up)
107
C Main loop, four limbs per pass.  Each quarter doubles the current v limb,
C merges the bit shifted out of the v limb below it, loads the next u/v pair
C into the other register pair, applies ADDSUB twice (operand, then incoming
C carry) and adds the two resulting carry flags to form the carry for the
C next quarter.  The vp, rp, up and n updates are spread one per quarter.
108	ALIGN(16)
109L(top):	addq	v0, v0, r6
110	ldq	v1, -16(vp)
111	addq	r6, r3, sl	C combined vlimb
112	ldq	u1, 16(up)
113	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
114	cmplt	v0, r31, r2	C high v bits
115	ADDSUB	ps, cy1, rr	C consume carry from previous operation
116	CARRY(	ps, u0, cy0)	C carry out #2
117	stq	rr, 0(rp)
118	CARRY(	rr, ps, cy)	C carry out #3
119	lda	vp, 32(vp)	C bookkeeping
120	addq	cy, cy0, cy0	C final carry out
121L(lo0):	addq	v1, v1, r7	C same step for the v1/u1 pair
122	ldq	v0, -40(vp)
123	addq	r7, r2, sl	C r2 is the top bit of the previous v limb
124	ldq	u0, 24(up)
125	ADDSUB	u1, sl, ps
126	cmplt	v1, r31, r3	C top bit of v1, merged in by the next quarter
127	ADDSUB	ps, cy0, rr
128	CARRY(	ps, u1, cy1)
129	stq	rr, 8(rp)
130	CARRY(	rr, ps, cy)
131	lda	rp, 32(rp)	C bookkeeping
132	addq	cy, cy1, cy1
133L(lo3):	addq	v0, v0, r6	C same step for the v0/u0 pair
134	ldq	v1, -32(vp)
135	addq	r6, r3, sl
136	ldq	u1, 32(up)
137	ADDSUB	u0, sl, ps
138	cmplt	v0, r31, r2
139	ADDSUB	ps, cy1, rr
140	CARRY(	ps, u0, cy0)
141	stq	rr, -16(rp)	C rp was already advanced in the lo0 quarter
142	CARRY(	rr, ps, cy)
143	lda	up, 32(up)	C bookkeeping
144	addq	cy, cy0, cy0
145L(lo2):	addq	v1, v1, r7	C same step for the v1/u1 pair
146	ldq	v0, -24(vp)	C this pair is consumed by top or by end
147	addq	r7, r2, sl
148	ldq	u0, 8(up)
149	ADDSUB	u1, sl, ps
150	cmplt	v1, r31, r3
151	ADDSUB	ps, cy0, rr
152	CARRY(	ps, u1, cy1)
153	stq	rr, -8(rp)
154	CARRY(	rr, ps, cy)
155	lda	n, -4(n)	C bookkeeping
156	addq	cy, cy1, cy1
157	bgt	n, L(top)	C more than one limb still pending
158
C Last limb: same step as the top quarter but without further loads or
C pointer updates.
159L(end):	addq	v0, v0, r6
160	addq	r6, r3, sl
161	ADDSUB	u0, sl, ps
162	cmplt	v0, r31, r2	C bit shifted out of the top v limb
163	ADDSUB	ps, cy1, rr
164	CARRY(	ps, u0, cy0)
165	stq	rr, 0(rp)
166	CARRY(	rr, ps, cy)
167	addq	cy, cy0, cy0
168	addq	cy0, r2, r0	C r0 = carry flags + shifted out bit
169
170	ret	r31,(r26),1
171EPILOGUE()
172ASM_END()
173