1dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
2
3dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C      cycles/limb
34C EV4:     ?
35C EV5:     6.25
36C EV6:     4.5
37
C Incoming arguments: destination pointer, the two source pointers and the
C limb count.  vp[] is the operand that gets doubled before being combined
C with up[].
38define(`rp',`r16')
39define(`up',`r17')
40define(`vp',`r18')
41define(`n', `r19')
42
C Source limbs.  The code alternates between the 0 pair and the 1 pair, so the
C loads for one limb are issued while the previous limb is still being
C processed.
43define(`u0', `r8')
44define(`u1', `r1')
45define(`v0', `r4')
46define(`v1', `r5')
47
C Carry state and scratch:
C   cy0, cy1  accumulated carry-out of a limb handled with the 0 or 1 pair;
C             consumed as carry-in by the following limb.  cy0 is r0, which
C             is also where the final carry is returned.
C   cy        result of a single wraparound compare
C   rr        finished result limb, stored to rp[]
C   ps        u +- (v << 1) before the carry-in is applied
C   sl        v << 1
48define(`cy0', `r0')
49define(`cy1', `r20')
50define(`cy', `r22')
51define(`rr', `r24')
52define(`ps', `r25')
53define(`sl', `r28')
54
C Select the operation from the OPERATION_* symbol that is defined.
C   ADDSUB         the limb instruction, addq or subq
C   CARRY(a,b,c)   c = 1 when the ADDSUB that produced a from b wrapped around:
C                  a < b for addition, b < a for subtraction (unsigned)
C   func           the name of the function being assembled
C If neither symbol is defined, ADDSUB, CARRY and func stay undefined.
C NOTE(review): presumably the build defines exactly one of the two symbols
C per assembly of this file -- confirm against the build setup.
55ifdef(`OPERATION_addlsh1_n',`
56  define(ADDSUB,       addq)
57  define(CARRY,       `cmpult $1,$2,$3')
58  define(func, mpn_addlsh1_n)
59')
60ifdef(`OPERATION_sublsh1_n',`
61  define(ADDSUB,       subq)
62  define(CARRY,       `cmpult $2,$1,$3')
63  define(func, mpn_sublsh1_n)
64')
65
C Names both functions this one source file can be assembled into.
66MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
67
68ASM_START()
C func(rp, up, vp, n): rp[] = up[] +- (vp[] << 1) over n limbs, where func is
C mpn_addlsh1_n or mpn_sublsh1_n depending on the OPERATION_* symbol in effect.
C The carry (or borrow) out of the top limb is returned in r0; it is the sum
C of three 0/1 flags, see the tail code at the end label.
C
C The main loop is unrolled four ways.  The entry code dispatches on n mod 4,
C loads the first limb pair, biases the pointers so that the fixed
C displacements used inside the loop address the right limbs, clears the
C carry-in, and jumps to the matching loop entry point.
C
C NOTE(review): n = 0 would take the b00 path and still process limbs, so
C callers presumably guarantee n >= 1 -- confirm against the mpn interface.
69PROLOGUE(func)
70	and	n, 2, cy0	C cy0 = bit 1 of n, nonzero when n mod 4 is 2 or 3
71	blbs	n, L(bx1)	C low bit set: n is odd
72L(bx0):	ldq	v1, 0(vp)	C even n: the first limb uses the v1/u1 pair
73	ldq	u1, 0(up)
74	nop			C NOTE(review): presumably scheduling padding -- confirm
75	bne	cy0, L(b10)
76
C n mod 4 = 0.  cy0 is already zero from the and above and is the carry-in
C consumed at lo0.
77L(b00):	lda	vp, 48(vp)
78	lda	up, -16(up)
79	lda	rp, -8(rp)
80	br	r31, L(lo0)
81
C n mod 4 = 2.  up needs no bias for this entry point.
82L(b10):	lda	vp, 32(vp)
83	lda	rp, 8(rp)
84	lda	cy0, 0(r31)	C clear carry-in, cy0 still holds 2 from the and
85	br	r31, L(lo2)
86
87L(bx1):	ldq	v0, 0(vp)	C odd n: the first limb uses the v0/u0 pair
88	ldq	u0, 0(up)
89	lda	cy1, 0(r31)	C clear carry-in for the first limb
90	beq	cy0, L(b01)
91
C n mod 4 = 3.
92L(b11):	lda	vp, 40(vp)
93	lda	up, -24(up)
94	lda	rp, 16(rp)
95	br	r31, L(lo3)
96
C n mod 4 = 1.  The limb count is pre-decremented here; the other entry
C paths reach the decrement at the bottom of the loop instead.
97L(b01):	lda	n, -4(n)
98	ble	n, L(end)	C nothing left but the limb already loaded
99	lda	vp, 24(vp)
100	lda	up, -8(up)
101
C Main loop, four limbs per iteration.  Each stage handles one limb:
C   sl = v << 1              done as v + v
C   ps = u +- sl
C   rr = ps +- carry-in      stored to rp[]
C   carry-out = bit shifted out of v, plus the wraparound flag of each of
C               the two ADDSUB steps
C cmplt v, r31 compares v against the zero register as a signed value, so it
C yields the top bit of v, which is the bit lost by the doubling.
C Stages alternate between the v0/u0 pair (carry-in cy1, carry-out cy0) and
C the v1/u1 pair (carry-in cy0, carry-out cy1), and each stage loads the
C operands for the stage after it.  The updates of vp, rp, up and n are spread
C out, one per stage.
102	ALIGN(16)
103L(top):	addq	v0, v0, sl	C left shift vlimb
104	ldq	v1, -16(vp)
105	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
106	cmplt	v0, r31, cy0	C carry out #1
107	ldq	u1, 16(up)
108	ADDSUB	ps, cy1, rr	C consume carry from previous operation
109	CARRY(	ps, u0, cy)	C carry out #2
110	stq	rr, 0(rp)
111	addq	cy, cy0, cy0	C combine carry out #1 and #2
112	CARRY(	rr, ps, cy)	C carry out #3
113	addq	cy, cy0, cy0	C final carry out
114	lda	vp, 32(vp)	C bookkeeping
115L(lo0):	addq	v1, v1, sl
116	ldq	v0, -40(vp)
117	ADDSUB	u1, sl, ps
118	cmplt	v1, r31, cy1
119	ldq	u0, 24(up)
120	ADDSUB	ps, cy0, rr
121	CARRY(	ps, u1, cy)
122	stq	rr, 8(rp)
123	addq	cy, cy1, cy1
124	CARRY(	rr, ps, cy)
125	addq	cy, cy1, cy1
126	lda	rp, 32(rp)	C bookkeeping
127L(lo3):	addq	v0, v0, sl
128	ldq	v1, -32(vp)
129	ADDSUB	u0, sl, ps
130	cmplt	v0, r31, cy0
131	ldq	u1, 32(up)
132	ADDSUB	ps, cy1, rr
133	CARRY(	ps, u0, cy)
134	stq	rr, -16(rp)
135	addq	cy, cy0, cy0
136	CARRY(	rr, ps, cy)
137	addq	cy, cy0, cy0
138	lda	up, 32(up)	C bookkeeping
139L(lo2):	addq	v1, v1, sl
140	ldq	v0, -24(vp)
141	ADDSUB	u1, sl, ps
142	cmplt	v1, r31, cy1
143	ldq	u0, 8(up)
144	ADDSUB	ps, cy0, rr
145	CARRY(	ps, u1, cy)
146	stq	rr, -8(rp)
147	addq	cy, cy1, cy1
148	CARRY(	rr, ps, cy)
149	addq	cy, cy1, cy1
150	lda	n, -4(n)	C bookkeeping
151	bgt	n, L(top)
152
C Tail: the last limb is already in v0/u0 with its carry-in in cy1.  Same
C computation as one loop stage, without any further loads.
153L(end):	addq	v0, v0, sl
154	ADDSUB	u0, sl, ps
155	ADDSUB	ps, cy1, rr
156	cmplt	v0, r31, cy0
157	CARRY(	ps, u0, cy)
158	stq	rr, 0(rp)
159	addq	cy, cy0, cy0
160	CARRY(	rr, ps, cy)
161	addq	cy, cy0, r0	C return value: carry or borrow out of the top limb
162	ret	r31,(r26),1
163EPILOGUE()
164ASM_END()
165