dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          1.5
C POWER4/PPC970          2
C POWER5                 2
C POWER6                 2.78
C POWER7               2.15-2.87

C This code is based on powerpc64/aors_n.asm.

C INPUT PARAMETERS
C rp	r3	destination limb vector (written with stw)
C up	r4	first source limb vector
C vp	r5	second source limb vector
C n	r6	number of 32-bit limbs
C cy	r7	carry/borrow in, read by the _nc entry point only

C Operation-specific macros, selected by OPERATION_add_n or OPERATION_sub_n:
C   ADDSUBC  limb operation that consumes and produces CA (adde or subfe)
C   ADDSUB   variant without carry in (addc or subfc); defined here but not
C            referenced by the code in this file
C   func     name of the entry point without a carry-in argument
C   func_nc  name of the entry point with a carry-in argument
C   GENRVAL  converts r3 = CA - 1 into the return value: CA for addition,
C            1 - CA (the borrow) for subtraction
C   SETCBR   loads CA from the register given as argument, r0 is scratch.
C            Addition: CA = 1 iff the register is nonzero.
C            Subtraction: CA = 1 iff the register is zero (CA = 1 is the
C            no-borrow state for subfe).
C   CLRCB    puts CA in the no-carry (add, CA = 0) or no-borrow (sub, CA = 1)
C            state, r0 is scratch.  The subtraction form adds -1 to r1, which
C            sets CA whenever r1 is nonzero.

ifdef(`OPERATION_add_n',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_add_n)
  define(func_nc,	mpn_add_nc)
  define(GENRVAL,	`addi	r3, r3, 1')
  define(SETCBR,	`addic	r0, $1, -1')
  define(CLRCB,		`addic	r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_sub_n)
  define(func_nc,	mpn_sub_nc)
  define(GENRVAL,	`neg	r3, r3')
  define(SETCBR,	`subfic	r0, $1, 0')
  define(CLRCB,		`addic	r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()
C func_nc (rp, up, vp, n, cy) -- same operation as func, but with a carry
C (addition) or borrow (subtraction) input taken from r7.  It only loads CA
C from r7 and then joins func at L(ent), skipping the CLRCB there.
PROLOGUE(func_nc)
	SETCBR(r7)		C CA = carry in (add) or inverted borrow in (sub)
	b	L(ent)
EPILOGUE()

C func (rp, up, vp, n) -- add (or subtract) the n-limb vectors at up and vp,
C store the n result limbs at rp, and leave the carry (or borrow) out of the
C top limb, 0 or 1, in r3.
C
C The n mod 4 lowest limbs are handled first by L(b01), L(b10) or L(b11), the
C remaining limbs go through a 4-way unrolled loop in which the loads for the
C next group are issued between the ADDSUBC operations of the current group.
C CA holds the running carry/borrow from entry all the way to L(ret); none of
C the instructions between the ADDSUBC operations modifies CA.  r28-r31 hold
C results and are saved in and restored from a 32-byte stack frame.
PROLOGUE(func)
	CLRCB			C CA = no carry in (add) or no borrow in (sub)
L(ent):	stwu	r1, -32(r1)	C allocate frame, func_nc joins here with CA set
	rlwinm.	r0, r6, 0,30,31	C r0 = n & 3, set cr0
	cmpwi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
	stw	r28, 8(r1)	C save r28-r31, interleaved with the count setup
	addi	r6, r6, 3	C compute count...
	stw	r29, 12(r1)
	srwi	r6, r6, 2	C ...for ctr, (n + 3) / 4
	stw	r30, 16(r1)
	mtctr	r6		C copy count into ctr
	stw	r31, 20(r1)
	beq	cr0, L(b00)	C n & 3 = 0
	blt	cr6, L(b01)	C n & 3 = 1
	beq	cr6, L(b10)	C n & 3 = 2, otherwise fall through with n & 3 = 3

L(b11):	lwz	r8, 0(r4)	C load up limb
	lwz	r9, 0(r5)	C load vp limb
	lwz	r10, 4(r4)	C load up limb
	lwz	r11, 4(r5)	C load vp limb
	lwz	r12, 8(r4)	C load up limb
	addi	r4, r4, 12
	lwz	r0, 8(r5)	C load vp limb
	addi	r5, r5, 12
	ADDSUBC	r29, r9, r8	C up limb +/- vp limb, with carry/borrow
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	stw	r29, 0(r3)
	stw	r30, 4(r3)
	stw	r31, 8(r3)
	addi	r3, r3, 12
	bdnz	L(go)		C more groups of 4 limbs remain
	b	L(ret)

L(b01):	lwz	r12, 0(r4)	C load up limb
	addi	r4, r4, 4
	lwz	r0, 0(r5)	C load vp limb
	addi	r5, r5, 4
	ADDSUBC	r31, r0, r12	C up limb +/- vp limb, with carry/borrow
	stw	r31, 0(r3)
	addi	r3, r3, 4
	bdnz	L(go)		C more groups of 4 limbs remain
	b	L(ret)

L(b10):	lwz	r10, 0(r4)	C load up limb
	lwz	r11, 0(r5)	C load vp limb
	lwz	r12, 4(r4)	C load up limb
	addi	r4, r4, 8
	lwz	r0, 4(r5)	C load vp limb
	addi	r5, r5, 8
	ADDSUBC	r30, r11, r10	C up limb +/- vp limb, with carry/borrow
	ADDSUBC	r31, r0, r12
	stw	r30, 0(r3)
	stw	r31, 4(r3)
	addi	r3, r3, 8
	bdnz	L(go)		C more groups of 4 limbs remain
	b	L(ret)

L(b00):				C n & 3 = 0, no leading limbs, CA is already set up
L(go):	lwz	r6, 0(r4)	C load up limb
	lwz	r7, 0(r5)	C load vp limb
	lwz	r8, 4(r4)	C load up limb
	lwz	r9, 4(r5)	C load vp limb
	lwz	r10, 8(r4)	C load up limb
	lwz	r11, 8(r5)	C load vp limb
	lwz	r12, 12(r4)	C load up limb
	lwz	r0, 12(r5)	C load vp limb
	bdz	L(end)		C this was the last group, finish it at L(end)

	addi	r4, r4, 16
	addi	r5, r5, 16

	ALIGN(16)
L(top):	ADDSUBC	r28, r7, r6	C process the group loaded earlier...
	lwz	r6, 0(r4)	C ...while loading the next group: up limb
	lwz	r7, 0(r5)	C load vp limb
	ADDSUBC	r29, r9, r8
	lwz	r8, 4(r4)	C load up limb
	lwz	r9, 4(r5)	C load vp limb
	ADDSUBC	r30, r11, r10
	lwz	r10, 8(r4)	C load up limb
	lwz	r11, 8(r5)	C load vp limb
	ADDSUBC	r31, r0, r12
	lwz	r12, 12(r4)	C load up limb
	lwz	r0, 12(r5)	C load vp limb
	stw	r28, 0(r3)
	addi	r4, r4, 16
	stw	r29, 4(r3)
	addi	r5, r5, 16
	stw	r30, 8(r3)
	stw	r31, 12(r3)
	addi	r3, r3, 16
	bdnz	L(top)		C decrement ctr and loop back

L(end):	ADDSUBC	r28, r7, r6	C process the final, already loaded group
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	stw	r28, 0(r3)
	stw	r29, 4(r3)
	stw	r30, 8(r3)
	stw	r31, 12(r3)

L(ret):
	lwz	r28, 8(r1)	C restore r28-r31
	lwz	r29, 12(r1)
	subfe	r3, r0, r0	C r3 = CA - 1, that is 0 if CA is set, else -1
	lwz	r30, 16(r1)
	GENRVAL			C add gives r3 = CA, sub gives r3 = 1 - CA
	lwz	r31, 20(r1)
	addi	r1, r1, 32	C release frame
	blr
EPILOGUE()
