aors_n.asm revision 1.1.1.1
1dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
4dnl  Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C                   cycles/limb
24C POWER3/PPC630          1.5
25C POWER4/PPC970          2
26C POWER5                 2
27C POWER6                 2.78
28C POWER7               2.15-2.87
29
30C This code is based on powerpc64/aors_n.asm.
31
32C INPUT PARAMETERS
33C rp	r3
34C up	r4
35C vp	r5
36C n	r6
C cy	r7	(carry/borrow input, read only by the mpn_add_nc/mpn_sub_nc entry)
37
C Macro set for the addition build (selected when OPERATION_add_n is defined).
C   ADDSUBC  per-limb instruction.  adde adds its two source registers plus
C            the CA bit and writes the new carry back to CA.
C   ADDSUB   addc, the form without carry-in.  Defined here but not referenced
C            by any code in this file.
C   func     name handed to PROLOGUE for the plain entry point.
C   func_nc  name handed to PROLOGUE for the carry-in entry point.
C   GENRVAL  return value fixup.  It is expanded right after
C            "subfe r3, r0, r0", which leaves CA - 1 in r3, so adding 1
C            yields the carry as 0 or 1.
C   SETCBR   turn a register argument into CA: adding -1 carries out exactly
C            when the register is nonzero.  The sum goes to r0 and is unused.
C   CLRCB    clear CA: adding 0 to r0 can never carry and leaves r0 unchanged.
38ifdef(`OPERATION_add_n',`
39  define(ADDSUBC,	adde)
40  define(ADDSUB,	addc)
41  define(func,		mpn_add_n)
42  define(func_nc,	mpn_add_nc)
43  define(GENRVAL,	`addi	r3, r3, 1')
44  define(SETCBR,	`addic	r0, $1, -1')
45  define(CLRCB,		`addic	r0, r0, 0')
46')
C Macro set for the subtraction build (selected when OPERATION_sub_n is
C defined).  The subtract-from instructions treat CA as an inverted borrow:
C CA = 1 means "no borrow", CA = 0 means "borrow pending".
C   ADDSUBC  per-limb instruction.  subfe rD, rA, rB computes
C            rB - rA - (1 - CA).  The code below passes the limb loaded via
C            r5 (vp) as rA and the limb loaded via r4 (up) as rB, so each
C            result limb is up - vp - borrow.
C   ADDSUB   subfc, the form without borrow-in.  Defined here but not
C            referenced by any code in this file.
C   func     name handed to PROLOGUE for the plain entry point.
C   func_nc  name handed to PROLOGUE for the borrow-in entry point.
C   GENRVAL  return value fixup.  It is expanded right after
C            "subfe r3, r0, r0", which leaves CA - 1 (0 or -1) in r3, so
C            negating yields the borrow as 0 or 1.
C   SETCBR   turn a register argument into CA: subfic computes 0 - reg and
C            sets CA only when reg is zero, i.e. when there is no incoming
C            borrow.  The difference goes to r0 and is unused.
C   CLRCB    establish "no borrow" (CA = 1): adding -1 to r1 carries out
C            whenever r1 is nonzero.  r1 is the base register of the save
C            area in the routine below (presumably the stack pointer, hence
C            never zero - confirm against the target ABI).
47ifdef(`OPERATION_sub_n',`
48  define(ADDSUBC,	subfe)
49  define(ADDSUB,	subfc)
50  define(func,		mpn_sub_n)
51  define(func_nc,	mpn_sub_nc)
52  define(GENRVAL,	`neg	r3, r3')
53  define(SETCBR,	`subfic	r0, $1, 0')
54  define(CLRCB,		`addic	r0, r1, -1')
55')
56
C Names the four entry points that this one source file can provide; which
C pair is actually emitted depends on the OPERATION_* symbol chosen above.
C MULFUNC_PROLOGUE and ASM_START are not defined in this file (presumably
C they come from the m4 support pulled in by the include at the top -
C confirm there for their exact effect).
57MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
58
59ASM_START()
C func_nc (mpn_add_nc or mpn_sub_nc): same operation as func, but with an
C explicit carry/borrow input.
C
C In:   r3 = rp, r4 = up, r5 = vp, r6 = n as for func, plus the carry or
C       borrow input in r7.  SETCBR only distinguishes zero from nonzero, so
C       any nonzero r7 acts as a carry/borrow of one.
C Out:  as for func; this stub only seeds the CA bit and then branches to
C       L(ent) inside func, bypassing the CLRCB at the top of func.
60PROLOGUE(func_nc)
61	SETCBR(r7)		C CA = incoming carry/borrow; r0 is overwritten
62	b	L(ent)		C share the rest of the code with func
63EPILOGUE()
64
C func (mpn_add_n or mpn_sub_n): limb-wise add or subtract of two n-limb
C operands, with the carry or borrow propagated through the CA bit.
C
C In:   r3 = rp (result), r4 = up (first source, "s1" in the comments below),
C       r5 = vp (second source, "s2"), r6 = n (limb count, 4 bytes per limb).
C Out:  r3 = carry-out (add build) or borrow-out (sub build), as 0 or 1.
C Uses: r0, r6-r12 and ctr as scratch.  r28-r31 are saved on entry and
C       restored before returning.
C
C Layout: the n & 3 leftover limbs are handled first by L(b01), L(b10) or
C L(b11).  The remaining limbs go through a 4-limb loop, L(top), in which the
C loads for the next group are interleaved with the arithmetic and stores of
C the current group; L(end) finishes the last group without further loads.
C ctr is set to (n + 3) / 4, i.e. one count per 4-limb group plus one for the
C leftover limbs when there are any.
C
C The carry chain relies on ADDSUBC being the only instruction between the
C carry setup and L(ret) that writes CA; pointer updates are done with addi,
C which leaves CA alone.
C
C NOTE(review): n = 0 is not handled.  ctr would be loaded with 0 and the
C code at L(go) would still load and process limbs.  Presumably callers
C guarantee n >= 1 - confirm against the documented contract.
65PROLOGUE(func)
66	CLRCB			C no carry-in / no borrow-in
C Entry point shared with func_nc, which arrives here with CA already set.
C NOTE(review): r28-r31 are stored below r1 without moving r1, so this relies
C on the area just under r1 being safe to use - confirm for the target ABI.
67L(ent):	stw	r31, -4(r1)
68	stw	r30, -8(r1)
69	stw	r29, -12(r1)
70	stw	r28, -16(r1)
71
72	rlwinm.	r0, r6, 0,30,31	C r0 = n & 3, set cr0
73	cmpwi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
74	addi	r6, r6, 3	C compute count...
75	srwi	r6, r6, 2	C ...for ctr: (n + 3) / 4
76	mtctr	r6		C copy count into ctr
77	beq	cr0, L(b00)	C n & 3 == 0
78	blt	cr6, L(b01)	C n & 3 == 1
79	beq	cr6, L(b10)	C n & 3 == 2
80
C n & 3 == 3: process three limbs up front.
81L(b11):	lwz	r8, 0(r4)	C load s1 limb
82	lwz	r9, 0(r5)	C load s2 limb
83	lwz	r10, 4(r4)	C load s1 limb
84	lwz	r11, 4(r5)	C load s2 limb
85	lwz	r12, 8(r4)	C load s1 limb
86	addi	r4, r4, 12
87	lwz	r0, 8(r5)	C load s2 limb
88	addi	r5, r5, 12
89	ADDSUBC	r29, r9, r8
90	ADDSUBC	r30, r11, r10
91	ADDSUBC	r31, r0, r12
92	stw	r29, 0(r3)
93	stw	r30, 4(r3)
94	stw	r31, 8(r3)
95	addi	r3, r3, 12
96	bdnz	L(go)		C more 4-limb groups remain
97	b	L(ret)		C n was exactly 3
98
C n & 3 == 1: process one limb up front.
99L(b01):	lwz	r12, 0(r4)	C load s1 limb
100	addi	r4, r4, 4
101	lwz	r0, 0(r5)	C load s2 limb
102	addi	r5, r5, 4
103	ADDSUBC	r31, r0, r12	C add
104	stw	r31, 0(r3)
105	addi	r3, r3, 4
106	bdnz	L(go)		C more 4-limb groups remain
107	b	L(ret)		C n was exactly 1
108
C n & 3 == 2: process two limbs up front.
109L(b10):	lwz	r10, 0(r4)	C load s1 limb
110	lwz	r11, 0(r5)	C load s2 limb
111	lwz	r12, 4(r4)	C load s1 limb
112	addi	r4, r4, 8
113	lwz	r0, 4(r5)	C load s2 limb
114	addi	r5, r5, 8
115	ADDSUBC	r30, r11, r10	C add
116	ADDSUBC	r31, r0, r12	C add
117	stw	r30, 0(r3)
118	stw	r31, 4(r3)
119	addi	r3, r3, 8
120	bdnz	L(go)		C more 4-limb groups remain
121	b	L(ret)		C n was exactly 2
122
C n & 3 == 0 lands on L(b00) with nothing to do up front and falls straight
C into L(go).  L(go) loads the first full group of four limb pairs; ctr holds
C the number of groups still to be processed, this one included.
123L(b00):	C no leftover limbs; CA was already set at entry
124L(go):	lwz	r6, 0(r4)	C load s1 limb
125	lwz	r7, 0(r5)	C load s2 limb
126	lwz	r8, 4(r4)	C load s1 limb
127	lwz	r9, 4(r5)	C load s2 limb
128	lwz	r10, 8(r4)	C load s1 limb
129	lwz	r11, 8(r5)	C load s2 limb
130	lwz	r12, 12(r4)	C load s1 limb
131	lwz	r0, 12(r5)	C load s2 limb
132	bdz	L(end)		C that was the last group: just finish it
133
134	addi	r4, r4, 16
135	addi	r5, r5, 16
136
137	ALIGN(16)
C Main loop.  Each pass combines the four limb pairs loaded previously,
C loads the next four pairs into the same registers, and stores the four
C results.
138L(top):	ADDSUBC	r28, r7, r6
139	lwz	r6, 0(r4)	C load s1 limb
140	lwz	r7, 0(r5)	C load s2 limb
141	ADDSUBC	r29, r9, r8
142	lwz	r8, 4(r4)	C load s1 limb
143	lwz	r9, 4(r5)	C load s2 limb
144	ADDSUBC	r30, r11, r10
145	lwz	r10, 8(r4)	C load s1 limb
146	lwz	r11, 8(r5)	C load s2 limb
147	ADDSUBC	r31, r0, r12
148	lwz	r12, 12(r4)	C load s1 limb
149	lwz	r0, 12(r5)	C load s2 limb
150	stw	r28, 0(r3)
151	addi	r4, r4, 16
152	stw	r29, 4(r3)
153	addi	r5, r5, 16
154	stw	r30, 8(r3)
155	stw	r31, 12(r3)
156	addi	r3, r3, 16
157	bdnz	L(top)		C decrement ctr and loop back
158
C Final group: its operands are already in registers, so only the arithmetic
C and the stores remain.
159L(end):	ADDSUBC	r28, r7, r6
160	ADDSUBC	r29, r9, r8
161	ADDSUBC	r30, r11, r10
162	ADDSUBC	r31, r0, r12
163	stw	r28, 0(r3)
164	stw	r29, 4(r3)
165	stw	r30, 8(r3)
166	stw	r31, 12(r3)
167
C Common exit: restore the saved registers, then turn CA into the result.
168L(ret):	lwz	r31, -4(r1)
169	lwz	r30, -8(r1)
170	lwz	r29, -12(r1)
171	lwz	r28, -16(r1)
172
173	subfe	r3, r0, r0	C r3 = CA - 1; the value of r0 cancels out
174	GENRVAL			C r3 = carry-out or borrow-out, 0 or 1
175	blr
176EPILOGUE()
177