1dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C P5: 2.375 cycles/limb
35
36
C Select the operation at build time from OPERATION_add_n or
C OPERATION_sub_n.  The chosen branch sets three names used by the rest of
C this file:
C   M4_inst         adcl for addition, sbbl for subtraction
C   M4_function_n   name of the entry point without a carry-in argument
C   M4_function_nc  name of the entry point with a carry-in argument
C OPERATION_add_n is tested first.  If neither symbol is set, the error
C macro is called with a message naming the two accepted symbols.
37ifdef(`OPERATION_add_n',`
38	define(M4_inst,        adcl)
39	define(M4_function_n,  mpn_add_n)
40	define(M4_function_nc, mpn_add_nc)
41
42',`ifdef(`OPERATION_sub_n',`
43	define(M4_inst,        sbbl)
44	define(M4_function_n,  mpn_sub_n)
45	define(M4_function_nc, mpn_sub_nc)
46
47',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
48')')')
49
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it presumably
C announces the four entry points this one source can be built into --
C confirm against the build system.
50MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
51
52
53C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
54C                          mp_size_t size);
55C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
56C                           mp_size_t size, mp_limb_t carry);
57
C Stack offsets of the arguments.  The values step by 4 in the same order
C as the prototype arguments: dst, src1, src2, size, carry.  PARAM_CARRY is
C only read by the M4_function_nc entry point.
C NOTE(review): defframe is defined outside this file; the offsets are
C presumably relative to the stack pointer at function entry and corrected
C by the FRAME value maintained in the function bodies -- confirm.
58defframe(PARAM_CARRY,20)
59defframe(PARAM_SIZE, 16)
60defframe(PARAM_SRC2, 12)
61defframe(PARAM_SRC1, 8)
62defframe(PARAM_DST,  4)
63
64	TEXT
65	ALIGN(8)
C M4_function_nc: add or subtract (per M4_inst) with a caller-supplied
C carry-in.  This entry point only performs the setup and moves bit 0 of
C the carry argument into the carry flag.  It then jumps to L(oop) or
C L(end), labels inside the M4_function_n body later in this file, which
C does the arithmetic and returns.
C
C Register roles after setup:
C   edi = dst, esi = src1, ebp = src2
C   ebx = current src2 limb, always loaded one limb ahead of its use
C   ecx = (size-1)/8, number of 8-limb blocks for the unrolled loop
C   edx = (size-1)%8, number of limbs for the single-limb loop
C The one remaining limb is handled by the code at L(end2).
C
C The first src2 limb is read unconditionally and size is decremented
C before use, so size is presumably required to be at least 1 -- confirm
C against the callers.
66PROLOGUE(M4_function_nc)
67
C Save the four registers used for the pointers and the src2 limb.
68	pushl	%edi
69	pushl	%esi
70	pushl	%ebx
71	pushl	%ebp
C Four 4-byte pushes so far.  NOTE(review): FRAME presumably tells the
C PARAM_ macros how far the stack has moved since entry -- confirm.
72deflit(`FRAME',16)
73
74	movl	PARAM_DST,%edi
75	movl	PARAM_SRC1,%esi
76	movl	PARAM_SRC2,%ebp
77	movl	PARAM_SIZE,%ecx
78
79	movl	(%ebp),%ebx		C first src2 limb, fetched ahead of use
80
81	decl	%ecx			C size-1, the last limb is done at L(end2)
82	movl	%ecx,%edx
83	shrl	$3,%ecx			C ecx = number of 8-limb blocks
84	andl	$7,%edx			C edx = leftover single limbs
85	testl	%ecx,%ecx		C any full blocks?  carry is loaded by shrl below
86	jz	L(endgo)		C none, skip the unrolled loop
87
C The unrolled loop reuses edx as a data register and pops the saved value
C when it finishes, so the leftover count is pushed before jumping in.
88	pushl	%edx
89FRAME_pushl()
90	movl	PARAM_CARRY,%eax
91	shrl	%eax			C shift bit 0 into carry
92	jmp	L(oop)
93
94L(endgo):
C This path did not push edx, so FRAME is put back to the four-push value.
95deflit(`FRAME',16)
96	movl	PARAM_CARRY,%eax
97	shrl	%eax			C shift bit 0 into carry
98	jmp	L(end)
99
100EPILOGUE()
101
102
103	ALIGN(8)
C M4_function_n: dst[i] = src1[i] op src2[i] for size limbs, where op is
C M4_inst (adcl or sbbl), starting with the carry flag clear.  The final
C carry or borrow is left in eax as 0 or 1 before returning.
C
C M4_function_nc jumps into this body at L(oop) or L(end) with the same
C register setup and with the carry flag holding its carry-in.
C
C Register roles:
C   edi = dst, esi = src1, ebp = src2
C   ebx = current src2 limb, always loaded one limb ahead of its use
C   ecx = (size-1)/8, number of 8-limb blocks for the unrolled loop
C   edx = (size-1)%8, number of limbs for the single-limb loop
C         (saved on the stack while the unrolled loop runs)
C   eax, edx = data registers inside the unrolled loop
C
C Since size-1 = 8*ecx + edx, one limb always remains after both loops, so
C every look-ahead load of the next src2 limb stays inside the operand.
C
C The carry flag is live from one M4_inst to the next across all loop
C iterations.  Only instructions that leave the carry flag alone (movl,
C leal, decl, incl, pushl, popl, conditional jumps) appear between them;
C do not insert anything that changes it.
C
C The first src2 limb is read unconditionally and size is decremented
C before use, so size is presumably required to be at least 1 -- confirm
C against the callers.
104PROLOGUE(M4_function_n)
105
C Save the four registers used for the pointers and the src2 limb.
106	pushl	%edi
107	pushl	%esi
108	pushl	%ebx
109	pushl	%ebp
C Four 4-byte pushes so far.  NOTE(review): FRAME presumably tells the
C PARAM_ macros how far the stack has moved since entry -- confirm.
110deflit(`FRAME',16)
111
112	movl	PARAM_DST,%edi
113	movl	PARAM_SRC1,%esi
114	movl	PARAM_SRC2,%ebp
115	movl	PARAM_SIZE,%ecx
116
117	movl	(%ebp),%ebx		C first src2 limb, fetched ahead of use
118
119	decl	%ecx			C size-1, the last limb is done at L(end2)
120	movl	%ecx,%edx
121	shrl	$3,%ecx			C ecx = number of 8-limb blocks
122	andl	$7,%edx			C edx = leftover single limbs
123	testl	%ecx,%ecx		C zero carry flag
124	jz	L(end)			C no full block, edx is not pushed on this path
125	pushl	%edx			C loop below reuses edx, keep the leftover count
126FRAME_pushl()
127
C Unrolled loop: eight limbs per pass, handled as four pairs L(1)..L(4).
C dst is advanced first, so the stores use negative offsets from edi.
128	ALIGN(8)
129L(oop):	movl	28(%edi),%eax		C fetch destination cache line
130	leal	32(%edi),%edi
131
132L(1):	movl	(%esi),%eax
133	movl	4(%esi),%edx
134	M4_inst	%ebx,%eax
135	movl	4(%ebp),%ebx
136	M4_inst	%ebx,%edx
137	movl	8(%ebp),%ebx
138	movl	%eax,-32(%edi)
139	movl	%edx,-28(%edi)
140
141L(2):	movl	8(%esi),%eax
142	movl	12(%esi),%edx
143	M4_inst	%ebx,%eax
144	movl	12(%ebp),%ebx
145	M4_inst	%ebx,%edx
146	movl	16(%ebp),%ebx
147	movl	%eax,-24(%edi)
148	movl	%edx,-20(%edi)
149
150L(3):	movl	16(%esi),%eax
151	movl	20(%esi),%edx
152	M4_inst	%ebx,%eax
153	movl	20(%ebp),%ebx
154	M4_inst	%ebx,%edx
155	movl	24(%ebp),%ebx
156	movl	%eax,-16(%edi)
157	movl	%edx,-12(%edi)
158
159L(4):	movl	24(%esi),%eax
160	movl	28(%esi),%edx
161	M4_inst	%ebx,%eax
162	movl	28(%ebp),%ebx
163	M4_inst	%ebx,%edx
164	movl	32(%ebp),%ebx		C first src2 limb of the next block or of the tail
165	movl	%eax,-8(%edi)
166	movl	%edx,-4(%edi)
167
168	leal	32(%esi),%esi		C leal and decl keep the carry flag for the next pass
169	leal	32(%ebp),%ebp
170	decl	%ecx
171	jnz	L(oop)
172
173	popl	%edx			C restore the leftover limb count
174FRAME_popl()
C Entered here with edx = leftover count and the carry flag live.
175L(end):
176	decl	%edx			C test %edx w/o clobbering carry
177	js	L(end2)			C edx was 0, only the final limb remains
178	incl	%edx			C undo the decl, edx is the loop count again
C Single-limb loop: one limb per pass, edx passes in total.
179L(oop2):
180	leal	4(%edi),%edi
181	movl	(%esi),%eax
182	M4_inst	%ebx,%eax
183	movl	4(%ebp),%ebx		C next src2 limb, fetched ahead of use
184	movl	%eax,-4(%edi)
185	leal	4(%esi),%esi
186	leal	4(%ebp),%ebp
187	decl	%edx
188	jnz	L(oop2)
C Final limb: its src2 value is already in ebx.
189L(end2):
190	movl	(%esi),%eax
191	M4_inst	%ebx,%eax
192	movl	%eax,(%edi)
193
194	sbbl	%eax,%eax		C eax = 0 if carry clear, -1 if carry set
195	negl	%eax			C turn that into 0 or 1
196
197	popl	%ebp
198	popl	%ebx
199	popl	%esi
200	popl	%edi
201	ret
202
203EPILOGUE()
204