dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.

dnl  Copyright 2006 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

dnl  Pulls in the shared m4 definitions; every macro used below that is not
dnl  defined in this file (PROLOGUE, EPILOGUE, L, Zdisp, ALIGN, ...) has to
dnl  come from here.
include(`../config.m4')

C TODO:
C  * Avoid indexed addressing, it makes us stall on the two-ported register
C    file.

C                           cycles/limb
C P6 model 0-8,10-12            3.17
C P6 model 9   (Banias)         ?
C P6 model 13  (Dothan)         2.25


C Register roles.  All four are loaded from the stack arguments at L(start).
C   rp   destination pointer; each result limb is stored through it
C   up   first source pointer; each limb is loaded from here
C   vp   second source pointer; added to / subtracted from the up limb
C   n    limb count on entry, then negated and used as an index that
C        counts up towards zero
define(`rp',	`%edi')
define(`up',	`%esi')
define(`vp',	`%ebx')
define(`n',	`%ecx')

C Operation selection.  With OPERATION_add_n defined the file provides
C mpn_add_n and mpn_add_nc and chains limbs with adc; with OPERATION_sub_n
C defined it provides mpn_sub_n and mpn_sub_nc and chains limbs with sbb.
C Nothing here covers the case where neither symbol is defined: ADCSBB, func
C and func_nc would then be left undefined.
ifdef(`OPERATION_add_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

C Names the four entry points this one source can produce.
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
C is read by the build machinery -- confirm against its definition.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

C Start of the generated assembly.
C NOTE(review): ASM_START, TEXT and ALIGN are defined outside this file;
C presumably they open the output, select the code section and request
C 16-byte alignment for the entry point that follows -- confirm.
ASM_START()

	TEXT
	ALIGN(16)

C func (rp, up, vp, n)	-- mpn_add_n or mpn_sub_n, chosen by OPERATION_*
C
C Computes n limbs of  rp[i] = up[i] ADCSBB vp[i]  on 4-byte limbs, walking
C from the lowest address upwards with the carry (borrow) chained from one
C limb to the next.  The carry out of the last limb is returned in %eax as
C 0 or 1.
C
C Stack arguments on entry, 4 bytes each, above the return address:
C	 4(%esp) rp	 8(%esp) up	12(%esp) vp	16(%esp) n
C The func_nc entry point reads a carry-in from 20(%esp) into %edx and then
C joins at L(start); only bit 0 of that value is used.
C
C Registers: %edi, %esi and %ebx are saved and restored.  %eax, %ecx, %edx
C and the flags are clobbered.
C
C n must be at least 1.  n = 0 is not checked: the computed entry would be
C L(ent) with a zero index, bypassing the jecxz exit test, so limbs beyond
C the operands would be read and written.
PROLOGUE(func)
	xor	%edx, %edx			C plain entry: carry-in is zero
L(start):
	push	%edi
	push	%esi
	push	%ebx

	C The three pushes above moved the arguments 12 bytes further from
	C %esp.
	mov	16(%esp), rp
	mov	20(%esp), up
	mov	24(%esp), vp
	mov	28(%esp), n

	C Point all three at the end of their operands, so that a negative
	C index counting up to zero addresses the limbs in order.
	lea	(up,n,4), up
	lea	(vp,n,4), vp
	lea	(rp,n,4), rp

	C Split -n in two.  n keeps the multiple-of-8 part and becomes the loop
	C index.  %eax gets the low three bits, which is the number of limb
	C steps to skip on the first pass through the 8-way unrolled block.
	neg	n
	mov	n, %eax
	and	$-8, n
	and	$7, %eax
	shl	$2, %eax			C 4x

	C Entry address = L(ent) + 12 * skipped steps.  The computed jump
	C relies on every limb step below occupying exactly 12 bytes of code.
	C The PIC variant obtains a run-time base from the return address that
	C the call to L(pic_calc) leaves on the stack.
ifdef(`PIC',`
	call	L(pic_calc)
L(here):
',`
	lea	L(ent) (%eax,%eax,2), %eax	C 12x
')

	C Bit 0 of the carry-in goes into the carry flag.  This is done after
	C the address arithmetic above, which is free to alter the flags.
	shr	%edx				C set cy flag
	jmp	*%eax

	C PIC helper: returns L(ent) + 12 * skipped steps in %eax.
ifdef(`PIC',`
L(pic_calc):
	C See mpn/x86/README about old gas bugs
	lea	(%eax,%eax,2), %eax
	add	$L(ent)-L(here), %eax
	add	(%esp), %eax
	ret_internal
')

	C Exit: turn the final carry flag into the return value and restore
	C the saved registers in reverse push order.
L(end):
	sbb	%eax, %eax			C 0 or -1 from the carry flag
	neg	%eax				C return value 0 or 1
	pop	%ebx
	pop	%esi
	pop	%edi
	ret

	C Main loop, 8 limbs per pass.  Between limb steps only mov, lea, jmp
	C and jecxz execute, none of which modify the carry flag, so the carry
	C chain survives from one pass to the next and into L(end).
	ALIGN(16)
L(top):
	jecxz	L(end)				C index reached zero: finished
L(ent):
	C NOTE(review): Zdisp is defined outside this file; presumably it
	C keeps an explicit zero displacement so this first step is the same
	C size as the seven that follow -- confirm against its definition.
Zdisp(	mov,	0,(up,n,4), %eax)
Zdisp(	ADCSBB,	0,(vp,n,4), %eax)
Zdisp(	mov,	%eax, 0,(rp,n,4))

	mov	4(up,n,4), %edx
	ADCSBB	4(vp,n,4), %edx
	mov	%edx, 4(rp,n,4)

	mov	8(up,n,4), %eax
	ADCSBB	8(vp,n,4), %eax
	mov	%eax, 8(rp,n,4)

	mov	12(up,n,4), %edx
	ADCSBB	12(vp,n,4), %edx
	mov	%edx, 12(rp,n,4)

	mov	16(up,n,4), %eax
	ADCSBB	16(vp,n,4), %eax
	mov	%eax, 16(rp,n,4)

	mov	20(up,n,4), %edx
	ADCSBB	20(vp,n,4), %edx
	mov	%edx, 20(rp,n,4)

	mov	24(up,n,4), %eax
	ADCSBB	24(vp,n,4), %eax
	mov	%eax, 24(rp,n,4)

	mov	28(up,n,4), %edx
	ADCSBB	28(vp,n,4), %edx
	mov	%edx, 28(rp,n,4)

	lea	8(n), n				C index += 8, flags untouched
	jmp	L(top)

EPILOGUE()

C func_nc (rp, up, vp, n, carry)	-- mpn_add_nc or mpn_sub_nc
C
C Same operation and return value as func, but starting from a carry-in
C (borrow-in for the subtract build) taken from the fifth stack argument.
C Nothing has been pushed yet at this point, so that argument is at
C 20(%esp).  The shared code at L(start) shifts bit 0 of %edx into the carry
C flag, so only the low bit of the carry argument has any effect.
PROLOGUE(func_nc)
	movl	20(%esp), %edx			C carry-in
	jmp	L(start)			C continue in the body of func
EPILOGUE()
