dnl  AMD64 mpn_add_err3_n, mpn_sub_err3_n

dnl  Contributed by David Harvey.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 7.0
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?

C INPUT PARAMETERS
C
C The first six arguments arrive in registers, the remaining three on the
C stack.  The *_param offsets are relative to %rsp as it stands at function
C entry (the return address sits at 0(%rsp)), so they are only valid until
C the first push.  The routine reads cy_param and n_param before pushing
C anything and fetches the third one later through an adjusted offset.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`ep',	`%rcx')
define(`yp1',	`%r8')
define(`yp2',   `%r9')
define(`yp3_param',   `8(%rsp)')
define(`n_param',     `16(%rsp)')
define(`cy_param',    `24(%rsp)')

C Working registers.  yp3 shares %rcx with ep, so the routine has to save ep
C (it pushes it) before loading yp3.  t is a scratch register.
define(`n',     `%r10')
define(`yp3',   `%rcx')
define(`t',     `%rbx')

C Low and high limbs of the three two-limb accumulators that the routine
C finally stores at 0(ep) through 40(ep).
define(`e1l',	`%rbp')
define(`e1h',	`%r11')
define(`e2l',	`%r12')
define(`e2h',	`%r13')
define(`e3l',   `%r14')
define(`e3h',   `%r15')



C Pick the carry-propagating instruction (ADCSBB) and the entry-point name
C (func) according to which OPERATION_* symbol is defined when this file is
C processed.  Those symbols are defined outside this file; if neither is
C defined, ADCSBB and func stay undefined.
ifdef(`OPERATION_add_err3_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_err3_n)')
ifdef(`OPERATION_sub_err3_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_err3_n)')

C Names both entry points this one source file can produce (the macro itself
C is defined outside this file).
MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)


ASM_START()
	TEXT
	ALIGN(16)
C func (rp, up, vp, ep, yp1, yp2, yp3, n, cy)
C
C func expands to mpn_add_err3_n or mpn_sub_err3_n, and ADCSBB to adc or sbb.
C
C Computes rp[i] = up[i] ADCSBB vp[i] for i = 0 .. n-1 as one carry (borrow)
C chain whose incoming carry is bit 0 of cy.  Whenever limb i produces a
C carry out, the limbs yp1[n-1-i], yp2[n-1-i] and yp3[n-1-i] are added to
C three separate two-limb accumulators.  On exit the accumulators are stored
C as ep[0..5] = e1 low, e1 high, e2 low, e2 high, e3 low, e3 high, and the
C final carry (0 or 1) is returned in %rax.
C
C Requirements visible from the code:
C  * n must be nonzero: the loop body runs before the counter is tested.
C  * Arguments are taken from %rdi, %rsi, %rdx, %rcx, %r8, %r9 and then the
C    stack, i.e. the System V AMD64 convention.
C  * A carry out of an accumulator high limb is discarded.
C
C Every callee-saved register that gets used (%rbx, %rbp, %r12-%r15) is
C pushed on entry and popped again before returning.
PROLOGUE(func)
	C Fetch the stack arguments that are addressed relative to the entry
	C value of %rsp before the pushes below move it.
	mov	cy_param, %rax
	mov	n_param, n

	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C ep and yp3 share %rcx, so park ep on the stack until the end.
	push	ep
	C 64 = 8 (entry offset of yp3_param) + 7 pushes * 8 bytes.
	mov	64(%rsp), yp3       C load from yp3_param

	C Clear the three two-limb accumulators.
	xor	R32(e1l), R32(e1l)
	xor	R32(e1h), R32(e1h)
	xor	R32(e2l), R32(e2l)
	xor	R32(e2h), R32(e2h)
	xor	R32(e3l), R32(e3l)
	xor	R32(e3h), R32(e3h)

	C Turn yp2 and yp3 into byte offsets from yp1, so that the single
	C moving pointer yp1 addresses all three y vectors.
	sub	yp1, yp2
	sub	yp1, yp3

	C yp1 starts at its last limb and walks downwards.  rp, up and vp are
	C advanced past their ends and indexed by n, which counts from -n up
	C to 0.
	lea	-8(yp1,n,8), yp1
	lea	(rp,n,8), rp
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	neg	n

	ALIGN(16)
L(top):
	C %rax holds cy on the first pass and the 0 / all-ones mask from the
	C previous pass afterwards; either way bit 0 is the carry to feed in.
	shr	$1, %rax		C restore carry
	mov	(up,n,8), %rax
	ADCSBB	(vp,n,8), %rax
	mov	%rax, (rp,n,8)
	C %rax becomes 0 when there was no carry out and all ones when there
	C was.  It serves as the AND mask below and keeps the carry alive
	C across the flag-clobbering adds.
	sbb	%rax, %rax		C save carry and generate mask

	mov	(yp1), t		C limb of the first y vector
	and	%rax, t			C keep it only if a carry occurred
	add	t, e1l
	adc	$0, e1h

	mov	(yp1,yp2), t		C same position in the second y vector
	and	%rax, t
	add	t, e2l
	adc	$0, e2h

	mov	(yp1,yp3), t		C same position in the third y vector
	and	%rax, t
	add	t, e3l
	adc	$0, e3h

	lea	-8(yp1), yp1		C step the y vectors down one limb
	inc	n
	jnz     L(top)

L(end):
	and	$1, %eax		C mask to 0 or 1: the return value
	pop	ep			C recover the ep pointer saved on entry

	mov	e1l, (ep)
	mov	e1h, 8(ep)
	mov	e2l, 16(ep)
	mov	e2h, 24(ep)
	mov	e3l, 32(ep)
	mov	e3h, 40(ep)

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()
