dnl  x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core 2".

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C K8,K9:	 4.25
C K10:		 ?
C P4:		 ?
C P6-15:	 3

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n', `%rcx')

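C This file provides
C   mpn_addlsh1_n:  {rp,n} = {up,n} + ({vp,n} << 1), returning the carry out,
C   mpn_sublsh1_n:  {rp,n} = {up,n} - ({vp,n} << 1), returning the borrow out,
C i.e. vp is shifted left by one bit on the fly and then added to or
C subtracted from up.  The return value is in the range 0..2.
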
ifdef(`OPERATION_addlsh1_n', `
	define(ADDSUB,	add)
	define(ADCSBB,	adc)
	define(func,	mpn_addlsh1_n)')
ifdef(`OPERATION_sublsh1_n', `
	define(ADDSUB,	sub)
	define(ADCSBB,	sbb)
	define(func,	mpn_sublsh1_n)')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
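
C The same body serves both functions: ADDSUB/ADCSBB expand to add/adc for
C mpn_addlsh1_n and to sub/sbb for mpn_sublsh1_n, depending on which
C OPERATION_* symbol is defined when the file is assembled.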

ASM_START()
	TEXT
	ALIGN(8)
PROLOGUE(func)
	push	%rbx
	push	%r12

	mov	R32(%rcx), R32(%rax)
	lea	24(up,n,8), up
	lea	24(vp,n,8), vp
	lea	24(rp,n,8), rp
	neg	n
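C The base pointers now point three limbs past the end of each operand and n
C is negated, so the (negative) index counts up towards zero while offsets
C -24, -16, -8, 0 address four consecutive limbs per loop iteration.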

	xor	R32(%r11), R32(%r11)

	mov	-24(vp,n,8), %r8	C do first limb early
	shrd	$63, %r8, %r11
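C Limbs of {vp,n} << 1 are formed on the fly:  shrd $63, new, old  leaves
C old = (new << 1) | (old >> 63), i.e. the newly loaded vp limb shifted left
C by one with its low bit taken from the high bit of the previous limb
C (%r11 starts out as zero for the very first limb).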

	and	$3, R32(%rax)
	je	L(b0)
	cmp	$2, R32(%rax)
	jc	L(b1)
	je	L(b2)

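C The dispatch above is on n mod 4 (copied to %eax in the prologue):
C remainder 0 enters the 4-way unrolled loop at L(b0), while remainders 3, 2
C and 1 first peel off that many leading limbs at L(b3), L(b2) and L(b1).
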
L(b3):	mov	-16(vp,n,8), %r9
	shrd	$63, %r9, %r8
	mov	-8(vp,n,8), %r10
	shrd	$63, %r10, %r9
	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12
	mov	%r12, -24(rp,n,8)
	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)
	mov	-8(up,n,8), %r12
	ADCSBB	%r9, %r12
	mov	%r12, -8(rp,n,8)
	mov	%r10, %r11
	sbb	R32(%rax), R32(%rax)	C save cy
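C sbb %eax,%eax records the carry/borrow as %eax = -CF (0 or -1), freeing the
C flags for the loop-control add; the add %eax,%eax at the top of the loop
C turns it back into CF.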
	add	$3, n
	js	L(top)
	jmp	L(end)

L(b1):	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12
	mov	%r12, -24(rp,n,8)
	mov	%r8, %r11
	sbb	R32(%rax), R32(%rax)	C save cy
	inc	n
	js	L(top)
	jmp	L(end)

L(b2):	mov	-16(vp,n,8), %r9
	shrd	$63, %r9, %r8
	mov	-24(up,n,8), %r12
	ADDSUB	%r11, %r12
	mov	%r12, -24(rp,n,8)
	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)
	mov	%r9, %r11
	sbb	R32(%rax), R32(%rax)	C save cy
	add	$2, n
	js	L(top)
	jmp	L(end)

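C Main loop: 4-way unrolled, handling four limbs of each operand per
C iteration.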
	ALIGN(16)
L(top):	mov	-24(vp,n,8), %r8
	shrd	$63, %r8, %r11
L(b0):	mov	-16(vp,n,8), %r9
	shrd	$63, %r9, %r8
	mov	-8(vp,n,8), %r10
	shrd	$63, %r10, %r9
	mov	(vp,n,8), %rbx
	shrd	$63, %rbx, %r10

	add	R32(%rax), R32(%rax)	C restore cy
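C %eax is 0 or -1 here, so adding it to itself regenerates the saved
C carry/borrow in CF for the ADCSBB chain below.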

	mov	-24(up,n,8), %r12
	ADCSBB	%r11, %r12
	mov	%r12, -24(rp,n,8)

	mov	-16(up,n,8), %r12
	ADCSBB	%r8, %r12
	mov	%r12, -16(rp,n,8)

	mov	-8(up,n,8), %r12
	ADCSBB	%r9, %r12
	mov	%r12, -8(rp,n,8)

	mov	(up,n,8), %r12
	ADCSBB	%r10, %r12
	mov	%r12, (rp,n,8)

	mov	%rbx, %r11
	sbb	R32(%rax), R32(%rax)	C save cy

	add	$4, n
	js	L(top)

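C At L(end), %r11 holds the most significant vp limb and %eax the negated
C saved carry/borrow.  Doubling %r11 moves the bit shifted out of {vp,n} << 1
C into CF; the sbb/neg below fold it into the return value (0, 1 or 2).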
L(end):	add	%r11, %r11
	pop	%r12
	pop	%rbx
	sbb	$0, R32(%rax)
	neg	R32(%rax)
	ret
EPILOGUE()
