1dnl  Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract.
2
3dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C	     cycles/limb
24C K8,K9:	 2.25
25C K10:		 2
26C P4:		10
27C P6 core2:	 2.05
28C P6 corei7:	 2.3
29
30C INPUT PARAMETERS
C Symbolic names for the argument registers.  These are the first five
C integer argument registers of the System V AMD64 calling convention.
C As used in this file: memory at rp is only written, memory at up and vp
C is only read, and n is the limb counter that is negated and stepped up to
C zero.  The cy name itself is not referenced below; the code addresses r8
C directly, as the incoming carry and afterwards as a scratch register.
31define(`rp',	`%rdi')
32define(`up',	`%rsi')
33define(`vp',	`%rdx')
34define(`n',	`%rcx')
35define(`cy',	`%r8')
36
C Build-time selection of the operation.  With OPERATION_add_n defined, the
C shared body below uses adc and its two entry points are named mpn_add_n
C and mpn_add_nc.  With OPERATION_sub_n defined, it uses sbb and the entry
C points are named mpn_sub_n and mpn_sub_nc.  There is no fallback branch:
C if neither symbol is defined, ADCSBB, func and func_nc remain undefined.
37ifdef(`OPERATION_add_n', `
38	define(ADCSBB,	      adc)
39	define(func,	      mpn_add_n)
40	define(func_nc,	      mpn_add_nc)')
41ifdef(`OPERATION_sub_n', `
42	define(ADCSBB,	      sbb)
43	define(func,	      mpn_sub_n)
44	define(func_nc,	      mpn_sub_nc)')
45
C NOTE(review): MULFUNC_PROLOGUE, ASM_START, TEXT and ALIGN are macros that
C are not defined in this file; presumably they come from the config.m4
C included above.  The list passed to MULFUNC_PROLOGUE names the four entry
C points this single source can be built into.  Confirm the exact role of
C each macro against its definition before changing anything here.
46MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
47
48ASM_START()
49
50	TEXT
C Presumably requests 16-byte alignment for the code that follows.
51	ALIGN(16)
52
C func_nc: entry point that takes a carry-in in r8.
C
C It does no work of its own.  It jumps to L(start) inside func, which skips
C the instruction that zeroes r8.  The body later shifts r8 right by one bit
C to load the carry flag, so only the low bit of the supplied r8 is used.
53PROLOGUE(func_nc)
54	jmp	L(start)
55EPILOGUE()
56
C func: limb-by-limb add or subtract with carry propagation.  The operation
C is whatever ADCSBB expands to.
C
C In:   rp = destination, up and vp = sources, n = number of 8-byte limbs.
C       This entry point zeroes r8, so it starts with no carry-in.  The
C       func_nc entry point joins at L(start) with r8 set by its caller.
C Out:  rax = final carry or borrow, 0 or 1.
C Uses: rax, r8, r9, r10, r11 and the flags as scratch.  rdi, rsi, rdx and
C       rcx are modified.
C
C The first limbs of up and vp are loaded before n is examined, and the
C only exit is the jrcxz test that waits for rcx to reach zero.
C NOTE(review): this presumably relies on callers passing n >= 1 -- confirm.
C
C Structure: a 4-way unrolled, software-pipelined loop.  Two register pairs
C alternate: r8/r9 and r10/r11.  Each stage loads the next limb pair into
C one set while it combines and stores the pair held in the other set.
C n mod 4 selects which stage is entered first.
57PROLOGUE(func)
58	xor	%r8, %r8
59L(start):
C Preload limb 0 of each source into the r10/r11 pair.
60	mov	(up), %r10
61	mov	(vp), %r11
62
C Point up and vp at their last limb.  rp is placed one limb lower because
C each loop stage stores the result of the pair loaded one stage earlier,
C using the same index expression as the load.
63	lea	-8(up,n,8), up
64	lea	-8(vp,n,8), vp
65	lea	-16(rp,n,8), rp
C eax = n mod 4.  rcx becomes -n so the index counts upward towards zero.
66	mov	%ecx, %eax
67	neg	n
68	and	$3, %eax
69	je	L(b00)
C Adding n mod 4 to -n leaves rcx at a multiple of 4, so stepping by 4
C lands exactly on zero at the jrcxz test.
70	add	%rax, n		C clear low rcx bits for jrcxz
C Dispatch on n mod 4: 1 goes to b01, 2 goes to b10, 3 falls into b11.
71	cmp	$2, %eax
72	jl	L(b01)
73	je	L(b10)
74
C In each b-block below, the one-bit shr moves the low bit of r8 into the
C carry flag.  From there until L(end) only mov, lea, jmp, jrcxz and ADCSBB
C execute, so the carry is changed by ADCSBB alone.
75L(b11):	shr	%r8			C set cy
76	jmp	L(e11)
77
C b00 and b10 enter at stages that consume r8/r9, so the preloaded limb 0
C is moved over from r10/r11.  b00 also pre-advances rcx by four because it
C enters past the point where the loop would have done so.
78L(b00):	shr	%r8			C set cy
79	mov	%r10, %r8
80	mov	%r11, %r9
81	lea	4(n), n
82	jmp	L(e00)
83
84L(b01):	shr	%r8			C set cy
85	jmp	L(e01)
86
87L(b10):	shr	%r8			C set cy
88	mov	%r10, %r8
89	mov	%r11, %r9
90	jmp	L(e10)
91
C Exit path, reached only from the jrcxz at L(e01), so rcx is zero here.
C The last limb pair is in r10/r11.  Since rp points one limb below the
C last destination limb, 8(rp) is the last destination limb.
92L(end):	ADCSBB	%r11, %r10
93	mov	%r10, 8(rp)
C mov leaves the flags alone, so the carry survives the clearing of eax.
C adc then computes 0 + 0 + carry, which is the return value.
94	mov	%ecx, %eax		C clear eax, ecx contains 0
95	adc	%eax, %eax
96	ret
97
98	ALIGN(16)
C Stage 1: load the next pair into r8/r9, finish the pair in r10/r11.
99L(top):
100	mov	-24(up,n,8), %r8
101	mov	-24(vp,n,8), %r9
102	ADCSBB	%r11, %r10
103	mov	%r10, -24(rp,n,8)
C Stage 2: load the next pair into r10/r11, finish the pair in r8/r9.
104L(e00):
105	mov	-16(up,n,8), %r10
106	mov	-16(vp,n,8), %r11
107	ADCSBB	%r9, %r8
108	mov	%r8, -16(rp,n,8)
C Stage 3: load the next pair into r8/r9, finish the pair in r10/r11.
109L(e11):
110	mov	-8(up,n,8), %r8
111	mov	-8(vp,n,8), %r9
112	ADCSBB	%r11, %r10
113	mov	%r10, -8(rp,n,8)
C Stage 4: load the next pair into r10/r11, finish the pair in r8/r9.
114L(e10):
115	mov	(up,n,8), %r10
116	mov	(vp,n,8), %r11
117	ADCSBB	%r9, %r8
118	mov	%r8, (rp,n,8)
C Loop control.  jrcxz tests rcx without touching the flags and leaves the
C loop once the index has reached zero, with one pair still pending in
C r10/r11.  Otherwise lea advances the index by four limbs, again without
C touching the flags.
119L(e01):
120	jrcxz	L(end)
121	lea	4(n), n
122	jmp	L(top)
123
124EPILOGUE()
125